def _train_and_test(self, model_path, output_model_path, training_datapoints,
                    test_datapoints, keep_classes=False):
    """Imprints a model label by label, saves it, then checks its predictions.

    Args:
      model_path: path of the base model handed to ImprintingEngine.
      output_model_path: path the retrained model is saved to and then
        reloaded from for classification.
      training_datapoints: sized collection of dicts, each providing
        'image_names' and 'label_id'.
      test_datapoints: iterable of dicts, each providing 'image_name',
        'label_id' and 'score'.
      keep_classes: forwarded to ImprintingEngine. When false, the size of the
        output tensor must equal the number of training datapoints.
    """
    trainer = ImprintingEngine(model_path, keep_classes)
    input_shape = self._get_image_shape(model_path)
    images_root = test_utils.test_data_path('imprinting')

    # Train: one engine.train() call per datapoint, then persist the model.
    for datapoint in training_datapoints:
        tensors = test_utils.prepare_images(
            datapoint['image_names'], images_root, input_shape)
        trainer.train(tensors, datapoint['label_id'])
    trainer.save_model(output_model_path)

    # Test: reload the saved model and verify its output layout.
    classifier = ClassificationEngine(output_model_path)
    self.assertEqual(1, classifier.get_num_of_output_tensors())
    if not keep_classes:
        self.assertEqual(
            len(training_datapoints), classifier.get_output_tensor_size(0))

    for datapoint in test_datapoints:
        self._classify_image(
            classifier,
            images_root,
            datapoint['image_name'],
            datapoint['label_id'],
            datapoint['score'])
def test_train_all(self):
    """Checks train_all() on every model in _MODEL_LIST.

    For each model, three categories (cat, dog, hot dog) are imprinted in one
    train_all() call, the model is saved to a temporary file and reloaded, and
    one test image per category is classified with an expected score of 0.99.
    """
    # Fixtures are identical for every model, so build them once.
    train_set = [
        ['cat_train_0.bmp'],
        ['dog_train_0.bmp'],
        ['hotdog_train_0.bmp', 'hotdog_train_1.bmp'],
    ]
    label_to_id_map = {'cat': 0, 'dog': 1, 'hot_dog': 2}
    test_cases = (
        ('cat_test_0.bmp', 'cat'),
        ('dog_test_0.bmp', 'dog'),
        ('hotdog_test_0.bmp', 'hot_dog'),
    )
    data_dir = test_utils.test_data_path('imprinting')

    for model_path in self._MODEL_LIST:
        # Passing model_path makes a failing sub-test report which model broke.
        with self.subTest(model_path=model_path):
            with test_utils.TemporaryFile(
                    suffix='.tflite') as output_model_path:
                engine = ImprintingEngine(model_path, keep_classes=False)
                image_shape = self._get_image_shape(model_path)

                # Train.
                train_input = [
                    test_utils.prepare_images(
                        image_list, data_dir, image_shape)
                    for image_list in train_set
                ]
                engine.train_all(train_input)
                engine.save_model(output_model_path.name)

                # Test.
                engine = ClassificationEngine(output_model_path.name)
                self.assertEqual(1, engine.get_num_of_output_tensors())
                self.assertEqual(3, engine.get_output_tensor_size(0))
                for image_name, label in test_cases:
                    self._classify_image(engine, data_dir, image_name,
                                         label_to_id_map[label], 0.99)
def run_benchmark(model):
    """Times imprinting training of `model` on seeded random input tensors.

    The measured span covers every engine.train() call plus saving the model
    to a temporary file.

    Args:
      model: string, file name of the input model.

    Returns:
      float, training time in seconds as measured with time.perf_counter().
    """
    tensor_size = input_tensor_size(model)
    engine = ImprintingEngine(
        test_utils.test_data_path(model), keep_classes=False)

    # Fixed seed so every run trains on the same pseudo-random data.
    np.random.seed(12345)
    # 10 categories of 20 random tensors each, keyed by class id.
    samples_by_class = {
        class_id: [np.random.randint(0, 255, tensor_size) for _ in range(20)]
        for class_id in range(10)
    }

    began = time.perf_counter()
    for class_id, tensors in samples_by_class.items():
        engine.train(tensors, class_id)
    with tempfile.NamedTemporaryFile() as saved_model:
        engine.save_model(saved_model.name)
        training_time = time.perf_counter() - began

    print('Model: %s' % model)
    print('Training time: %.2fs' % training_time)
    return training_time
def main():
    """Retrains a model by imprinting on a dataset and prints top-k accuracy.

    Steps:
      1. Parse command-line arguments and split the dataset into training and
         test image lists per category.
      2. Pre-process the training images and train with
         ImprintingEngine.train_all().
      3. Save the model to args.output and the labels via _save_labels().
      4. Classify every test image and print the accuracy for top 1..5.

    If the test split contains no images, a notice is printed instead of the
    accuracy table.
    """
    args = _parse_args()
    print('--------------- Parsing data set -----------------')
    print('Dataset path:', args.data)
    train_set, test_set = _read_data(args.data, args.test_ratio)
    print('Image list successfully parsed! Category Num = ', len(train_set))
    shape = _get_required_shape(args.model_path)

    print('---------------- Processing training data ----------------')
    print('This process may take more than 30 seconds.')
    train_input = []
    labels_map = {}
    for class_id, (category, image_list) in enumerate(train_set.items()):
        print('Processing category:', category)
        train_input.append(
            _prepare_images(image_list, os.path.join(args.data, category),
                            shape))
        labels_map[class_id] = category

    print('---------------- Start training -----------------')
    engine = ImprintingEngine(args.model_path)
    engine.train_all(train_input)
    print('---------------- Training finished! -----------------')

    engine.save_model(args.output)
    print('Model saved as : ', args.output)
    _save_labels(labels_map, args.output)

    print('------------------ Start evaluating ------------------')
    engine = ClassificationEngine(args.output)
    top_k = 5
    # correct[i] counts images whose true category is among the first i + 1
    # candidates returned by the classifier.
    correct = [0] * top_k
    evaluated = 0
    for category, image_list in test_set.items():
        print('Evaluating category [', category, ']')
        for img_name in image_list:
            # Close each image once it has been classified.
            with Image.open(
                    os.path.join(args.data, category, img_name)) as img:
                candidates = engine.classify_with_image(
                    img, threshold=0.1, top_k=top_k)
            evaluated += 1
            recognized = False
            for i in range(top_k):
                if (i < len(candidates)
                        and labels_map[candidates[i][0]] == category):
                    recognized = True
                # Once matched at rank i, the image counts for every k >= i.
                if recognized:
                    correct[i] += 1

    print('---------------- Evaluation result -----------------')
    if not evaluated:
        # Avoid dividing by zero when the test split is empty.
        print('No test images were evaluated.')
        return
    for i in range(top_k):
        print('Top {} : {:.0%}'.format(i + 1, correct[i] / evaluated))
def test_imprinting_engine_saving_without_training(self):
    """Saving an untrained ImprintingEngine must raise a RuntimeError.

    Each listed model is loaded and saved straight away; the test expects the
    RuntimeError text 'Model without training won't be saved!'.
    """
    untrained_models = (
        'imprinting/mobilenet_v1_1.0_224_l2norm_quant.tflite',
        'imprinting/mobilenet_v1_1.0_224_l2norm_quant_edgetpu.tflite',
    )
    for model in untrained_models:
        raised_text = None
        engine = ImprintingEngine(
            test_utils.test_data_path(model), keep_classes=False)
        try:
            with tempfile.NamedTemporaryFile(suffix='.tflite') as target:
                engine.save_model(target.name)
        except RuntimeError as err:
            raised_text = str(err)
        # raised_text stays None if save_model() unexpectedly succeeded.
        self.assertEqual(
            'Model without training won\'t be saved!', raised_text)
def _benchmark_for_training(model, data_set):
    """Measures training time for given model and data set.

    Image pre-processing is timed and printed separately. The returned
    training time covers the engine.train() calls plus saving the model.

    Args:
      model: string, file name of the input model.
      data_set: string, name of the folder storing images. Labels file is also
        named as '[data_set].csv'.

    Returns:
      float, training time in seconds as measured with time.monotonic().
    """
    # Imported locally so this block does not depend on the module's imports.
    import tempfile

    shape = _get_shape(model)
    engine = ImprintingEngine(
        test_utils.test_data_path('imprinting', model), keep_classes=False)

    data_dir = test_utils.test_data_path(data_set)
    # The labels file is named as '[data_set].csv'.
    image_list_by_category = test_utils.prepare_classification_data_set(
        test_utils.test_data_path(data_set + '.csv'))

    start_time = time.monotonic()
    # Build a new mapping instead of overwriting the one being iterated.
    tensors_by_category = {
        category: test_utils.prepare_images(
            image_list, os.path.join(data_dir, category), shape)
        for category, image_list in image_list_by_category.items()
    }
    end_time = time.monotonic()
    print('Image pre-processing time: ', end_time - start_time, 's')

    # A uniquely named temporary file avoids collisions between concurrent
    # runs and is removed even if training or saving raises.
    with tempfile.NamedTemporaryFile(suffix='.tflite') as output_model:
        start_time = time.monotonic()
        for class_id, tensors in enumerate(tensors_by_category.values()):
            engine.train(tensors, class_id)
        engine.save_model(output_model.name)
        training_time = time.monotonic() - start_time

    print('Model: ', model)
    print('Data set : ', data_set)
    print('Training time : ', training_time, 's')
    return training_time
def retrain_model(props):
    """Retrains the classification model on the users' pictures by imprinting.

    Only the last layer of the model changes. The engine is created with
    keep_classes=False, so the retrained model knows only the users trained
    here.

    For every user in props['user'] that has pictures, the operator is asked
    how many of them to hold back for validation (at least 2, at most 25%).
    Users with too few pictures to satisfy both limits are skipped with a
    warning. The remaining pictures are resized to the model input size and
    imprinted with train_all(). The new model and its labels JSON are written
    under './Models/', top-1..5 accuracy on the held-back pictures is printed,
    the operator may delete the previous model files, and props is saved.

    Args:
      props: dict of application properties. Reads
        props['classification']['default_path'], ['path'] and ['labels'], and
        props['user'][<name>]['images']. Updates
        props['classification']['path'] and ['labels'] in place.
    """
    MODEL_PATH = props['classification']['default_path']
    min_validation = 2  # smallest accepted validation set per user
    max_validation_ratio = 0.25  # largest accepted share of a user's pictures

    click.echo('Parsing data for retraining...')
    train_set = {}
    test_set = {}
    for user in props['user']:
        image_dir = props['user'][user]['images']
        images = [
            f for f in os.listdir(image_dir)
            if os.path.isfile(os.path.join(image_dir, f))
        ]
        if not images:
            continue

        # Allocate the number of images for training and validation.
        net_pictures = len(images)
        click.echo(
            click.style('We found {} pictures for {}'.format(
                net_pictures, user), fg='green'))

        max_validation = int(max_validation_ratio * net_pictures)
        if max_validation < min_validation:
            # No answer could satisfy both limits, so prompting would loop
            # forever; skip this user instead.
            click.echo(
                click.style(
                    'Skipping {}: not enough pictures to hold back {} for '
                    'validation within the 25% limit.'.format(
                        user, min_validation),
                    fg='yellow'))
            continue

        while True:
            # type=int makes click re-prompt on non-numeric input.
            k = click.prompt(
                'How many pictures do you want for validating the training?',
                type=int)
            if k > max_validation:
                click.echo(
                    click.style(
                        'At most 25% ({} pictures) of the training data can be used for testing the model!'
                        .format(max_validation),
                        fg='yellow'))
            elif k < min_validation:
                click.echo(
                    click.style(
                        'At least {} pictures must be used for testing the model!'
                        .format(min_validation),
                        fg='yellow'))
            else:
                break

        test_set[user] = images[:k]
        assert test_set[user], 'No images to test [{}]'.format(user)
        train_set[user] = images[k:]
        assert train_set[user], 'No images to train [{}]'.format(user)

    if not train_set:
        # Nothing to imprint; leave the current model and properties untouched.
        click.echo(
            click.style('No pictures available for retraining.', fg='yellow'))
        return

    # Get the input shape of the model to retrain.
    tmp = BasicEngine(MODEL_PATH)
    input_tensor = tmp.get_input_tensor_shape()
    shape = (input_tensor[2], input_tensor[1])

    # Resize pictures and create the new labels map.
    train_input = []
    labels_map = {}
    for user_id, (user, image_list) in enumerate(train_set.items()):
        ret = []
        for filename in image_list:
            with Image.open(
                    os.path.join(props['user'][user]['images'],
                                 filename)) as img:
                img = img.convert('RGB')
                img = img.resize(shape, Image.NEAREST)
                ret.append(np.asarray(img).flatten())
        train_input.append(np.array(ret))
        labels_map[user_id] = user

    # Train the model.
    click.echo('Start training')
    engine = ImprintingEngine(MODEL_PATH, keep_classes=False)
    engine.train_all(train_input)
    click.echo(click.style('Training finished!', fg='green'))

    # Remember the old model files so they can be cleaned up afterwards.
    old_model = props['classification']['path']
    old_labels = props['classification']['labels']

    # Save the new model; its file name lists every trained user.
    new_model = './Models/model{}.tflite'.format(''.join(
        '_' + u for u in labels_map.values()))
    props['classification']['path'] = new_model
    engine.save_model(new_model)

    # Save the labels next to the model.
    new_labels = new_model.replace('classification',
                                   'labels').replace('tflite', 'json')
    props['classification']['labels'] = new_labels
    with open(new_labels, 'w') as f:
        json.dump(labels_map, f, indent=4)

    # Evaluate how well the retrained model performs.
    click.echo('Start evaluation')
    engine = ClassificationEngine(new_model)
    top_k = 5
    # correct[i] counts pictures whose user is among the first i + 1 candidates.
    correct = [0] * top_k
    evaluated = 0
    for user, image_list in test_set.items():
        for img_name in image_list:
            # Close each picture once it has been classified.
            with Image.open(
                    os.path.join(props['user'][user]['images'],
                                 img_name)) as img:
                candidates = engine.classify_with_image(
                    img, threshold=0.1, top_k=top_k)
            evaluated += 1
            recognized = False
            for i in range(top_k):
                if (i < len(candidates)
                        and user == labels_map[candidates[i][0]]):
                    recognized = True
                # Once matched at rank i, the picture counts for every k >= i.
                if recognized:
                    correct[i] += 1

    click.echo('Evaluation Results:')
    for i in range(top_k):
        click.echo('Top {} : {:.0%}'.format(i + 1, correct[i] / evaluated))
    # TODO highlight with colors how well it performed

    if old_model != new_model and old_labels != new_labels:
        # Only delete files that exist: one of the two may already be gone.
        stale_files = [
            path for path in (old_model, old_labels) if os.path.exists(path)
        ]
        if stale_files and not click.confirm(
                'Do you want to keep old models?'):
            for path in stale_files:
                os.remove(path)
            click.echo(click.style('Old models removed.', fg='green'))

    # Save the properties.
    save_properties(props)
def _transfer_learn_and_evaluate(self, model_path, keep_classes, dataset_path,
                                 test_ratio, top_k_range):
    """Transfer-learns with given params and returns the evaluation result.

    Args:
      model_path: string, path of the base model.
      keep_classes: bool, whether to keep base model classes.
      dataset_path: string, path to the directory of dataset. The images
        should be put under sub-directory named by category.
      test_ratio: float, the ratio of images used for test.
      top_k_range: int, top_k range to be evaluated. The function will return
        accuracy from top 1 to top k.

    Returns:
      list of float numbers; element i is the top-(i + 1) accuracy over all
      test images.

    Raises:
      ZeroDivisionError: if the test set contains no images.
    """
    print('--------------- Parsing dataset ----------------')
    print('Dataset path:', dataset_path)

    # Train in fixed order to ensure the same evaluation result.
    train_set, test_set = test_utils.prepare_data_set_from_directory(
        dataset_path, test_ratio, True)

    print('Image list successfully parsed! Number of Categories = ',
          len(train_set))
    input_shape = self._get_input_tensor_shape(model_path)
    required_image_shape = (input_shape[2], input_shape[1])  # (width, height)

    print('--------------- Processing training data ----------------')
    print('This process may take more than 30 seconds.')
    # New label ids start after the base model's classes when they are kept.
    num_classes = self._get_output_number_classes(
        model_path) if keep_classes else 0
    train_input = []
    labels_map = {}
    for class_id, (category, image_list) in enumerate(train_set.items()):
        print('Processing {} ({} images)'.format(category, len(image_list)))
        train_input.append(
            test_utils.prepare_images(image_list,
                                      os.path.join(dataset_path, category),
                                      required_image_shape))
        labels_map[num_classes + class_id] = category

    # Train.
    print('---------------- Start training -----------------')
    imprinting_engine = ImprintingEngine(model_path, keep_classes)
    imprinting_engine.train_all(train_input)
    print('---------------- Training finished -----------------')

    # top_k_correct_count[i] is the number of images whose first correct
    # candidate is at rank i; it is turned into cumulative counts below.
    top_k_correct_count = [0] * top_k_range
    image_num = 0

    # The context manager closes and removes the temporary model file even if
    # saving or evaluation raises.
    with tempfile.NamedTemporaryFile(suffix='.tflite') as output_model:
        imprinting_engine.save_model(output_model.name)

        # Evaluate.
        print('---------------- Start evaluating -----------------')
        classification_engine = ClassificationEngine(output_model.name)
        for category, image_list in test_set.items():
            n = len(image_list)
            print('Evaluating {} ({} images)'.format(category, n))
            for image_name in image_list:
                with test_image(
                        os.path.join(dataset_path, category,
                                     image_name)) as raw_image:
                    # Set threshold as a negative number to ensure we get top
                    # k candidates even if its score is 0.
                    candidates = classification_engine.classify_with_image(
                        raw_image, threshold=-0.1, top_k=top_k_range)
                    for rank, candidate in enumerate(candidates):
                        label_id = candidate[0]
                        if (label_id in labels_map
                                and labels_map[label_id] == category):
                            top_k_correct_count[rank] += 1
                            break
            image_num += n

    # Accumulate so that entry i counts hits within the top (i + 1).
    for i in range(1, top_k_range):
        top_k_correct_count[i] += top_k_correct_count[i - 1]

    return [top_k_correct_count[i] / image_num for i in range(top_k_range)]