def train():
    """Train the root model from the initial car-dataset protobuffers.

    Validates that the protobuf directory holds a labels.txt with exactly 12
    labels, ensures the root model directory exists, then starts training for
    up to one week.
    """
    protobuf_dir = dirs.get_protobuf_dir('root')
    labels_file = os.path.join(protobuf_dir, 'labels.txt')

    # The label file must exist and contain exactly the expected 12 classes.
    if not os.path.isfile(labels_file):
        print('Missing labels in %s' % labels_file)
        return None
    with open(labels_file) as handle:
        labels = sum(1 for _ in handle)
    if labels != 12:
        print('Wrong number of labels: %s in: %s' % (labels, protobuf_dir))
        return None

    # e.g. /home/markus/projects/cnn_server/model/bot_root
    bot_model_dir = dirs.get_model_data_dir('root')
    # make sure bot_model_dir is there
    if not os.path.isdir(bot_model_dir):
        os.mkdir(bot_model_dir)

    trainer.train(
        bot_model_dir=bot_model_dir,
        protobuf_dir=protobuf_dir,
        max_train_time_sec=(60 * 60 * 24 * 7),  # Adjust training time here.
        optimization_params=None,
        log_every_n_steps=10)
def train(bot_id, test=False, max_train_time=None):
    """Transfer-learn a bot model on top of the root model.

    :param bot_id: id of the bot whose model is to be trained
    :param test: when True, cap training at 60 seconds
    :param max_train_time: maximum training time in seconds (overridden by test)
    :return: True on success, False when any pre- or post-condition fails
    """
    if test:
        max_train_time = 60  # If we run a test, train for one minute only
    root_model_dir = dirs.get_root_model_dir()
    bot_model_dir = dirs.get_model_data_dir(bot_id)
    bot_protobuf_dir = dirs.get_protobuf_dir(bot_id)

    # root_model_dir must exist, not be empty and contain a checkpoints file
    if not os.path.exists(root_model_dir):
        print('root_model_dir %s does not exist' % root_model_dir)
        return False
    if not os.listdir(root_model_dir):
        print('root_model_dir %s is empty' % root_model_dir)
        return False
    if not os.path.isfile(os.path.join(root_model_dir, 'checkpoint')):
        print('no checkpoint files in root_model_dir %s' % root_model_dir)
        return False

    # bot_model_dir must exist and be empty
    if not os.path.exists(bot_model_dir):
        print('bot_model_dir %s does not exist' % bot_model_dir)
        return False
    if os.listdir(bot_model_dir):
        print('bot_model_dir %s is not empty' % bot_model_dir)
        return False

    # bot_protobuf_dir must exist and not be empty
    if not os.path.exists(bot_protobuf_dir):
        print('bot_protobuf_dir %s does not exist' % bot_protobuf_dir)
        return False
    if not os.listdir(bot_protobuf_dir):
        print('bot_protobuf_dir %s does not contain training data' %
              bot_protobuf_dir)
        return False

    transfer_learning.transfer_learning(
        root_model_dir=root_model_dir,
        bot_model_dir=bot_model_dir,
        protobuf_dir=bot_protobuf_dir,
        dataset_name='bot',
        dataset_split_name='train',
        model_name='inception_v4',
        checkpoint_exclude_scopes=['InceptionV4/Logits',
                                   'InceptionV4/AuxLogits'],
        trainable_scopes=['InceptionV4/Logits', 'InceptionV4/AuxLogits'],
        max_train_time_sec=max_train_time
    )

    # After Transfer Learning bot_model_dir must exist, not be empty and
    # contain a checkpoint file
    if not os.path.exists(bot_model_dir):
        print('bot_model_dir %s does not exist after transfer learning' %
              bot_model_dir)
        return False
    if not os.listdir(bot_model_dir):
        print('bot_model_dir %s is empty after transfer learning' %
              bot_model_dir)
        return False
    if not os.path.isfile(os.path.join(bot_model_dir, 'checkpoint')):
        print('no checkpoint file in bot_model_dir %s after transfer learning'
              % bot_model_dir)
        # BUG FIX: the original printed the error but fell through to
        # 'return True', reporting success without a written checkpoint.
        return False
    # TODO: Implement proper validation of the created model file: read ckpt
    # path from first line and look it up in the folder
    return True
def convert(bot_id):
    """Convert a bot's training images into protobuffer records, timing the run."""
    source_dir = dirs.get_training_data_dir(bot_id)
    target_dir = dirs.get_protobuf_dir(bot_id)

    print('Converting training data for %s' % bot_id)
    started = time.time()

    # Only run the converter when both directories pass validation.
    ready = _check_training_dir(source_dir) and _check_proto_dir(target_dir)
    if ready:
        converter.run(source_dir, target_dir, fract_validation=0.2)

    elapsed = time.time() - started
    print('Converted training data for %s in %s sec' % (bot_id, elapsed))
def test_get_bot_id_from_dir(self):
    """get_bot_id_from_dir must recover the bot id from every bot directory type."""
    expected_id = 'bmw_models'
    # The id must be recoverable from all three kinds of bot directories.
    candidate_dirs = (
        dirs.get_training_data_dir(expected_id),
        dirs.get_protobuf_dir(expected_id),
        dirs.get_model_data_dir(expected_id),
    )
    for directory in candidate_dirs:
        resolved = dirs.get_bot_id_from_dir(directory)
        self.assertEqual(expected_id, resolved, 'bot ids do not match')
def test_handle_post(self):
    """End-to-end check of handler.handle_post: classify a base64-encoded
    image and verify the JSON response carries the requested number of labels
    and probabilities, then clean up the bot model directory.
    """
    # Seed the bot's protobuf dir with the flower fixture data if it is empty.
    if not os.listdir(dirs.get_protobuf_dir(TEST_BOT_ID)):
        shutil.copytree(os.path.join(FILES_DIR, 'flower_protobuf'),
                        dirs.get_protobuf_dir(TEST_BOT_ID))
    # Replace the bot's model dir with the pre-trained fixture model so the
    # handler has a checkpoint to restore from.
    if os.path.exists(dirs.get_model_data_dir(TEST_BOT_ID)):
        shutil.rmtree(dirs.get_model_data_dir(TEST_BOT_ID))
    shutil.copytree(os.path.join(FILES_DIR, 'protobuf/bot_test'),
                    dirs.get_model_data_dir(TEST_BOT_ID))
    expected_return_labels = 3
    # The handler apparently expects base64-encoded image bytes delivered via
    # a file-like object — TODO confirm against handler.handle_post.
    temp_file = tempfile.NamedTemporaryFile()
    temp_file.write(
        base64.b64encode(
            open(
                os.path.join(FILES_DIR, 'tulip.jpg'), "rb"
            ).read()
        )
    )
    temp_file.seek(0)
    json_result, status = handler.handle_post(TEST_BOT_ID,
                                              temp_file.read(),
                                              return_labels=expected_return_labels)
    temp_file.close()
    # The handler returns a JSON string with 'labels' and 'probabilities'
    # lists; both must have exactly the requested length.
    json_result = json.loads(json_result)
    labels = json_result['labels']
    probs = json_result['probabilities']
    self.assertTrue(labels)
    self.assertTrue(probs)
    self.assertEqual(expected_return_labels, len(labels))
    self.assertEqual(expected_return_labels, len(probs))
    # Clean the bot_model directory for next test run
    for file in os.listdir(dirs.get_model_data_dir(TEST_BOT_ID)):
        file_path = os.path.join(dirs.get_model_data_dir(TEST_BOT_ID), file)
        try:
            if os.path.isfile(file_path):
                os.unlink(file_path)
        except Exception as e:
            # Best effort cleanup: report but do not fail the test.
            print(e)
def test_run(self):
    """Run the converter end to end and verify the labels file and the
    number of train/validation tfrecord shards it produces.
    """
    protobuf_dir = dirs.get_protobuf_dir(BOT_ID)
    training_data_dir = dirs.get_training_data_dir(BOT_ID)
    if not os.listdir(training_data_dir):
        # BUG FIX: the original used '&' instead of '%', which raises a
        # TypeError instead of printing the skip message.
        print("Cannot start test. No data in %s" % training_data_dir)
        return
    # Start from an empty protobuf directory.
    if not os.path.exists(protobuf_dir):
        os.mkdir(protobuf_dir)
    if os.listdir(protobuf_dir):
        shutil.rmtree(protobuf_dir)
        os.mkdir(protobuf_dir)
    converter.run(training_data_dir, protobuf_dir, 0.1)
    # Check if the labels.txt has been created
    self.assertTrue(
        os.path.isfile(os.path.join(protobuf_dir, 'labels.txt')))
    # Make sure the labels file contains as many lines as the training data
    # folder has subfolders (one label per class directory). Comparing the
    # last enumerate indices is equivalent to comparing the counts.
    with open(os.path.join(protobuf_dir, 'labels.txt')) as f:
        for lndx, dir in enumerate(os.listdir(training_data_dir)):
            pass
        for fndx, ln in enumerate(f):
            pass
    self.assertEqual(lndx, fndx)
    # Make sure there are 10 shards in total: 5 train and 5 validation.
    protofiles = 0
    training_files = 0
    validation_files = 0
    for file in os.listdir(protobuf_dir):
        if file.endswith('.tfrecord'):
            protofiles += 1
        if 'train' in file:
            training_files += 1
        if 'validation' in file:
            validation_files += 1
    self.assertEqual(10, protofiles)
    self.assertEqual(5, training_files)
    self.assertEqual(5, validation_files)
    # Leave the protobuf directory empty for the next test run.
    if os.listdir(protobuf_dir):
        shutil.rmtree(protobuf_dir)
        os.mkdir(protobuf_dir)
def write_to_protobuffer(bot_id: int):
    """
    Read the data from the training data directory, convert them to
    protobuffer format and write them to the protobuffer directory.

    :param bot_id: id of the bot whose training data is converted
    :return: False when the training data directory is missing, True otherwise
    """
    source_dir = dirs.get_training_data_dir(bot_id)
    # Without training data there is nothing to convert.
    if not os.path.exists(source_dir):
        return False
    target_dir = dirs.get_protobuf_dir(bot_id)
    if not os.path.exists(target_dir):
        os.mkdir(target_dir)
    converter.run(source_dir, target_dir)
    return True
def delete_bot_data(bot_id):
    """
    Delete all data of a bot in the filesystem if it exists.

    Removes the training data, protobuf and model directories for the bot.

    :param bot_id: id of the bot whose data is deleted
    :return: success message and HTTP status 200
    """
    bot_dirs = (
        dirs.get_training_data_dir(bot_id),
        dirs.get_protobuf_dir(bot_id),
        dirs.get_model_data_dir(bot_id),
    )
    for directory in bot_dirs:
        if os.path.isdir(directory):
            print('[Training Data Service] - Deleteting %s' % directory)
            rmtree(directory)
    return 'Successfully Deleted Data for Bot %s' % bot_id, 200
def test_dataset_factory(self):
    """Datasets built by the factory must report the expected class and
    sample counts for both the default bot and the bmw_models bot.
    """

    def check_split(split, expected_classes, expected_samples):
        # Every split must be a tf_slim Dataset with the expected metadata.
        self.assertTrue(split)
        self.assertTrue(type(split) is tf_slim.dataset.Dataset)
        self.assertEqual(split.num_classes, expected_classes)
        self.assertEqual(split.num_samples, expected_samples)

    # Hard-coded expectations for the default bot protobuf directory.
    check_split(factory.get_dataset('bot', 'train', BOT_PROTOBUF_DIR),
                5, 3320)
    check_split(factory.get_dataset('bot', 'validation', BOT_PROTOBUF_DIR),
                5, 350)

    # Expectations derived from the bmw_models bot's own protobuf data.
    bmw_bot_id = 'bmw_models'
    bmw_proto_dir = dirs.get_protobuf_dir(bmw_bot_id)
    expected_classes = utils.get_number_of_classes_by_labels(bmw_proto_dir)
    check_split(factory.get_dataset('bot', 'train', bmw_proto_dir),
                expected_classes,
                utils.get_split_size(bmw_bot_id, 'train'))
    check_split(factory.get_dataset('bot', 'validation', bmw_proto_dir),
                expected_classes,
                utils.get_split_size(bmw_bot_id, 'validation'))
import unittest
from unittest import TestCase
import os
import shutil

from cnn_server.server import file_service as dirs
from cnn_server.training_data import training_data_service as service

# Directory holding the static test fixtures (zip archives, images, ...).
FILES_DIR = 'files'
# Absolute path of the training data root on the development machine.
TRAINING_DATA_DIR = '/home/markus/projects/cnn_server/training_data/'
BOT_ID = 1
BOT_TRAINING_DATA_DIR = dirs.get_training_data_dir(BOT_ID)
BOT_PROTOBUF_DIR = dirs.get_protobuf_dir(BOT_ID)


class TestTrainingDatService(TestCase):
    def test_validate_training_data(self):
        """Build the paths to the valid and invalid training-data zip
        fixtures. NOTE(review): this method only assigns fixture paths here —
        the validation calls appear to live outside this chunk; confirm
        against the full file.
        """
        # Read the ZIP Files
        valid_zip = os.path.join(FILES_DIR, 'valid_trainingdata.zip')
        # Archive with an unexpected extra nesting level.
        invalid_zip_subfolder = os.path.join(
            FILES_DIR, 'invalid_training_data_subfolder.zip')
        # Archive containing a stray file at the wrong level.
        invalid_zip_file = os.path.join(FILES_DIR,
                                        'invalid_training_data_file.zip')
        # Archives containing empty subdirectories.
        invalid_zip_emptysub = os.path.join(
            FILES_DIR, 'invalid_training_data_emptysub.zip')
        invalid_zip_emptysubend = os.path.join(
            FILES_DIR, 'invalid_training_data_emptysubend.zip')
        invalid_flowers = os.path.join(FILES_DIR, 'invalid_flower_photos.zip')
        # Not a zip archive at all.
        some_file_path = os.path.join(FILES_DIR, 'some_file.txt')
def eval(bot_id,
         bot_suffix,
         setting_id=None,
         dataset_split='train',
         dataset_name='bot',
         model_name='inception_v4',
         preprocessing=None,
         moving_average_decay=None,
         tf_master=''):
    """Build the evaluation graph for a bot's model on a dataset split.

    :param bot_id: id of the bot whose data/model is evaluated
    :param bot_suffix: suffix appended to the bot id to form the full id
    :param setting_id: when set, read protobufs from the transfer proto dir
        for that setting instead of the bot's default protobuf dir
    :param dataset_split: dataset split to evaluate ('train'/'validation')
    :param dataset_name: dataset name registered with the dataset factory
    :param model_name: architecture name for the nets factory
    :param preprocessing: preprocessing name; defaults to model_name
    :param moving_average_decay: when set, restore moving-average shadow
        variables instead of the raw model variables
    :param tf_master: TensorFlow master address (unused in the visible code)
    """
    full_id = bot_id + bot_suffix
    # Pick the protobuf source: a specific transfer-learning setting or the
    # bot's default protobuf directory.
    if setting_id:
        protobuf_dir = dirs.get_transfer_proto_dir(bot_id, setting_id)
    else:
        protobuf_dir = dirs.get_protobuf_dir(bot_id)
    _check_dir(protobuf_dir)
    # BUG FIX: corrected typo in the log message ('READIND' -> 'READING').
    print("READING FROM %s" % (protobuf_dir))
    performance_data_dir = dirs.get_performance_data_dir(bot_id)
    # Deliberately disabled guard kept for reference:
    # if os.listdir(performance_data_dir):
    #     raise ValueError('%s is not empty' % performance_data_dir)

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        tf_global_step = slim.get_or_create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = dataset_factory.get_dataset(dataset_name, dataset_split,
                                              protobuf_dir)

        ####################
        # Select the model #
        ####################
        network_fn = nets_factory.get_network_fn(
            model_name,
            num_classes=(dataset.num_classes - LABELS_OFFSET),
            is_training=False)

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            shuffle=False,
            common_queue_capacity=2 * BATCH_SIZE,
            common_queue_min=BATCH_SIZE)
        [image, label] = provider.get(['image', 'label'])
        label -= LABELS_OFFSET

        #####################################
        # Select the preprocessing function #
        #####################################
        preprocessing_name = preprocessing or model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=False)

        eval_image_size = EVAL_IMAGE_SIZE or network_fn.default_image_size
        image = image_preprocessing_fn(image, eval_image_size,
                                       eval_image_size)

        images, labels = tf.train.batch([image, label],
                                        batch_size=BATCH_SIZE,
                                        num_threads=NUM_THREADS,
                                        capacity=5 * BATCH_SIZE)

        ####################
        # Define the model #
        ####################
        logits, _ = network_fn(images)

        # Restore either the moving averages of the variables or the
        # variables themselves.
        if moving_average_decay:
            variable_averages = tf.train.ExponentialMovingAverage(
                moving_average_decay, tf_global_step)
            variables_to_restore = variable_averages.variables_to_restore(
                slim.get_model_variables())
            variables_to_restore[tf_global_step.op.name] = tf_global_step
        else:
            variables_to_restore = slim.get_variables_to_restore()

        predictions = tf.argmax(logits, 1)
        labels = tf.squeeze(labels)

        # Define the metrics:
        names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({
            'Accuracy': slim.metrics.streaming_accuracy(predictions, labels),
            'Recall_5': slim.metrics.streaming_recall_at_k(logits, labels, 5),
        })

        # Print the summaries to screen.
        for name, value in names_to_values.items():
            summary_name = 'eval/%s' % name
            op = tf.summary.scalar(summary_name, value, collections=[])
            op = tf.Print(op, [value], summary_name)
            tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

        # TODO(sguada) use num_epochs=1
        if MAX_NUM_BATCHES:
            num_batches = MAX_NUM_BATCHES
        else:
            # This ensures that we make a single pass over all of the data.
            num_batches = math.ceil(dataset.num_samples / float(BATCH_SIZE))

        print(dataset.num_samples)
        print(dataset.num_classes)
def infere(bot_id,
           image_file,
           network_name='inception_v4',
           return_labels=None,
           prediction_dict=None):
    """
    Loads the corresponding model checkpoint, network function and
    preprocessing routine based on bot_id and network_name, restores the
    graph and runs it to the prediction endpoint with the image as input.

    :param bot_id: bot_id, used to reference to correct model directory
    :param image_file: reference to the temporary image file to be classified
    :param network_name: name of the network type to be used
    :param return_labels: number of labels to return
    :param prediction_dict: optional dict receiving the result under the
        'predictions' key (the function itself returns None); a fresh dict is
        created when omitted
    """
    # BUG FIX: the default used to be a mutable list ([]), which is shared
    # across calls and cannot accept the string-keyed assignment at the end
    # of this function. Use the None-sentinel idiom instead.
    if prediction_dict is None:
        prediction_dict = {}

    # Get the model path
    model_path = dirs.get_model_data_dir(bot_id)

    # Get number of classes to predict
    protobuf_dir = dirs.get_protobuf_dir(bot_id)
    number_of_classes = dataset_utils.get_number_of_classes_by_labels(
        protobuf_dir)
    if not return_labels:
        return_labels = number_of_classes

    # Get the preprocessing and network construction functions
    preprocessing_fn = preprocessing_factory.get_preprocessing(
        network_name, is_training=False)
    network_fn = network_factory.get_network_fn(network_name,
                                                number_of_classes)

    # Process the temporary image file into a Tensor of shape
    # [width, height, channels]
    image_tensor = tf.gfile.FastGFile(image_file, 'rb').read()
    image_tensor = tf.image.decode_image(image_tensor, channels=0)

    # Perform preprocessing and reshape into
    # [network.default_width, network.default_height, channels]
    network_default_size = network_fn.default_image_size
    image_tensor = preprocessing_fn(image_tensor, network_default_size,
                                    network_default_size)

    # Create an input batch of size one from the preprocessed image.
    # GENERALIZED: use the network's default size instead of the hard-coded
    # 299 (identical for inception_v4, correct for other architectures too).
    input_batch = tf.reshape(
        image_tensor, [1, network_default_size, network_default_size, 3])

    # Create the network up to the Predictions Endpoint
    logits, endpoints = network_fn(input_batch)

    restorer = tf.train.Saver()
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        # Restore the variables of the network from the last checkpoint and
        # run the graph
        restorer.restore(sess, tf.train.latest_checkpoint(model_path))
        sess.run(endpoints)
        # Get the numpy array of predictions out of the endpoints
        predictions = endpoints['Predictions'].eval()[0]
        sess.close()

    prediction_dict['predictions'] = map_predictions_to_labels(
        protobuf_dir, predictions, return_labels)
def _convert(bot_id):
    """Convert a bot's training images to protobuffer records when both
    directories pass validation."""
    source_dir = dirs.get_training_data_dir(bot_id)
    target_dir = dirs.get_protobuf_dir(bot_id)
    dirs_ok = _check_training_dir(source_dir) and _check_proto_dir(target_dir)
    if dirs_ok:
        converter.run(source_dir, target_dir)
def test_get_filenames_and_classes(self):
    """Verify the per-class train/validation split sizes produced by
    converter._get_filenames_and_classes for a 0.1 validation fraction.
    """
    # Expected file counts per class and split (checked with +/-2 tolerance
    # below, since the split boundary may shift slightly).
    expected = {
        'bmw3': {'train': 2048, 'validation': 228},
        'bmw5': {'train': 515, 'validation': 57},
        'bmw6': {'train': 487, 'validation': 54},
        'bmw7': {'train': 1049, 'validation': 117},
    }
    protobuf_dir = dirs.get_protobuf_dir(BOT_ID)
    training_data_dir = dirs.get_training_data_dir(BOT_ID)
    if not os.listdir(training_data_dir):
        # BUG FIX: the original used '&' instead of '%', which raises a
        # TypeError instead of printing the skip message.
        print("Cannot start test. No data in %s" % training_data_dir)
        return
    # Start from an empty protobuf directory.
    if not os.path.exists(protobuf_dir):
        os.mkdir(protobuf_dir)
    if os.listdir(protobuf_dir):
        shutil.rmtree(protobuf_dir)
        os.mkdir(protobuf_dir)
    train, val, classes = converter._get_filenames_and_classes(
        training_data_dir, 0.1)
    # Count files per class: the class name is the parent directory's name.
    counts = {cl: {'train': 0, 'validation': 0} for cl in expected}
    for split_name, files in (('train', train), ('validation', val)):
        for file in files:
            cl = os.path.basename(os.path.dirname(file))
            if cl in counts:
                counts[cl][split_name] += 1
    # Each bucket must be within +/-2 of the expectation.
    for cl in expected:
        for split_name in ('train', 'validation'):
            self.assertIn(
                expected[cl][split_name] - counts[cl][split_name],
                range(-2, 3))
    # Leave the protobuf directory empty for the next test.
    if os.listdir(protobuf_dir):
        shutil.rmtree(protobuf_dir)
        os.mkdir(protobuf_dir)
def test_transfer_learning(self):
    """Run a short transfer-learning pass and classify an image with the
    resulting model.

    Preconditions on the root model, protobuf and bot model directories are
    checked first; the test aborts (returns None) when any of them fails.
    """
    # Root model to initialize from
    root_model_dir = dirs.get_test_root_model_dir()
    if not os.listdir(root_model_dir):
        print('root_model_dir %s empty. Cannot start test' % root_model_dir)
        return None
    if not os.path.isfile(os.path.join(root_model_dir, 'checkpoint')):
        print('No Checkpoint File in %s. Cannot start test.' %
              root_model_dir)
        return None

    # Folder to load the additional training data from
    bot_protobuf_dir = dirs.get_protobuf_dir(TEST_BOT_ID)
    if not os.path.isdir(bot_protobuf_dir):
        print('bot_protobuf_dir %s does not exist. Cannot start test' %
              bot_protobuf_dir)
        return None
    if not os.listdir(bot_protobuf_dir):
        print("bot_protobuf_dir %s is empty. Cannot start test." %
              bot_protobuf_dir)
        # BUG FIX: the original printed the message but fell through and ran
        # the test with no training data; abort like the other guards do.
        return None

    # Bot model folder to write the transfer learned model back to
    bot_model_dir = dirs.get_model_data_dir(TEST_BOT_ID)
    if not os.path.isdir(bot_model_dir):
        print('bot_model_dir %s does not exist. Cannot start test' %
              bot_model_dir)
        return None
    if os.listdir(bot_model_dir):
        print('bot_model_dir %s is not emtpy. Cannot start test.' %
              bot_model_dir)
        return None

    # Just run one step to make sure checkpoint files are written
    # appropriately
    transfer_learning.transfer_learning(root_model_dir=root_model_dir,
                                        bot_model_dir=bot_model_dir,
                                        protobuf_dir=bot_protobuf_dir,
                                        max_train_time_sec=100,
                                        log_every_n_steps=2)

    # Check if the root model dir is still intact
    self.assertTrue(
        os.listdir(root_model_dir),
        'root_model_dir %s is empty after transfer learning.' %
        root_model_dir)
    self.assertTrue(
        os.path.isfile(os.path.join(root_model_dir, 'checkpoint')),
        'checkpoints file in root_model_dir %s is gone after transfer learning.'
        % root_model_dir)

    # Check if the bot model dir contains a model now
    self.assertTrue(
        os.listdir(bot_model_dir),
        'bot_model_dir %s is empty after transfer learning' % bot_model_dir)
    self.assertTrue(
        os.path.isfile(os.path.join(bot_model_dir, 'checkpoint')),
        'not checkpoints file in bot_model_dir %s after transfer learning' %
        bot_model_dir)

    # Mock a file for classification: the handler expects base64-encoded
    # image bytes delivered via a file-like object.
    temp_file = tempfile.NamedTemporaryFile()
    temp_file.write(
        base64.b64encode(
            open(os.path.join(FILES_DIR, 'tulip.jpg'), "rb").read()))
    temp_file.seek(0)
    json_result, status = handler.handle_post(TEST_BOT_ID,
                                              temp_file.read(),
                                              return_labels=5)
    print(json_result)
    temp_file.close()

    # The classification result must be valid JSON with labels and
    # probabilities.
    self.assertTrue(json_result, 'Classification result is empty')
    json_result = json.loads(json_result)
    self.assertTrue(json_result['labels'],
                    'No labels in json result %s' % json_result)
    self.assertTrue(json_result['probabilities'],
                    'No predictions in json result %s' % json_result)
    print(json_result)

    # Clean the bot_model directory for next test run
    for file in os.listdir(bot_model_dir):
        file_path = os.path.join(bot_model_dir, file)
        try:
            if os.path.isfile(file_path):
                os.unlink(file_path)
        except Exception as e:
            # Best-effort cleanup: report but do not fail the test.
            print(e)