def augmentation_file_batch_manager_with_file_input_example():
    """Run AugmentationFileBatchManagerWithFileInput over a generated file dataset.

    For every epoch a working directory is borrowed from a
    WorkingDirectoryManager, the npy file pairs are visited in (optionally
    shuffled) groups of ``num_file_pairs``, and for each group the batch
    manager writes batches into the directory and reads them back. A one-line
    summary of each batch is printed, and the directory is handed back once
    all groups of the epoch have been processed.
    """

    def _per_example_max(array):
        # Flattens everything but the first axis and takes the maximum of each row.
        return np.max(np.reshape(array, (array.shape[0], -1)), axis=-1)

    num_examples = 300
    is_label_augmented = False
    num_files = generate_file_dataset('./batches', num_examples, is_label_augmented)

    # One (image file, label file) path pair per file index.
    npy_filepath_pairs = np.array([
        ('./batches/images_{}.npy'.format(file_idx), './batches/labels_{}.npy'.format(file_idx))
        for file_idx in range(num_files)
    ])

    num_file_pairs = 3
    # Number of groups needed to cover all files (ceiling division).
    if num_files > 0:
        num_file_pair_steps = (num_files - 1) // num_file_pairs + 1
    else:
        num_file_pair_steps = 0

    batch_size = 12
    num_epochs = 7
    shuffle = True
    is_time_major = False

    augmenter = IdentityAugmenter()

    batch_dir_path_prefix = './batch_dir'
    num_batch_dirs = 5
    dir_manager = WorkingDirectoryManager(batch_dir_path_prefix, num_batch_dirs)

    #--------------------
    for epoch in range(num_epochs):
        print('>>>>> Epoch #{}.'.format(epoch))

        # Polls the manager until it hands out a directory.
        dir_path = dir_manager.requestDirectory()
        while dir_path is None:
            time.sleep(0.1)
            dir_path = dir_manager.requestDirectory()

        print('\t>>>>> Directory: {}.'.format(dir_path))

        file_order = np.arange(num_files)
        if shuffle:
            np.random.shuffle(file_order)

        for step in range(num_file_pair_steps):
            print('\t\t>>>>> File pairs #{}.'.format(step))

            first = step * num_file_pairs
            last = first + num_file_pairs
            selected = file_order[first:last]
            if not (selected.size > 0):
                continue  # No file index falls into this group.

            sub_filepath_pairs = npy_filepath_pairs[selected]
            if not (sub_filepath_pairs.size > 0):
                continue  # No file pair to process.

            # Can run in an individual thread or process.
            batch_manager = AugmentationFileBatchManagerWithFileInput(
                augmenter, sub_filepath_pairs, batch_size, shuffle, is_label_augmented, is_time_major)
            batch_manager.putBatches(dir_path)  # Generates, augments, and saves batches.

            # Loads the batches back; each batch (images & labels) would be used for training here.
            for batch_idx, batch in enumerate(batch_manager.getBatches(dir_path)):
                batch_images = batch[0]
                batch_labels = batch[1]
                print('{}: {}-{}, {}-{}'.format(
                    batch_idx,
                    batch_images.shape, _per_example_max(batch_images),
                    batch_labels.shape, _per_example_max(batch_labels)))

        dir_manager.returnDirectory(dir_path)
def imgaug_file_batch_manager_example():
    """Run ImgaugFileBatchManager on a generated in-memory dataset.

    For every epoch a working directory is requested from a
    WorkingDirectoryManager; the epoch loop stops as soon as no directory is
    handed out. The batch manager writes batches into the directory and reads
    them back, a one-line summary of each batch is printed, and the directory
    is returned at the end of the epoch.
    """

    def _per_example_max(array):
        # Flattens everything but the first axis and takes the maximum of each row.
        return np.max(np.reshape(array, (array.shape[0], -1)), axis=-1)

    num_examples = 100
    is_label_augmented = False
    images, labels = generate_dataset(num_examples, is_label_augmented)

    batch_size = 12
    num_epochs = 7
    shuffle = True
    is_time_major = False

    augmentation_steps = [
        iaa.Fliplr(0.5),
        iaa.CoarseDropout(p=0.1, size_percent=0.1),
    ]
    augmenter = iaa.Sequential(augmentation_steps)

    batch_dir_path_prefix = './batch_dir'
    num_batch_dirs = 5
    dir_manager = WorkingDirectoryManager(batch_dir_path_prefix, num_batch_dirs)

    #--------------------
    for epoch in range(num_epochs):
        print('>>>>> Epoch #{}.'.format(epoch))

        dir_path = dir_manager.requestAvailableDirectory()
        if dir_path is None:
            # No directory was handed out: the remaining epochs are skipped.
            break

        print('\t>>>>> Directory: {}.'.format(dir_path))

        batch_manager = ImgaugFileBatchManager(
            augmenter, images, labels, batch_size, shuffle, is_label_augmented, is_time_major)
        batch_manager.putBatches(dir_path)  # Generates, augments, and saves batches.

        # Loads the batches back; each batch (images & labels) would be used for training here.
        for batch_idx, batch in enumerate(batch_manager.getBatches(dir_path)):
            batch_images = batch[0]
            batch_labels = batch[1]
            print('{}: {}-{}, {}-{}'.format(
                batch_idx,
                batch_images.shape, _per_example_max(batch_images),
                batch_labels.shape, _per_example_max(batch_labels)))

        dir_manager.returnDirectory(dir_path)
def simple_file_batch_manager_example():
    """Run SimpleFileBatchManager on a generated in-memory dataset.

    For every epoch the function polls a WorkingDirectoryManager until a
    directory is available, lets the batch manager write batches into it and
    read them back, prints a one-line summary of each batch, and then returns
    the directory.
    """

    def _per_example_max(array):
        # Flattens everything but the first axis and takes the maximum of each row.
        return np.max(np.reshape(array, (array.shape[0], -1)), axis=-1)

    num_examples = 100
    images, labels = generate_dataset(num_examples)

    batch_size = 12
    num_epochs = 7
    shuffle = True
    is_time_major = False

    batch_dir_path_prefix = './batch_dir'
    num_batch_dirs = 5
    dir_manager = WorkingDirectoryManager(batch_dir_path_prefix, num_batch_dirs)

    #--------------------
    for epoch in range(num_epochs):
        print('>>>>> Epoch #{}.'.format(epoch))

        # Polls the manager until it hands out a directory.
        dir_path = dir_manager.requestAvailableDirectory()
        while dir_path is None:
            time.sleep(0.1)
            dir_path = dir_manager.requestAvailableDirectory()

        print('\t>>>>> Directory: {}.'.format(dir_path))

        batch_manager = SimpleFileBatchManager(images, labels, batch_size, shuffle, is_time_major)
        batch_manager.putBatches(dir_path)  # Generates and saves batches.

        # Loads the batches back. The loop body can run in an individual thread
        # or process; each batch (images & labels) would be augmented and used
        # for training here.
        for batch_idx, batch in enumerate(batch_manager.getBatches(dir_path)):
            batch_images = batch[0]
            batch_labels = batch[1]
            print('\t{}: {}-{}, {}-{}'.format(
                batch_idx,
                batch_images.shape, _per_example_max(batch_images),
                batch_labels.shape, _per_example_max(batch_labels)))

        dir_manager.returnDirectory(dir_path)
def sync_multiprocess_augmentation_file_batch_manager_example():
    """Run AugmentationFileBatchManager with a multiprocessing pool.

    A single ``mp.Pool`` is kept open for all epochs and passed to every batch
    manager. For every epoch the function polls a WorkingDirectoryManager
    until a directory is available, lets the batch manager write batches into
    it and read them back, prints a one-line summary of each batch, and then
    returns the directory.
    """

    def _per_example_max(array):
        # Flattens everything but the first axis and takes the maximum of each row.
        return np.max(np.reshape(array, (array.shape[0], -1)), axis=-1)

    num_examples = 100
    is_label_augmented = False
    images, labels = generate_dataset(num_examples, is_label_augmented)

    batch_size = 12
    num_epochs = 7
    shuffle = True
    is_time_major = False

    augmenter = IdentityAugmenter()

    batch_dir_path_prefix = './batch_dir'
    num_batch_dirs = 5
    dir_manager = WorkingDirectoryManager(batch_dir_path_prefix, num_batch_dirs)

    #--------------------
    with mp.Pool() as pool:
        for epoch in range(num_epochs):
            print('>>>>> Epoch #{}.'.format(epoch))

            # Polls the manager until it hands out a directory.
            dir_path = dir_manager.requestDirectory()
            while dir_path is None:
                time.sleep(0.1)
                dir_path = dir_manager.requestDirectory()

            print('\t>>>>> Directory: {}.'.format(dir_path))

            batch_manager = AugmentationFileBatchManager(
                augmenter, images, labels, batch_size, shuffle, is_label_augmented, is_time_major, pool)
            batch_manager.putBatches(dir_path)  # Generates, augments, and saves batches.

            # Loads the batches back; each batch (images & labels) would be used for training here.
            for batch_idx, batch in enumerate(batch_manager.getBatches(dir_path)):
                batch_images = batch[0]
                batch_labels = batch[1]
                print('\t{}: {}-{}, {}-{}'.format(
                    batch_idx,
                    batch_images.shape, _per_example_max(batch_images),
                    batch_labels.shape, _per_example_max(batch_labels)))

            dir_manager.returnDirectory(dir_path)
def main():
    """Train, evaluate, and run inference with a CRNN model on Synth90k npy files.

    The function
      1. sets hyper-parameters, label tables, batch-directory settings, and the
         TensorFlow session configuration,
      2. registers the directory managers and the batch generator with
         ``BaseManager`` and starts a manager process,
      3. creates the output directories and loads the dataset file paths,
      4. builds separate training, evaluation, and inference graphs/sessions,
      5. when training is requested, saves validation batches to a working
         directory, runs ``training_worker_proc`` in a thread while a process
         pool maps ``augmentation_worker_proc`` over the epoch indices, and
         evaluates the model afterwards,
      6. saves test batches, runs inference, prints an accuracy figure, and
         closes the sessions.
    """
    #np.random.seed(7)

    #--------------------
    # Sets parameters.

    does_need_training = True
    does_resume_training = False

    # Outputs go to './<prefix>_<timestamp>'.
    output_dir_prefix = 'synth90k_crnn'
    output_dir_suffix = datetime.datetime.now().strftime('%Y%m%dT%H%M%S')
    #output_dir_suffix = '20180302T155710'

    initial_epoch = 0

    # When outputs are not sparse, CRNN model's output shape = (samples, 32, num_classes) and dataset's output shape = (samples, 23, num_classes).
    is_sparse_output = True  # Fixed.
    #is_time_major = False  # Fixed.

    # NOTE [info] >> Places with the same parameters.
    #	class Synth90kLabelConverter in ${SWL_PYTHON_HOME}/test/language_processing/synth90k_dataset_test.py.
    #	class Synth90kPreprocessor.

    image_height, image_width, image_channel = 32, 128, 1
    max_label_len = 23  # Max length of words in lexicon.

    # Label: 0~9 + a~z + A~Z.
    #label_characters = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    # Label: 0~9 + a~z.
    label_characters = '0123456789abcdefghijklmnopqrstuvwxyz'

    SOS = '<SOS>'  # All strings will start with the Start-Of-String token.
    EOS = '<EOS>'  # All strings will end with the End-Of-String token.
    #extended_label_list = [SOS] + list(label_characters) + [EOS]
    extended_label_list = list(label_characters) + [EOS]
    #extended_label_list = list(label_characters)

    # Lookup tables between label indices and characters (EOS is the last entry of the list).
    label_int2char = extended_label_list
    label_char2int = {c:i for i, c in enumerate(extended_label_list)}

    num_labels = len(extended_label_list)
    num_classes = num_labels + 1  # extended labels + blank label.

    # NOTE [info] >> The largest value (num_classes - 1) is reserved for the blank label.
    blank_label = num_classes - 1
    label_eos_token = label_char2int[EOS]
    #label_eos_token = blank_label

    batch_size = 256  # Number of samples per gradient update.
    num_epochs = 100  # Number of times to iterate over training data.
    shuffle = True

    augmenter = ImgaugAugmenter()
    #augmenter = create_imgaug_augmenter()  # If imgaug augmenter is used, data are augmented in background augmentation processes. (faster)
    is_output_augmented = False

    #use_multiprocessing = True  # Fixed. Batch generators & loaders are used in case of multiprocessing.
    #use_file_batch_loader = True  # Fixed. It is not related to multiprocessing.
    num_loaded_files_at_a_time = 5

    num_processes = 5  # Size of the augmentation process pool.
    train_batch_dir_path_prefix = './train_batch_dir'
    num_train_batch_dirs = 10
    val_batch_dir_path_prefix = './val_batch_dir'
    num_val_batch_dirs = 1
    test_batch_dir_path_prefix = './test_batch_dir'
    num_test_batch_dirs = 1
    batch_info_csv_filename = 'batch_info.csv'

    sess_config = tf.ConfigProto()
    #sess_config.device_count = {'GPU': 2}
    #sess_config.allow_soft_placement = True
    sess_config.log_device_placement = True
    sess_config.gpu_options.allow_growth = True
    #sess_config.gpu_options.per_process_gpu_memory_fraction = 0.4  # Only allocate 40% of the total memory of each GPU.

    #--------------------
    # Prepares multiprocessing.

    # set_start_method() should not be used more than once in the program.
    #mp.set_start_method('spawn')

    # Registers the classes so that instances can be created through the manager (see trainDirMgr_mp below).
    BaseManager.register('WorkingDirectoryManager', WorkingDirectoryManager)
    BaseManager.register('TwoStepWorkingDirectoryManager', TwoStepWorkingDirectoryManager)
    BaseManager.register('NpzFileBatchGeneratorFromNpyFiles', NpzFileBatchGeneratorFromNpyFiles)
    #BaseManager.register('NpzFileBatchLoader', NpzFileBatchLoader)
    manager = BaseManager()
    manager.start()

    # Handed to every pool worker through the pool initializer (initialize_lock).
    lock = mp.Lock()
    #lock = mp.Manager().Lock()  # TypeError: can't pickle _thread.lock objects.

    #--------------------
    # Prepares directories.

    output_dir_path = os.path.join('.', '{}_{}'.format(output_dir_prefix, output_dir_suffix))
    checkpoint_dir_path = os.path.join(output_dir_path, 'tf_checkpoint')
    inference_dir_path = os.path.join(output_dir_path, 'inference')
    train_summary_dir_path = os.path.join(output_dir_path, 'train_log')
    val_summary_dir_path = os.path.join(output_dir_path, 'val_log')

    swl_util.make_dir(checkpoint_dir_path)
    swl_util.make_dir(inference_dir_path)
    swl_util.make_dir(train_summary_dir_path)
    swl_util.make_dir(val_summary_dir_path)

    #--------------------
    # Prepares data.

    # NOTE [info] >> Generate synth90k dataset using swl.language_processing.synth90k_dataset.save_synth90k_dataset_to_npy_files().
    #	Refer to ${SWL_PYTHON_HOME}/test/language_processing/synth90k_dataset_test.py.

    synth90k_base_dir_path = './synth90k_npy'
    # Only file paths are obtained here; no array data is bound to a local name.
    train_input_filepaths, train_output_filepaths, val_input_filepaths, val_output_filepaths, test_input_filepaths, test_output_filepaths = load_data(synth90k_base_dir_path)

    #--------------------
    # Creates models, sessions, and graphs.

    # Creates graphs.
    if does_need_training:
        train_graph = tf.Graph()
    eval_graph = tf.Graph()
    infer_graph = tf.Graph()

    if does_need_training:
        with train_graph.as_default():
            #K.set_learning_phase(1)  # Sets the learning phase to 'train'. (Required)

            # Creates a model.
            modelForTraining = create_synth90k_crnn(image_height, image_width, image_channel, num_classes, label_eos_token, is_sparse_output)
            modelForTraining.create_training_model()

            # Creates a trainer.
            nnTrainer = SimpleCrnnTrainer(modelForTraining, initial_epoch)

            # Creates a saver.
            #	Saves a model every 2 hours and maximum 5 latest models are saved.
            train_saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2)

            initializer = tf.global_variables_initializer()

    with eval_graph.as_default():
        #K.set_learning_phase(0)  # Sets the learning phase to 'test'. (Required)

        # Creates a model.
        modelForEvaluation = create_synth90k_crnn(image_height, image_width, image_channel, num_classes, label_eos_token, is_sparse_output)
        modelForEvaluation.create_evaluation_model()

        # Creates an evaluator.
        nnEvaluator = NeuralNetEvaluator(modelForEvaluation)

        # Creates a saver.
        eval_saver = tf.train.Saver()

    with infer_graph.as_default():
        #K.set_learning_phase(0)  # Sets the learning phase to 'test'. (Required)

        # Creates a model.
        modelForInference = create_synth90k_crnn(image_height, image_width, image_channel, num_classes, label_eos_token, is_sparse_output)
        modelForInference.create_inference_model()

        # Creates an inferrer.
        nnInferrer = NeuralNetInferrer(modelForInference)

        # Creates a saver.
        infer_saver = tf.train.Saver()

    # Creates sessions.
    if does_need_training:
        train_session = tf.Session(graph=train_graph, config=sess_config)
    eval_session = tf.Session(graph=eval_graph, config=sess_config)
    infer_session = tf.Session(graph=infer_graph, config=sess_config)

    # Initializes.
    if does_need_training:
        train_session.run(initializer)

    #%%------------------------------------------------------------------
    # Trains and evaluates.

    if does_need_training:
        # NOTE(review): valDirMgr is only created in this branch, so the evaluation code below is kept inside it.
        valDirMgr = WorkingDirectoryManager(val_batch_dir_path_prefix, num_val_batch_dirs)

        print('\tWaiting for a validation batch directory...')
        # Polls every 0.1 sec until the manager hands out a directory.
        while True:
            val_dir_path = valDirMgr.requestDirectory()
            if val_dir_path is not None:
                break
            else:
                time.sleep(0.1)
        print('\tGot a validation batch directory: {}.'.format(val_dir_path))

        valFileBatchGenerator = NpzFileBatchGeneratorFromNpyFiles(val_input_filepaths, val_output_filepaths, num_loaded_files_at_a_time, batch_size, False, False, batch_info_csv_filename=batch_info_csv_filename)
        num_saved_examples = valFileBatchGenerator.saveBatches(val_dir_path)  # Generates and saves batches.
        print('\t#saved examples = {}.'.format(num_saved_examples))

        valDirMgr.returnDirectory(val_dir_path)

        #--------------------
        # Multiprocessing (augmentation) + multithreading (training).

        trainDirMgr = TwoStepWorkingDirectoryManager(train_batch_dir_path_prefix, num_train_batch_dirs)

        # NOTE(review): the training thread receives the local trainDirMgr, whereas the pool workers below
        #	receive the manager-created trainDirMgr_mp. These are two separate objects - confirm this is intended.
        training_worker_thread = threading.Thread(target=training_worker_proc, args=(train_session, nnTrainer, trainDirMgr, valDirMgr, batch_info_csv_filename, num_epochs, does_resume_training, train_saver, output_dir_path, checkpoint_dir_path, train_summary_dir_path, val_summary_dir_path, False, is_sparse_output))
        training_worker_thread.start()

        trainDirMgr_mp = manager.TwoStepWorkingDirectoryManager(train_batch_dir_path_prefix, num_train_batch_dirs)
        #valDirMgr_mp = manager.WorkingDirectoryManager(val_batch_dir_path_prefix, num_val_batch_dirs)

        #trainFileBatchGenerator_mp = manager.NpzFileBatchGeneratorFromNpyFiles(train_input_filepaths, train_output_filepaths, num_loaded_files_at_a_time, batch_size, shuffle, False, augmenter=augmenter, is_output_augmented=is_output_augmented, batch_info_csv_filename=batch_info_csv_filename)
        #trainFileBatchLoader_mp = manager.NpzFileBatchLoader(batch_info_csv_filename, data_processing_functor=Synth90kPreprocessor(is_sparse_output))
        #valFileBatchLoader_mp = manager.NpzFileBatchLoader(batch_info_csv_filename, data_processing_functor=Synth90kPreprocessor(is_sparse_output))

        #timeout = 10
        timeout = None  # Waits for the augmentation results without a time limit.
        with mp.Pool(processes=num_processes, initializer=initialize_lock, initargs=(lock,)) as pool:
            # One augmentation task per epoch index; the epoch is appended as the last argument of the partial.
            data_augmentation_results = pool.map_async(partial(augmentation_worker_proc, augmenter, is_output_augmented, batch_info_csv_filename, trainDirMgr_mp, train_input_filepaths, train_output_filepaths, num_loaded_files_at_a_time, batch_size, shuffle, False), [epoch for epoch in range(num_epochs)])

            # Has to be awaited inside the with-block: leaving the block terminates the pool.
            data_augmentation_results.get(timeout)

        training_worker_thread.join()

        #--------------------
        valFileBatchLoader = NpzFileBatchLoader(batch_info_csv_filename, data_processing_functor=Synth90kPreprocessor(is_sparse_output))

        start_time = time.time()
        with eval_session.as_default() as sess:
            with sess.graph.as_default():
                swl_tf_util.evaluate_neural_net_by_file_batch_loader(sess, nnEvaluator, valFileBatchLoader, valDirMgr, eval_saver, checkpoint_dir_path, False, False)
        print('\tTotal evaluation time = {}'.format(time.time() - start_time))

    #%%------------------------------------------------------------------
    # Infers.

    testDirMgr = WorkingDirectoryManager(test_batch_dir_path_prefix, num_test_batch_dirs)

    #--------------------
    print('\tWaiting for a test batch directory...')
    # Polls every 0.1 sec until the manager hands out a directory.
    while True:
        test_dir_path = testDirMgr.requestDirectory()
        if test_dir_path is not None:
            break
        else:
            time.sleep(0.1)
    print('\tGot a test batch directory: {}.'.format(test_dir_path))

    testFileBatchGenerator = NpzFileBatchGeneratorFromNpyFiles(test_input_filepaths, test_output_filepaths, num_loaded_files_at_a_time, batch_size, False, False, batch_info_csv_filename=batch_info_csv_filename)
    num_saved_examples = testFileBatchGenerator.saveBatches(test_dir_path)  # Generates and saves batches.
    print('\t#saved examples = {}.'.format(num_saved_examples))

    testDirMgr.returnDirectory(test_dir_path)

    #--------------------
    testFileBatchLoader = NpzFileBatchLoader(batch_info_csv_filename, data_processing_functor=Synth90kPreprocessor(is_sparse_output))

    start_time = time.time()
    with infer_session.as_default() as sess:
        with sess.graph.as_default():
            inferences = swl_tf_util.infer_by_neural_net_and_file_batch_loader(sess, nnInferrer, testFileBatchLoader, testDirMgr, infer_saver, checkpoint_dir_path, False)
    print('\tTotal inference time = {}'.format(time.time() - start_time))

    #--------------------
    if inferences is not None:
        # NOTE(review): test_labels is not assigned anywhere in this function (load_data() above returns
        #	file paths only). Unless it exists at module scope, both branches raise NameError - confirm.
        if num_classes >= 2:
            inferences = np.argmax(inferences, -1)
            groundtruths = np.argmax(test_labels, -1)
        else:
            inferences = np.around(inferences)
            groundtruths = test_labels
        # Counts element-wise matches between inferences and ground truths.
        correct_estimation_count = np.count_nonzero(np.equal(inferences, groundtruths))
        print('\tAccurary = {} / {} = {}'.format(correct_estimation_count, groundtruths.size, correct_estimation_count / groundtruths.size))
    else:
        print('[SWL] Warning: Invalid inference results.')

    #--------------------
    # Closes sessions.

    if does_need_training:
        train_session.close()
        del train_session
    eval_session.close()
    del eval_session
    infer_session.close()
    del infer_session
def main():
    """Train, evaluate, and run inference with a CNN model on MNIST.

    Two flags select how batches are fed:
      - ``use_multiprocessing``: a process pool maps ``augmentation_worker_proc``
        over the epoch indices while ``training_worker_proc`` runs in a thread,
      - ``use_file_batch_loader``: batches are saved to working directories and
        read back through ``NpzFileBatchLoader``; otherwise
        ``SimpleBatchGenerator`` is used.

    The function sets parameters, prepares the multiprocessing manager and the
    output directories, loads the data, builds separate training, evaluation,
    and inference graphs/sessions, trains and evaluates when requested, runs
    inference, prints an accuracy figure, and closes the sessions.
    """
    #np.random.seed(7)

    #--------------------
    # Sets parameters.

    does_need_training = True
    does_resume_training = False

    # Outputs go to './<prefix>_<timestamp>'.
    output_dir_prefix = 'mnist_cnn'
    output_dir_suffix = datetime.datetime.now().strftime('%Y%m%dT%H%M%S')
    #output_dir_suffix = '20190127T001424'

    initial_epoch = 0

    num_classes = 10
    input_shape = (None, 28, 28, 1)  # 784 = 28 * 28.
    output_shape = (None, num_classes)

    batch_size = 128  # Number of samples per gradient update.
    num_epochs = 30  # Number of times to iterate over training data.
    shuffle = True

    augmenter = ImgaugAugmenter()
    is_output_augmented = False

    use_multiprocessing = True  # Batch generators & loaders are used in case of multiprocessing.
    use_file_batch_loader = True  # Is not related to multiprocessing.
    num_processes = 5  # Size of the process pool.

    train_batch_dir_path_prefix = './train_batch_dir'
    #train_num_batch_dirs = 5
    val_batch_dir_path_prefix = './val_batch_dir'
    val_num_batch_dirs = 1
    test_batch_dir_path_prefix = './test_batch_dir'
    test_num_batch_dirs = 1
    batch_info_csv_filename = 'batch_info.csv'

    sess_config = tf.ConfigProto()
    #sess_config.device_count = {'GPU': 2}
    #sess_config.allow_soft_placement = True
    sess_config.log_device_placement = True
    sess_config.gpu_options.allow_growth = True
    #sess_config.gpu_options.per_process_gpu_memory_fraction = 0.4  # Only allocate 40% of the total memory of each GPU.

    #--------------------
    # Prepares multiprocessing.

    # manager and lock only exist when use_multiprocessing is True; they are only used under the same flag below.
    if use_multiprocessing:
        # set_start_method() should not be used more than once in the program.
        #mp.set_start_method('spawn')

        # Registers the classes so that instances can be created through the manager.
        BaseManager.register('WorkingDirectoryManager', WorkingDirectoryManager)
        BaseManager.register('TwoStepWorkingDirectoryManager', TwoStepWorkingDirectoryManager)
        BaseManager.register('NpzFileBatchGenerator', NpzFileBatchGenerator)
        #BaseManager.register('NpzFileBatchLoader', NpzFileBatchLoader)
        manager = BaseManager()
        manager.start()

        # Handed to every pool worker through the pool initializer (initialize_lock).
        lock = mp.Lock()
        #lock= mp.Manager().Lock()  # TypeError: can't pickle _thread.lock objects.

    #--------------------
    # Prepares directories.

    output_dir_path = os.path.join(
        '.', '{}_{}'.format(output_dir_prefix, output_dir_suffix))
    checkpoint_dir_path = os.path.join(output_dir_path, 'tf_checkpoint')
    inference_dir_path = os.path.join(output_dir_path, 'inference')
    train_summary_dir_path = os.path.join(output_dir_path, 'train_log')
    val_summary_dir_path = os.path.join(output_dir_path, 'val_log')

    swl_util.make_dir(checkpoint_dir_path)
    swl_util.make_dir(inference_dir_path)
    swl_util.make_dir(train_summary_dir_path)
    swl_util.make_dir(val_summary_dir_path)

    #--------------------
    # Prepares data.

    # The leading None of input_shape is dropped before it is passed to load_data().
    train_images, train_labels, test_images, test_labels = load_data(
        input_shape[1:])

    #--------------------
    # Creates models, sessions, and graphs.

    # Creates graphs.
    if does_need_training:
        train_graph = tf.Graph()
    eval_graph = tf.Graph()
    infer_graph = tf.Graph()

    if does_need_training:
        with train_graph.as_default():
            # Creates a model.
            modelForTraining = create_mnist_cnn(input_shape, output_shape)
            modelForTraining.create_training_model()

            # Creates a trainer.
            nnTrainer = SimpleNeuralNetTrainer(modelForTraining, initial_epoch)

            # Creates a saver.
            #	Saves a model every 2 hours and maximum 5 latest models are saved.
            train_saver = tf.train.Saver(max_to_keep=5,
                                         keep_checkpoint_every_n_hours=2)

            initializer = tf.global_variables_initializer()

    with eval_graph.as_default():
        # Creates a model.
        modelForEvaluation = create_mnist_cnn(input_shape, output_shape)
        modelForEvaluation.create_evaluation_model()

        # Creates an evaluator.
        nnEvaluator = NeuralNetEvaluator(modelForEvaluation)

        # Creates a saver.
        eval_saver = tf.train.Saver()

    with infer_graph.as_default():
        # Creates a model.
        modelForInference = create_mnist_cnn(input_shape, output_shape)
        modelForInference.create_inference_model()

        # Creates an inferrer.
        nnInferrer = NeuralNetInferrer(modelForInference)

        # Creates a saver.
        infer_saver = tf.train.Saver()

    # Creates sessions.
    if does_need_training:
        train_session = tf.Session(graph=train_graph, config=sess_config)
    eval_session = tf.Session(graph=eval_graph, config=sess_config)
    infer_session = tf.Session(graph=infer_graph, config=sess_config)

    # Initializes.
    if does_need_training:
        train_session.run(initializer)

    #%%------------------------------------------------------------------
    # Trains and evaluates.

    if does_need_training:
        if use_file_batch_loader or use_multiprocessing:
            valDirMgr = WorkingDirectoryManager(val_batch_dir_path_prefix,
                                                val_num_batch_dirs)

            # Polls every 0.1 sec until the manager hands out a directory.
            while True:
                val_dir_path = valDirMgr.requestDirectory()
                if val_dir_path is not None:
                    break
                else:
                    time.sleep(0.1)
            print(
                '\tGot a validation batch directory: {}.'.format(val_dir_path))

            # The test split is used as the validation data.
            valFileBatchGenerator = NpzFileBatchGenerator(
                test_images,
                test_labels,
                batch_size,
                False,
                False,
                batch_info_csv_filename=batch_info_csv_filename)
            valFileBatchGenerator.saveBatches(
                val_dir_path)  # Generates and saves batches.

            valDirMgr.returnDirectory(val_dir_path)

        if use_multiprocessing:
            train_num_batch_dirs = 5
            # Directory managers created through the manager so that they can be passed to the pool workers.
            trainDirMgr_mp = manager.TwoStepWorkingDirectoryManager(
                train_batch_dir_path_prefix, train_num_batch_dirs)
            valDirMgr_mp = manager.WorkingDirectoryManager(
                val_batch_dir_path_prefix, val_num_batch_dirs)

            #trainFileBatchGenerator_mp = manager.NpzFileBatchGenerator(train_images, train_labels, batch_size, shuffle, False, augmenter=augmenter, is_output_augmented=is_output_augmented, batch_info_csv_filename=batch_info_csv_filename)
            #trainFileBatchLoader_mp = manager.NpzFileBatchLoader(batch_info_csv_filename, data_processing_functor=None)
            #valFileBatchLoader_mp = manager.NpzFileBatchLoader(batch_info_csv_filename, data_processing_functor=None)

            #--------------------
            # The first branch is disabled on purpose (see the FIXME below); only the else-branch runs.
            if False:
                # Multiprocessing only.

                # FIXME [fix] >> This code does not work.
                #	TensorFlow session and saver cannot be passed to a worker procedure in using multiprocessing.pool.apply_async().

                #timeout = 10
                timeout = None
                with mp.Pool(processes=num_processes,
                             initializer=initialize_lock,
                             initargs=(lock, )) as pool:
                    training_results = pool.apply_async(
                        training_worker_proc,
                        args=(train_session, nnTrainer, trainDirMgr_mp,
                              valDirMgr_mp, batch_info_csv_filename,
                              num_epochs, does_resume_training, train_saver,
                              output_dir_path, checkpoint_dir_path,
                              train_summary_dir_path, val_summary_dir_path,
                              False, False))
                    data_augmentation_results = pool.map_async(
                        partial(augmentation_worker_proc, augmenter,
                                is_output_augmented, batch_info_csv_filename,
                                trainDirMgr_mp, train_images, train_labels,
                                batch_size, shuffle, False),
                        [epoch for epoch in range(num_epochs)])

                    training_results.get(timeout)
                    data_augmentation_results.get(timeout)
            else:
                # Multiprocessing (augmentation) + multithreading (training).

                training_worker_thread = threading.Thread(
                    target=training_worker_proc,
                    args=(train_session, nnTrainer, trainDirMgr_mp,
                          valDirMgr_mp, batch_info_csv_filename, num_epochs,
                          does_resume_training, train_saver, output_dir_path,
                          checkpoint_dir_path, train_summary_dir_path,
                          val_summary_dir_path, False, False))
                training_worker_thread.start()

                #timeout = 10
                timeout = None  # Waits for the augmentation results without a time limit.
                with mp.Pool(processes=num_processes,
                             initializer=initialize_lock,
                             initargs=(lock, )) as pool:
                    # One augmentation task per epoch index; the epoch is appended as the last argument of the partial.
                    data_augmentation_results = pool.map_async(
                        partial(augmentation_worker_proc, augmenter,
                                is_output_augmented, batch_info_csv_filename,
                                trainDirMgr_mp, train_images, train_labels,
                                batch_size, shuffle, False),
                        [epoch for epoch in range(num_epochs)])

                    # Has to be awaited inside the with-block: leaving the block terminates the pool.
                    data_augmentation_results.get(timeout)

                training_worker_thread.join()
        elif use_file_batch_loader:
            # One batch directory per epoch, all filled before training starts.
            train_num_batch_dirs = num_epochs
            trainDirMgr = WorkingDirectoryManager(train_batch_dir_path_prefix,
                                                  train_num_batch_dirs)

            # TODO [improve] >> Not-so-good implementation.
            #	Usually training is performed for much more epochs, so too many batches have to be generated before training.
            for _ in range(train_num_batch_dirs):
                while True:
                    train_dir_path = trainDirMgr.requestDirectory()
                    if train_dir_path is not None:
                        break
                    else:
                        time.sleep(0.1)
                print('\tGot a train batch directory: {}.'.format(
                    train_dir_path))

                # NOTE(review): unlike the other training paths, no augmenter is passed here - confirm this is intended.
                trainFileBatchGenerator = NpzFileBatchGenerator(
                    train_images,
                    train_labels,
                    batch_size,
                    shuffle,
                    False,
                    batch_info_csv_filename=batch_info_csv_filename)
                trainFileBatchGenerator.saveBatches(
                    train_dir_path)  # Generates and saves batches.

                trainDirMgr.returnDirectory(train_dir_path)

            #--------------------
            trainFileBatchLoader = NpzFileBatchLoader(
                batch_info_csv_filename, data_processing_functor=None)
            valFileBatchLoader = NpzFileBatchLoader(
                batch_info_csv_filename, data_processing_functor=None)

            start_time = time.time()
            with train_session.as_default() as sess:
                with sess.graph.as_default():
                    swl_tf_util.train_neural_net_by_file_batch_loader(
                        sess, nnTrainer, trainFileBatchLoader,
                        valFileBatchLoader, trainDirMgr, valDirMgr, num_epochs,
                        does_resume_training, train_saver, output_dir_path,
                        checkpoint_dir_path, train_summary_dir_path,
                        val_summary_dir_path, False, False)
            print('\tTotal training time = {}'.format(time.time() -
                                                      start_time))
        else:
            # In-memory batch generators; no working directories are involved.
            trainBatchGenerator = SimpleBatchGenerator(train_images,
                                                       train_labels,
                                                       batch_size, shuffle,
                                                       False, augmenter,
                                                       is_output_augmented)
            valBatchGenerator = SimpleBatchGenerator(test_images, test_labels,
                                                     batch_size, False, False)

            start_time = time.time()
            with train_session.as_default() as sess:
                with sess.graph.as_default():
                    swl_tf_util.train_neural_net_by_batch_generator(
                        sess, nnTrainer, trainBatchGenerator,
                        valBatchGenerator, num_epochs, does_resume_training,
                        train_saver, output_dir_path, checkpoint_dir_path,
                        train_summary_dir_path, val_summary_dir_path, False,
                        False)
            print('\tTotal training time = {}'.format(time.time() -
                                                      start_time))

        #--------------------
        # Evaluates with the evaluation graph/session; the saver and checkpoint directory are passed along.
        if use_file_batch_loader:
            valFileBatchLoader = NpzFileBatchLoader(
                batch_info_csv_filename, data_processing_functor=None)

            start_time = time.time()
            with eval_session.as_default() as sess:
                with sess.graph.as_default():
                    swl_tf_util.evaluate_neural_net_by_file_batch_loader(
                        sess, nnEvaluator, valFileBatchLoader, valDirMgr,
                        eval_saver, checkpoint_dir_path, False, False)
            print('\tTotal evaluation time = {}'.format(time.time() -
                                                        start_time))
        else:
            valBatchGenerator = SimpleBatchGenerator(test_images, test_labels,
                                                     batch_size, False, False)

            start_time = time.time()
            with eval_session.as_default() as sess:
                with sess.graph.as_default():
                    swl_tf_util.evaluate_neural_net_by_batch_generator(
                        sess, nnEvaluator, valBatchGenerator, eval_saver,
                        checkpoint_dir_path, False, False)
            print('\tTotal evaluation time = {}'.format(time.time() -
                                                        start_time))

    #%%------------------------------------------------------------------
    # Infers.

    if use_file_batch_loader:
        testDirMgr = WorkingDirectoryManager(test_batch_dir_path_prefix,
                                             test_num_batch_dirs)

        #--------------------
        # Polls every 0.1 sec until the manager hands out a directory.
        while True:
            test_dir_path = testDirMgr.requestDirectory()
            if test_dir_path is not None:
                break
            else:
                time.sleep(0.1)
        print('\tGot a test batch directory: {}.'.format(test_dir_path))

        testFileBatchGenerator = NpzFileBatchGenerator(
            test_images,
            test_labels,
            batch_size,
            False,
            False,
            batch_info_csv_filename=batch_info_csv_filename)
        testFileBatchGenerator.saveBatches(
            test_dir_path)  # Generates and saves batches.

        testDirMgr.returnDirectory(test_dir_path)

        #--------------------
        testFileBatchLoader = NpzFileBatchLoader(batch_info_csv_filename,
                                                 data_processing_functor=None)

        start_time = time.time()
        with infer_session.as_default() as sess:
            with sess.graph.as_default():
                inferences = swl_tf_util.infer_by_neural_net_and_file_batch_loader(
                    sess, nnInferrer, testFileBatchLoader, testDirMgr,
                    infer_saver, checkpoint_dir_path, False)
        print('\tTotal inference time = {}'.format(time.time() - start_time))
    else:
        testBatchGenerator = SimpleBatchGenerator(test_images, test_labels,
                                                  batch_size, False, False)

        start_time = time.time()
        with infer_session.as_default() as sess:
            with sess.graph.as_default():
                inferences = swl_tf_util.infer_by_neural_net_and_batch_generator(
                    sess, nnInferrer, testBatchGenerator, infer_saver,
                    checkpoint_dir_path, False)
        print('\tTotal inference time = {}'.format(time.time() - start_time))

    if inferences is not None:
        # Stacks the returned sequence of arrays vertically into one array.
        inferences = np.vstack(inferences)
        if num_classes >= 2:
            # Reduces the last axis of both arrays to the index of its maximum.
            inferences = np.argmax(inferences, -1)
            groundtruths = np.argmax(test_labels, -1)
        else:
            inferences = np.around(inferences)
            groundtruths = test_labels
        # Counts element-wise matches between inferences and ground truths.
        correct_estimation_count = np.count_nonzero(
            np.equal(inferences, groundtruths))
        print('\tAccurary = {} / {} = {}'.format(
            correct_estimation_count, groundtruths.size,
            correct_estimation_count / groundtruths.size))
    else:
        print('[SWL] Warning: Invalid inference results.')

    #--------------------
    # Closes sessions.

    if does_need_training:
        train_session.close()
        del train_session
    eval_session.close()
    del eval_session
    infer_session.close()
    del infer_session
def simple_npz_file_batch_generator_from_image_files_and_loader_example():
    """Run NpzFileBatchGeneratorFromImageFiles together with NpzFileBatchLoader.

    For every epoch the function polls a WorkingDirectoryManager until a
    directory is available, lets the generator save batches into it, loads
    them back with the loader, prints a one-line summary of each batch, and
    reports how many examples were saved and how many were loaded. The
    directory is returned only after all batches have been consumed.
    """

    def _per_example_max(array):
        # Flattens everything but the first axis and takes the maximum of each row.
        return np.max(np.reshape(array, (array.shape[0], -1)), axis=-1)

    num_examples = 256
    npy_input_filepaths, output_seqs = generate_image_file_dataset('./image_files', num_examples)
    num_loaded_files = 57

    num_epochs = 7
    batch_size = 12
    shuffle = True
    is_time_major = False

    batch_dir_path_prefix = './batch_dir'
    num_batch_dirs = 5
    dir_manager = WorkingDirectoryManager(batch_dir_path_prefix, num_batch_dirs)
    batch_info_csv_filename = 'batch_info.csv'

    # Alternatives: augment_identically, IdentityAugmenter().
    augmenter = ImgaugAugmenter()
    is_output_augmented = False

    #--------------------
    for epoch in range(num_epochs):
        print('>>>>> Epoch #{}.'.format(epoch))

        # Polls the manager until it hands out a directory.
        dir_path = dir_manager.requestDirectory()
        while dir_path is None:
            time.sleep(0.1)
            dir_path = dir_manager.requestDirectory()

        print('\t>>>>> Directory: {}.'.format(dir_path))

        batch_generator = NpzFileBatchGeneratorFromImageFiles(
            npy_input_filepaths, output_seqs, num_loaded_files, batch_size, shuffle, is_time_major,
            augmenter=augmenter,
            is_output_augmented=is_output_augmented,
            batch_info_csv_filename=batch_info_csv_filename)
        num_saved_examples = batch_generator.saveBatches(dir_path)  # Generates and saves batches.

        batch_loader = NpzFileBatchLoader(batch_info_csv_filename=batch_info_csv_filename)
        loaded_batches = batch_loader.loadBatches(dir_path)  # Loads batches.

        # The directory is not returned yet: if dir_path is returned before
        # completing a job, dir_path can be used in a different job.

        num_loaded_examples = 0
        # The loop body can run in an individual thread or process; each batch
        # (inputs & outputs) would be augmented and used for training here.
        for batch_idx, batch_item in enumerate(loaded_batches):
            batch_data, num_batch_examples = batch_item
            batch_inputs = batch_data[0]
            batch_outputs = batch_data[1]
            print('\t{}: {}, {}-{}, {}-{}'.format(
                batch_idx, num_batch_examples,
                batch_inputs.shape, _per_example_max(batch_inputs),
                batch_outputs.shape, _per_example_max(batch_outputs)))
            num_loaded_examples += num_batch_examples

        print('#saved examples =', num_saved_examples)
        print('#loaded examples =', num_loaded_examples)

        dir_manager.returnDirectory(dir_path)
def mnist_batch_manager(method=0):
    """Train, evaluate, and run inference with an MNIST CNN using a chosen batch manager.

    Args:
        method: Selects how training batches are produced.
            0: AugmentationBatchManager without a process pool.
            1: AugmentationBatchManager with a process pool.
            2: AugmentationFileBatchManager without a process pool.
            3: AugmentationFileBatchManager with a process pool.
            4: ImgaugBatchManager.
            5: ImgaugFileBatchManager.
            Methods 0, 1 and 4 use in-memory batch managers; methods 2, 3 and 5
            use file-based batch managers together with a
            WorkingDirectoryManager.

    Raises:
        ValueError: If `method` is not one of the values listed above. The
            check happens before any directory, graph, or session is created.
    """
    # Local import: ExitStack is only needed here, to enter the process pool conditionally.
    import contextlib

    in_memory_methods = (0, 1, 4)
    file_based_methods = (2, 3, 5)
    pooled_methods = (1, 3)
    imgaug_methods = (4, 5)

    # Fail fast so that an invalid method leaves no output directories or open sessions behind.
    if method not in in_memory_methods and method not in file_based_methods:
        raise ValueError('[SWL] Error: Invalid batch manager method: {}.'.format(method))
    is_file_based = method in file_based_methods

    #np.random.seed(7)

    #--------------------
    # Sets parameters.

    does_need_training = True
    does_resume_training = False

    output_dir_prefix = 'mnist_cnn'
    output_dir_suffix = datetime.datetime.now().strftime('%Y%m%dT%H%M%S')
    #output_dir_suffix = '20181211T172200'

    initial_epoch = 0

    image_height, image_width = 28, 28
    num_classes = 10
    input_shape = (None, image_height, image_width, 1)
    output_shape = (None, num_classes)

    batch_size = 128  # Number of samples per gradient update.
    num_epochs = 30  # Number of times to iterate over training data.
    shuffle = True

    is_label_augmented = False
    is_time_major = False
    is_sparse_output = False

    # Settings shared by every file-based batch manager below.
    batch_dir_path_prefix = './batch_dir'
    num_batch_dirs = 5
    train_file_formats = dict(
        image_file_format='train_batch_images_{}.npy',
        label_file_format='train_batch_labels_{}.npy')
    val_file_formats = dict(
        image_file_format='val_batch_images_{}.npy',
        label_file_format='val_batch_labels_{}.npy')

    sess_config = tf.ConfigProto()
    #sess_config.device_count = {'GPU': 2}
    #sess_config.allow_soft_placement = True
    sess_config.log_device_placement = True
    sess_config.gpu_options.allow_growth = True
    #sess_config.gpu_options.per_process_gpu_memory_fraction = 0.4  # Only allocate 40% of the total memory of each GPU.

    #--------------------
    # Prepares directories.

    output_dir_path = os.path.join('.', '{}_{}'.format(output_dir_prefix, output_dir_suffix))
    checkpoint_dir_path = os.path.join(output_dir_path, 'tf_checkpoint')
    inference_dir_path = os.path.join(output_dir_path, 'inference')
    train_summary_dir_path = os.path.join(output_dir_path, 'train_log')
    val_summary_dir_path = os.path.join(output_dir_path, 'val_log')

    swl_util.make_dir(checkpoint_dir_path)
    swl_util.make_dir(inference_dir_path)
    swl_util.make_dir(train_summary_dir_path)
    swl_util.make_dir(val_summary_dir_path)

    #--------------------
    # Prepares data.

    train_images, train_labels, test_images, test_labels = load_data(input_shape[1:])

    #--------------------
    # Creates models, sessions, and graphs.

    # Creates graphs.
    if does_need_training:
        train_graph = tf.Graph()
    eval_graph = tf.Graph()
    infer_graph = tf.Graph()

    if does_need_training:
        with train_graph.as_default():
            # Creates a model.
            modelForTraining = create_mnist_cnn(input_shape, output_shape)
            modelForTraining.create_training_model()

            # Creates a trainer.
            nnTrainer = SimpleNeuralNetTrainer(modelForTraining, initial_epoch)

            # Creates a saver.
            # Saves a model every 2 hours and maximum 5 latest models are saved.
            train_saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2)

            initializer = tf.global_variables_initializer()

    with eval_graph.as_default():
        # Creates a model.
        modelForEvaluation = create_mnist_cnn(input_shape, output_shape)
        modelForEvaluation.create_evaluation_model()

        # Creates an evaluator.
        nnEvaluator = NeuralNetEvaluator(modelForEvaluation)

        # Creates a saver.
        eval_saver = tf.train.Saver()

    with infer_graph.as_default():
        # Creates a model.
        modelForInference = create_mnist_cnn(input_shape, output_shape)
        modelForInference.create_inference_model()

        # Creates an inferrer.
        nnInferrer = NeuralNetInferrer(modelForInference)

        # Creates a saver.
        infer_saver = tf.train.Saver()

    # Creates sessions.
    train_session = tf.Session(graph=train_graph, config=sess_config) if does_need_training else None
    eval_session = tf.Session(graph=eval_graph, config=sess_config)
    infer_session = tf.Session(graph=infer_graph, config=sess_config)

    try:
        #%%------------------------------------------------------------------
        # Trains and evaluates.

        if does_need_training:
            # Initializes.
            train_session.run(initializer)

            with contextlib.ExitStack() as stack:
                # A fresh directory manager is created for each phase (training, evaluation, inference).
                dir_mgr = WorkingDirectoryManager(batch_dir_path_prefix, num_batch_dirs) if is_file_based else None
                # The pool exists only for the pooled methods; ExitStack exits it when training ends.
                pool = stack.enter_context(mp.Pool()) if method in pooled_methods else None

                if method in imgaug_methods:
                    augmenter = get_imgaug_augmenter(image_height, image_width)
                else:
                    #augmenter = IdentityAugmenter()
                    augmenter = ImgaugAugmenter(image_height, image_width)

                if is_file_based:
                    if method in imgaug_methods:
                        train_batch_mgr = ImgaugFileBatchManager(
                            augmenter, train_images, train_labels, batch_size, shuffle,
                            is_label_augmented, is_time_major, **train_file_formats)
                    else:
                        train_batch_mgr = AugmentationFileBatchManager(
                            augmenter, train_images, train_labels, batch_size, shuffle,
                            is_label_augmented, is_time_major, pool, **train_file_formats)
                    val_batch_mgr = SimpleFileBatchManager(
                        test_images, test_labels, batch_size, False, is_time_major,
                        **val_file_formats)
                else:
                    if method in imgaug_methods:
                        train_batch_mgr = ImgaugBatchManager(
                            augmenter, train_images, train_labels, batch_size, shuffle,
                            is_label_augmented, is_time_major)
                    else:
                        train_batch_mgr = AugmentationBatchManager(
                            augmenter, train_images, train_labels, batch_size, shuffle,
                            is_label_augmented, is_time_major, pool)
                    val_batch_mgr = SimpleBatchManager(
                        test_images, test_labels, batch_size, False, is_time_major)

                start_time = time.time()
                with train_session.as_default() as sess:
                    with sess.graph.as_default():
                        if is_file_based:
                            swl_tf_util.train_neural_net_by_file_batch_manager(
                                sess, nnTrainer, train_batch_mgr, val_batch_mgr, dir_mgr,
                                num_epochs, does_resume_training, train_saver,
                                output_dir_path, checkpoint_dir_path,
                                train_summary_dir_path, val_summary_dir_path,
                                is_time_major, is_sparse_output)
                        else:
                            swl_tf_util.train_neural_net_by_batch_manager(
                                sess, nnTrainer, train_batch_mgr, val_batch_mgr,
                                num_epochs, does_resume_training, train_saver,
                                output_dir_path, checkpoint_dir_path,
                                train_summary_dir_path, val_summary_dir_path,
                                is_time_major, is_sparse_output)
                print('\tTotal training time = {}'.format(time.time() - start_time))

            #--------------------
            # Evaluates on the test set using the saved checkpoint.
            if is_file_based:
                dir_mgr = WorkingDirectoryManager(batch_dir_path_prefix, num_batch_dirs)
                val_batch_mgr = SimpleFileBatchManager(
                    test_images, test_labels, batch_size, False, is_time_major,
                    **val_file_formats)
            else:
                val_batch_mgr = SimpleBatchManager(
                    test_images, test_labels, batch_size, False, is_time_major)

            start_time = time.time()
            with eval_session.as_default() as sess:
                with sess.graph.as_default():
                    if is_file_based:
                        swl_tf_util.evaluate_neural_net_by_file_batch_manager(
                            sess, nnEvaluator, val_batch_mgr, dir_mgr, eval_saver,
                            checkpoint_dir_path, is_time_major, is_sparse_output)
                    else:
                        swl_tf_util.evaluate_neural_net_by_batch_manager(
                            sess, nnEvaluator, val_batch_mgr, eval_saver,
                            checkpoint_dir_path, is_time_major, is_sparse_output)
            print('\tTotal evaluation time = {}'.format(time.time() - start_time))

        #%%------------------------------------------------------------------
        # Infers.

        if is_file_based:
            dir_mgr = WorkingDirectoryManager(batch_dir_path_prefix, num_batch_dirs)
            test_batch_mgr = SimpleFileBatchManager(
                test_images, test_labels, batch_size, False, is_time_major,
                **val_file_formats)
        else:
            test_batch_mgr = SimpleBatchManager(
                test_images, test_labels, batch_size, False, is_time_major)

        start_time = time.time()
        with infer_session.as_default() as sess:
            with sess.graph.as_default():
                if is_file_based:
                    inferences = swl_tf_util.infer_by_neural_net_and_file_batch_manager(
                        sess, nnInferrer, test_batch_mgr, dir_mgr, infer_saver,
                        checkpoint_dir_path, is_time_major)
                else:
                    inferences = swl_tf_util.infer_by_neural_net_and_batch_manager(
                        sess, nnInferrer, test_batch_mgr, infer_saver,
                        checkpoint_dir_path, is_time_major)
        print('\tTotal inference time = {}'.format(time.time() - start_time))

        if inferences is not None:
            inferences = np.vstack(inferences)
            if num_classes >= 2:
                # Compare class indices: arg-max over the last axis of predictions and labels.
                inferences = np.argmax(inferences, -1)
                groundtruths = np.argmax(test_labels, -1)
            else:
                inferences = np.around(inferences)
                groundtruths = test_labels
            correct_estimation_count = np.count_nonzero(np.equal(inferences, groundtruths))
            print('\tAccuracy = {} / {} = {}'.format(
                correct_estimation_count, groundtruths.size,
                correct_estimation_count / groundtruths.size))
        else:
            print('[SWL] Warning: Invalid inference results.')
    finally:
        #--------------------
        # Closes sessions, even when training, evaluation, or inference raised.
        if train_session is not None:
            train_session.close()
        eval_session.close()
        infer_session.close()