def training_worker_proc(train_session, nnTrainer, trainDirMgr, valDirMgr, batch_info_csv_filename, num_epochs, does_resume_training, train_saver, output_dir_path, checkpoint_dir_path, train_summary_dir_path, val_summary_dir_path, is_time_major, is_sparse_output):
    print('\t{}: Start training worker process.'.format(os.getpid()))

    trainFileBatchLoader = NpzFileBatchLoader(batch_info_csv_filename, data_processing_functor=None)
    valFileBatchLoader = NpzFileBatchLoader(batch_info_csv_filename, data_processing_functor=None)

    #--------------------
    start_time = time.time()
    with train_session.as_default() as sess:
        with sess.graph.as_default():
            swl_tf_util.train_neural_net_by_file_batch_loader(sess, nnTrainer, trainFileBatchLoader, valFileBatchLoader, trainDirMgr, valDirMgr, num_epochs, does_resume_training, train_saver, output_dir_path, checkpoint_dir_path, train_summary_dir_path, val_summary_dir_path, is_time_major, is_sparse_output)
    print('\tTotal training time = {}'.format(time.time() - start_time))

    print('\t{}: End training worker process.'.format(os.getpid()))
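# NOTE [info] >> The variant above receives a TensorFlow session and saver, so it is run in a worker thread of the main process (see the threading.Thread usage in main() below).
# NOTE [info] >> The variant below only consumes batch files saved in a working directory, so it can run in a separate worker process; it guards directory requests/returns with the module-level global_lock shared through the pool initializer.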
def training_worker_proc(dirMgr, batch_info_csv_filename, num_epochs):
    print('\t{}: Start training worker process.'.format(os.getpid()))

    for epoch in range(num_epochs):
        print('\t{}: Request a working directory: epoch {}.'.format(os.getpid(), epoch))
        while True:
            """
            global_lock.acquire()
            try:
                dir_path = dirMgr.requestDirectory()
            finally:
                global_lock.release()
            """
            with global_lock:
                dir_path = dirMgr.requestDirectory()
            if dir_path is not None:
                break
            else:
                time.sleep(0.1)
        print('\t{}: Got a working directory: {}.'.format(os.getpid(), dir_path))

        #--------------------
        fileBatchLoader = NpzFileBatchLoader(batch_info_csv_filename=batch_info_csv_filename)
        batches = fileBatchLoader.loadBatches(dir_path)  # Loads batches.
        for idx, (batch_data, num_batch_examples) in enumerate(batches):
            # Train with each batch (inputs & outputs).
            #print('\t{}: {}, {}, {}'.format(idx, num_batch_examples, batch_data[0].shape, batch_data[1].shape))
            print('\t{}: {}, {}-{}, {}-{}'.format(idx, num_batch_examples,
                batch_data[0].shape, np.max(np.reshape(batch_data[0], (batch_data[0].shape[0], -1)), axis=-1),
                batch_data[1].shape, np.max(np.reshape(batch_data[1], (batch_data[1].shape[0], -1)), axis=-1)))

        #--------------------
        """
        global_lock.acquire()
        try:
            dirMgr.returnDirectory(dir_path)
        finally:
            global_lock.release()
        """
        with global_lock:
            dirMgr.returnDirectory(dir_path)
        print('\t{}: Returned a directory: {}.'.format(os.getpid(), dir_path))

    print('\t{}: End training worker process.'.format(os.getpid()))
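# NOTE [assumption] >> A minimal sketch of the initialize_lock() pool initializer passed to mp.Pool(initializer=initialize_lock, initargs=(lock,)) in main() below.
#   It simply publishes the lock created in the parent process as the module-level global_lock used by the worker procedures above; the actual helper in this repository may differ.
def initialize_lock(lock):
    global global_lock
    global_lock = lock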
def main():
    #np.random.seed(7)

    #--------------------
    # Sets parameters.

    does_need_training = True
    does_resume_training = False

    output_dir_prefix = 'synth90k_crnn'
    output_dir_suffix = datetime.datetime.now().strftime('%Y%m%dT%H%M%S')
    #output_dir_suffix = '20180302T155710'

    initial_epoch = 0

    # When outputs are not sparse, CRNN model's output shape = (samples, 32, num_classes) and dataset's output shape = (samples, 23, num_classes).
    is_sparse_output = True  # Fixed.
    #is_time_major = False  # Fixed.

    # NOTE [info] >> Places with the same parameters.
    #   class Synth90kLabelConverter in ${SWL_PYTHON_HOME}/test/language_processing/synth90k_dataset_test.py.
    #   class Synth90kPreprocessor.

    image_height, image_width, image_channel = 32, 128, 1
    max_label_len = 23  # Max length of words in lexicon.

    # Label: 0~9 + a~z + A~Z.
    #label_characters = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    # Label: 0~9 + a~z.
    label_characters = '0123456789abcdefghijklmnopqrstuvwxyz'

    SOS = '<SOS>'  # All strings will start with the Start-Of-String token.
    EOS = '<EOS>'  # All strings will end with the End-Of-String token.
    #extended_label_list = [SOS] + list(label_characters) + [EOS]
    extended_label_list = list(label_characters) + [EOS]
    #extended_label_list = list(label_characters)

    label_int2char = extended_label_list
    label_char2int = {c: i for i, c in enumerate(extended_label_list)}

    num_labels = len(extended_label_list)
    num_classes = num_labels + 1  # extended labels + blank label.
    # NOTE [info] >> The largest value (num_classes - 1) is reserved for the blank label.
    blank_label = num_classes - 1
    label_eos_token = label_char2int[EOS]
    #label_eos_token = blank_label

    batch_size = 256  # Number of samples per gradient update.
    num_epochs = 100  # Number of times to iterate over training data.
    shuffle = True

    augmenter = ImgaugAugmenter()
    #augmenter = create_imgaug_augmenter()  # If imgaug augmenter is used, data are augmented in background augmentation processes. (faster)
    is_output_augmented = False

    #use_multiprocessing = True  # Fixed. Batch generators & loaders are used in case of multiprocessing.
    #use_file_batch_loader = True  # Fixed. It is not related to multiprocessing.
    num_loaded_files_at_a_time = 5

    num_processes = 5
    train_batch_dir_path_prefix = './train_batch_dir'
    num_train_batch_dirs = 10
    val_batch_dir_path_prefix = './val_batch_dir'
    num_val_batch_dirs = 1
    test_batch_dir_path_prefix = './test_batch_dir'
    num_test_batch_dirs = 1
    batch_info_csv_filename = 'batch_info.csv'

    sess_config = tf.ConfigProto()
    #sess_config.device_count = {'GPU': 2}
    #sess_config.allow_soft_placement = True
    sess_config.log_device_placement = True
    sess_config.gpu_options.allow_growth = True
    #sess_config.gpu_options.per_process_gpu_memory_fraction = 0.4  # Only allocate 40% of the total memory of each GPU.

    #--------------------
    # Prepares multiprocessing.

    # set_start_method() should not be used more than once in the program.
    #mp.set_start_method('spawn')

    BaseManager.register('WorkingDirectoryManager', WorkingDirectoryManager)
    BaseManager.register('TwoStepWorkingDirectoryManager', TwoStepWorkingDirectoryManager)
    BaseManager.register('NpzFileBatchGeneratorFromNpyFiles', NpzFileBatchGeneratorFromNpyFiles)
    #BaseManager.register('NpzFileBatchLoader', NpzFileBatchLoader)
    manager = BaseManager()
    manager.start()

    lock = mp.Lock()
    #lock = mp.Manager().Lock()  # TypeError: can't pickle _thread.lock objects.

    #--------------------
    # Prepares directories.
    output_dir_path = os.path.join('.', '{}_{}'.format(output_dir_prefix, output_dir_suffix))
    checkpoint_dir_path = os.path.join(output_dir_path, 'tf_checkpoint')
    inference_dir_path = os.path.join(output_dir_path, 'inference')
    train_summary_dir_path = os.path.join(output_dir_path, 'train_log')
    val_summary_dir_path = os.path.join(output_dir_path, 'val_log')

    swl_util.make_dir(checkpoint_dir_path)
    swl_util.make_dir(inference_dir_path)
    swl_util.make_dir(train_summary_dir_path)
    swl_util.make_dir(val_summary_dir_path)

    #--------------------
    # Prepares data.

    # NOTE [info] >> Generate synth90k dataset using swl.language_processing.synth90k_dataset.save_synth90k_dataset_to_npy_files().
    #   Refer to ${SWL_PYTHON_HOME}/test/language_processing/synth90k_dataset_test.py.
    synth90k_base_dir_path = './synth90k_npy'
    train_input_filepaths, train_output_filepaths, val_input_filepaths, val_output_filepaths, test_input_filepaths, test_output_filepaths = load_data(synth90k_base_dir_path)

    #--------------------
    # Creates models, sessions, and graphs.

    # Creates graphs.
    if does_need_training:
        train_graph = tf.Graph()
    eval_graph = tf.Graph()
    infer_graph = tf.Graph()

    if does_need_training:
        with train_graph.as_default():
            #K.set_learning_phase(1)  # Sets the learning phase to 'train'. (Required)

            # Creates a model.
            modelForTraining = create_synth90k_crnn(image_height, image_width, image_channel, num_classes, label_eos_token, is_sparse_output)
            modelForTraining.create_training_model()

            # Creates a trainer.
            nnTrainer = SimpleCrnnTrainer(modelForTraining, initial_epoch)

            # Creates a saver.
            #   Saves a model every 2 hours and maximum 5 latest models are saved.
            train_saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2)

            initializer = tf.global_variables_initializer()

    with eval_graph.as_default():
        #K.set_learning_phase(0)  # Sets the learning phase to 'test'. (Required)

        # Creates a model.
        modelForEvaluation = create_synth90k_crnn(image_height, image_width, image_channel, num_classes, label_eos_token, is_sparse_output)
        modelForEvaluation.create_evaluation_model()

        # Creates an evaluator.
        nnEvaluator = NeuralNetEvaluator(modelForEvaluation)

        # Creates a saver.
        eval_saver = tf.train.Saver()

    with infer_graph.as_default():
        #K.set_learning_phase(0)  # Sets the learning phase to 'test'. (Required)

        # Creates a model.
        modelForInference = create_synth90k_crnn(image_height, image_width, image_channel, num_classes, label_eos_token, is_sparse_output)
        modelForInference.create_inference_model()

        # Creates an inferrer.
        nnInferrer = NeuralNetInferrer(modelForInference)

        # Creates a saver.
        infer_saver = tf.train.Saver()

    # Creates sessions.
    if does_need_training:
        train_session = tf.Session(graph=train_graph, config=sess_config)
    eval_session = tf.Session(graph=eval_graph, config=sess_config)
    infer_session = tf.Session(graph=infer_graph, config=sess_config)

    # Initializes.
    if does_need_training:
        train_session.run(initializer)

    #%%------------------------------------------------------------------
    # Trains and evaluates.
    if does_need_training:
        valDirMgr = WorkingDirectoryManager(val_batch_dir_path_prefix, num_val_batch_dirs)

        print('\tWaiting for a validation batch directory...')
        while True:
            val_dir_path = valDirMgr.requestDirectory()
            if val_dir_path is not None:
                break
            else:
                time.sleep(0.1)
        print('\tGot a validation batch directory: {}.'.format(val_dir_path))

        valFileBatchGenerator = NpzFileBatchGeneratorFromNpyFiles(val_input_filepaths, val_output_filepaths, num_loaded_files_at_a_time, batch_size, False, False, batch_info_csv_filename=batch_info_csv_filename)
        num_saved_examples = valFileBatchGenerator.saveBatches(val_dir_path)  # Generates and saves batches.
        print('\t#saved examples = {}.'.format(num_saved_examples))

        valDirMgr.returnDirectory(val_dir_path)

        #--------------------
        # Multiprocessing (augmentation) + multithreading (training).

        trainDirMgr = TwoStepWorkingDirectoryManager(train_batch_dir_path_prefix, num_train_batch_dirs)

        training_worker_thread = threading.Thread(target=training_worker_proc, args=(train_session, nnTrainer, trainDirMgr, valDirMgr, batch_info_csv_filename, num_epochs, does_resume_training, train_saver, output_dir_path, checkpoint_dir_path, train_summary_dir_path, val_summary_dir_path, False, is_sparse_output))
        training_worker_thread.start()

        trainDirMgr_mp = manager.TwoStepWorkingDirectoryManager(train_batch_dir_path_prefix, num_train_batch_dirs)
        #valDirMgr_mp = manager.WorkingDirectoryManager(val_batch_dir_path_prefix, num_val_batch_dirs)

        #trainFileBatchGenerator_mp = manager.NpzFileBatchGeneratorFromNpyFiles(train_input_filepaths, train_output_filepaths, num_loaded_files_at_a_time, batch_size, shuffle, False, augmenter=augmenter, is_output_augmented=is_output_augmented, batch_info_csv_filename=batch_info_csv_filename)
        #trainFileBatchLoader_mp = manager.NpzFileBatchLoader(batch_info_csv_filename, data_processing_functor=Synth90kPreprocessor(is_sparse_output))
        #valFileBatchLoader_mp = manager.NpzFileBatchLoader(batch_info_csv_filename, data_processing_functor=Synth90kPreprocessor(is_sparse_output))

        #timeout = 10
        timeout = None
        with mp.Pool(processes=num_processes, initializer=initialize_lock, initargs=(lock,)) as pool:
            data_augmentation_results = pool.map_async(partial(augmentation_worker_proc, augmenter, is_output_augmented, batch_info_csv_filename, trainDirMgr_mp, train_input_filepaths, train_output_filepaths, num_loaded_files_at_a_time, batch_size, shuffle, False), [epoch for epoch in range(num_epochs)])

            data_augmentation_results.get(timeout)

        training_worker_thread.join()

        #--------------------
        valFileBatchLoader = NpzFileBatchLoader(batch_info_csv_filename, data_processing_functor=Synth90kPreprocessor(is_sparse_output))

        start_time = time.time()
        with eval_session.as_default() as sess:
            with sess.graph.as_default():
                swl_tf_util.evaluate_neural_net_by_file_batch_loader(sess, nnEvaluator, valFileBatchLoader, valDirMgr, eval_saver, checkpoint_dir_path, False, False)
        print('\tTotal evaluation time = {}'.format(time.time() - start_time))

    #%%------------------------------------------------------------------
    # Infers.
    testDirMgr = WorkingDirectoryManager(test_batch_dir_path_prefix, num_test_batch_dirs)

    #--------------------
    print('\tWaiting for a test batch directory...')
    while True:
        test_dir_path = testDirMgr.requestDirectory()
        if test_dir_path is not None:
            break
        else:
            time.sleep(0.1)
    print('\tGot a test batch directory: {}.'.format(test_dir_path))

    testFileBatchGenerator = NpzFileBatchGeneratorFromNpyFiles(test_input_filepaths, test_output_filepaths, num_loaded_files_at_a_time, batch_size, False, False, batch_info_csv_filename=batch_info_csv_filename)
    num_saved_examples = testFileBatchGenerator.saveBatches(test_dir_path)  # Generates and saves batches.
    print('\t#saved examples = {}.'.format(num_saved_examples))

    testDirMgr.returnDirectory(test_dir_path)

    #--------------------
    testFileBatchLoader = NpzFileBatchLoader(batch_info_csv_filename, data_processing_functor=Synth90kPreprocessor(is_sparse_output))

    start_time = time.time()
    with infer_session.as_default() as sess:
        with sess.graph.as_default():
            inferences = swl_tf_util.infer_by_neural_net_and_file_batch_loader(sess, nnInferrer, testFileBatchLoader, testDirMgr, infer_saver, checkpoint_dir_path, False)
    print('\tTotal inference time = {}'.format(time.time() - start_time))

    #--------------------
    if inferences is not None:
        if num_classes >= 2:
            inferences = np.argmax(inferences, -1)
            groundtruths = np.argmax(test_labels, -1)
        else:
            inferences = np.around(inferences)
            groundtruths = test_labels
        correct_estimation_count = np.count_nonzero(np.equal(inferences, groundtruths))
        print('\tAccuracy = {} / {} = {}'.format(correct_estimation_count, groundtruths.size, correct_estimation_count / groundtruths.size))
    else:
        print('[SWL] Warning: Invalid inference results.')

    #--------------------
    # Closes sessions.

    if does_need_training:
        train_session.close()
        del train_session
    eval_session.close()
    del eval_session
    infer_session.close()
    del infer_session
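#%%------------------------------------------------------------------

# NOTE [assumption] >> A minimal entry-point sketch for the main() defined above.
#   The __main__ guard keeps the module importable by multiprocessing worker processes (e.g. when the 'spawn' start method is enabled) without re-running the whole script.
if __name__ == '__main__':
    main()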
def main():
    #np.random.seed(7)

    #--------------------
    # Sets parameters.

    does_need_training = True
    does_resume_training = False

    output_dir_prefix = 'mnist_cnn'
    output_dir_suffix = datetime.datetime.now().strftime('%Y%m%dT%H%M%S')
    #output_dir_suffix = '20190127T001424'

    initial_epoch = 0

    num_classes = 10
    input_shape = (None, 28, 28, 1)  # 784 = 28 * 28.
    output_shape = (None, num_classes)

    batch_size = 128  # Number of samples per gradient update.
    num_epochs = 30  # Number of times to iterate over training data.
    shuffle = True

    augmenter = ImgaugAugmenter()
    is_output_augmented = False

    use_multiprocessing = True  # Batch generators & loaders are used in case of multiprocessing.
    use_file_batch_loader = True  # Is not related to multiprocessing.
    num_processes = 5

    train_batch_dir_path_prefix = './train_batch_dir'
    #train_num_batch_dirs = 5
    val_batch_dir_path_prefix = './val_batch_dir'
    val_num_batch_dirs = 1
    test_batch_dir_path_prefix = './test_batch_dir'
    test_num_batch_dirs = 1
    batch_info_csv_filename = 'batch_info.csv'

    sess_config = tf.ConfigProto()
    #sess_config.device_count = {'GPU': 2}
    #sess_config.allow_soft_placement = True
    sess_config.log_device_placement = True
    sess_config.gpu_options.allow_growth = True
    #sess_config.gpu_options.per_process_gpu_memory_fraction = 0.4  # Only allocate 40% of the total memory of each GPU.

    #--------------------
    # Prepares multiprocessing.

    if use_multiprocessing:
        # set_start_method() should not be used more than once in the program.
        #mp.set_start_method('spawn')

        BaseManager.register('WorkingDirectoryManager', WorkingDirectoryManager)
        BaseManager.register('TwoStepWorkingDirectoryManager', TwoStepWorkingDirectoryManager)
        BaseManager.register('NpzFileBatchGenerator', NpzFileBatchGenerator)
        #BaseManager.register('NpzFileBatchLoader', NpzFileBatchLoader)
        manager = BaseManager()
        manager.start()

        lock = mp.Lock()
        #lock = mp.Manager().Lock()  # TypeError: can't pickle _thread.lock objects.

    #--------------------
    # Prepares directories.

    output_dir_path = os.path.join('.', '{}_{}'.format(output_dir_prefix, output_dir_suffix))
    checkpoint_dir_path = os.path.join(output_dir_path, 'tf_checkpoint')
    inference_dir_path = os.path.join(output_dir_path, 'inference')
    train_summary_dir_path = os.path.join(output_dir_path, 'train_log')
    val_summary_dir_path = os.path.join(output_dir_path, 'val_log')

    swl_util.make_dir(checkpoint_dir_path)
    swl_util.make_dir(inference_dir_path)
    swl_util.make_dir(train_summary_dir_path)
    swl_util.make_dir(val_summary_dir_path)

    #--------------------
    # Prepares data.

    train_images, train_labels, test_images, test_labels = load_data(input_shape[1:])

    #--------------------
    # Creates models, sessions, and graphs.

    # Creates graphs.
    if does_need_training:
        train_graph = tf.Graph()
    eval_graph = tf.Graph()
    infer_graph = tf.Graph()

    if does_need_training:
        with train_graph.as_default():
            # Creates a model.
            modelForTraining = create_mnist_cnn(input_shape, output_shape)
            modelForTraining.create_training_model()

            # Creates a trainer.
            nnTrainer = SimpleNeuralNetTrainer(modelForTraining, initial_epoch)

            # Creates a saver.
            #   Saves a model every 2 hours and maximum 5 latest models are saved.
            train_saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2)

            initializer = tf.global_variables_initializer()

    with eval_graph.as_default():
        # Creates a model.
        modelForEvaluation = create_mnist_cnn(input_shape, output_shape)
        modelForEvaluation.create_evaluation_model()

        # Creates an evaluator.
        nnEvaluator = NeuralNetEvaluator(modelForEvaluation)

        # Creates a saver.
        eval_saver = tf.train.Saver()

    with infer_graph.as_default():
        # Creates a model.
        modelForInference = create_mnist_cnn(input_shape, output_shape)
        modelForInference.create_inference_model()

        # Creates an inferrer.
        nnInferrer = NeuralNetInferrer(modelForInference)

        # Creates a saver.
        infer_saver = tf.train.Saver()

    # Creates sessions.
    if does_need_training:
        train_session = tf.Session(graph=train_graph, config=sess_config)
    eval_session = tf.Session(graph=eval_graph, config=sess_config)
    infer_session = tf.Session(graph=infer_graph, config=sess_config)

    # Initializes.
    if does_need_training:
        train_session.run(initializer)

    #%%------------------------------------------------------------------
    # Trains and evaluates.

    if does_need_training:
        if use_file_batch_loader or use_multiprocessing:
            valDirMgr = WorkingDirectoryManager(val_batch_dir_path_prefix, val_num_batch_dirs)

            while True:
                val_dir_path = valDirMgr.requestDirectory()
                if val_dir_path is not None:
                    break
                else:
                    time.sleep(0.1)
            print('\tGot a validation batch directory: {}.'.format(val_dir_path))

            valFileBatchGenerator = NpzFileBatchGenerator(test_images, test_labels, batch_size, False, False, batch_info_csv_filename=batch_info_csv_filename)
            valFileBatchGenerator.saveBatches(val_dir_path)  # Generates and saves batches.

            valDirMgr.returnDirectory(val_dir_path)

        if use_multiprocessing:
            train_num_batch_dirs = 5
            trainDirMgr_mp = manager.TwoStepWorkingDirectoryManager(train_batch_dir_path_prefix, train_num_batch_dirs)
            valDirMgr_mp = manager.WorkingDirectoryManager(val_batch_dir_path_prefix, val_num_batch_dirs)

            #trainFileBatchGenerator_mp = manager.NpzFileBatchGenerator(train_images, train_labels, batch_size, shuffle, False, augmenter=augmenter, is_output_augmented=is_output_augmented, batch_info_csv_filename=batch_info_csv_filename)
            #trainFileBatchLoader_mp = manager.NpzFileBatchLoader(batch_info_csv_filename, data_processing_functor=None)
            #valFileBatchLoader_mp = manager.NpzFileBatchLoader(batch_info_csv_filename, data_processing_functor=None)

            #--------------------
            if False:  # Multiprocessing only.
                # FIXME [fix] >> This code does not work.
                #   A TensorFlow session and saver cannot be passed to a worker procedure when using multiprocessing.pool.apply_async().

                #timeout = 10
                timeout = None
                with mp.Pool(processes=num_processes, initializer=initialize_lock, initargs=(lock,)) as pool:
                    training_results = pool.apply_async(training_worker_proc, args=(train_session, nnTrainer, trainDirMgr_mp, valDirMgr_mp, batch_info_csv_filename, num_epochs, does_resume_training, train_saver, output_dir_path, checkpoint_dir_path, train_summary_dir_path, val_summary_dir_path, False, False))
                    data_augmentation_results = pool.map_async(partial(augmentation_worker_proc, augmenter, is_output_augmented, batch_info_csv_filename, trainDirMgr_mp, train_images, train_labels, batch_size, shuffle, False), [epoch for epoch in range(num_epochs)])

                    training_results.get(timeout)
                    data_augmentation_results.get(timeout)
            else:  # Multiprocessing (augmentation) + multithreading (training).
                training_worker_thread = threading.Thread(target=training_worker_proc, args=(train_session, nnTrainer, trainDirMgr_mp, valDirMgr_mp, batch_info_csv_filename, num_epochs, does_resume_training, train_saver, output_dir_path, checkpoint_dir_path, train_summary_dir_path, val_summary_dir_path, False, False))
                training_worker_thread.start()

                #timeout = 10
                timeout = None
                with mp.Pool(processes=num_processes, initializer=initialize_lock, initargs=(lock,)) as pool:
                    data_augmentation_results = pool.map_async(partial(augmentation_worker_proc, augmenter, is_output_augmented, batch_info_csv_filename, trainDirMgr_mp, train_images, train_labels, batch_size, shuffle, False), [epoch for epoch in range(num_epochs)])

                    data_augmentation_results.get(timeout)

                training_worker_thread.join()
        elif use_file_batch_loader:
            train_num_batch_dirs = num_epochs
            trainDirMgr = WorkingDirectoryManager(train_batch_dir_path_prefix, train_num_batch_dirs)

            # TODO [improve] >> Not-so-good implementation.
            #   Usually training is performed for many more epochs, so too many batches have to be generated before training.
            for _ in range(train_num_batch_dirs):
                while True:
                    train_dir_path = trainDirMgr.requestDirectory()
                    if train_dir_path is not None:
                        break
                    else:
                        time.sleep(0.1)
                print('\tGot a train batch directory: {}.'.format(train_dir_path))

                trainFileBatchGenerator = NpzFileBatchGenerator(train_images, train_labels, batch_size, shuffle, False, batch_info_csv_filename=batch_info_csv_filename)
                trainFileBatchGenerator.saveBatches(train_dir_path)  # Generates and saves batches.

                trainDirMgr.returnDirectory(train_dir_path)

            #--------------------
            trainFileBatchLoader = NpzFileBatchLoader(batch_info_csv_filename, data_processing_functor=None)
            valFileBatchLoader = NpzFileBatchLoader(batch_info_csv_filename, data_processing_functor=None)

            start_time = time.time()
            with train_session.as_default() as sess:
                with sess.graph.as_default():
                    swl_tf_util.train_neural_net_by_file_batch_loader(sess, nnTrainer, trainFileBatchLoader, valFileBatchLoader, trainDirMgr, valDirMgr, num_epochs, does_resume_training, train_saver, output_dir_path, checkpoint_dir_path, train_summary_dir_path, val_summary_dir_path, False, False)
            print('\tTotal training time = {}'.format(time.time() - start_time))
        else:
            trainBatchGenerator = SimpleBatchGenerator(train_images, train_labels, batch_size, shuffle, False, augmenter, is_output_augmented)
            valBatchGenerator = SimpleBatchGenerator(test_images, test_labels, batch_size, False, False)

            start_time = time.time()
            with train_session.as_default() as sess:
                with sess.graph.as_default():
                    swl_tf_util.train_neural_net_by_batch_generator(sess, nnTrainer, trainBatchGenerator, valBatchGenerator, num_epochs, does_resume_training, train_saver, output_dir_path, checkpoint_dir_path, train_summary_dir_path, val_summary_dir_path, False, False)
            print('\tTotal training time = {}'.format(time.time() - start_time))

        #--------------------
        if use_file_batch_loader:
            valFileBatchLoader = NpzFileBatchLoader(batch_info_csv_filename, data_processing_functor=None)

            start_time = time.time()
            with eval_session.as_default() as sess:
                with sess.graph.as_default():
                    swl_tf_util.evaluate_neural_net_by_file_batch_loader(sess, nnEvaluator, valFileBatchLoader, valDirMgr, eval_saver, checkpoint_dir_path, False, False)
            print('\tTotal evaluation time = {}'.format(time.time() - start_time))
        else:
            valBatchGenerator = SimpleBatchGenerator(test_images, test_labels, batch_size, False, False)

            start_time = time.time()
            with eval_session.as_default() as sess:
                with sess.graph.as_default():
                    swl_tf_util.evaluate_neural_net_by_batch_generator(sess, nnEvaluator, valBatchGenerator, eval_saver, checkpoint_dir_path, False, False)
            print('\tTotal evaluation time = {}'.format(time.time() - start_time))

    #%%------------------------------------------------------------------
    # Infers.

    if use_file_batch_loader:
        testDirMgr = WorkingDirectoryManager(test_batch_dir_path_prefix, test_num_batch_dirs)

        #--------------------
        while True:
            test_dir_path = testDirMgr.requestDirectory()
            if test_dir_path is not None:
                break
            else:
                time.sleep(0.1)
        print('\tGot a test batch directory: {}.'.format(test_dir_path))

        testFileBatchGenerator = NpzFileBatchGenerator(test_images, test_labels, batch_size, False, False, batch_info_csv_filename=batch_info_csv_filename)
        testFileBatchGenerator.saveBatches(test_dir_path)  # Generates and saves batches.

        testDirMgr.returnDirectory(test_dir_path)

        #--------------------
        testFileBatchLoader = NpzFileBatchLoader(batch_info_csv_filename, data_processing_functor=None)

        start_time = time.time()
        with infer_session.as_default() as sess:
            with sess.graph.as_default():
                inferences = swl_tf_util.infer_by_neural_net_and_file_batch_loader(sess, nnInferrer, testFileBatchLoader, testDirMgr, infer_saver, checkpoint_dir_path, False)
        print('\tTotal inference time = {}'.format(time.time() - start_time))
    else:
        testBatchGenerator = SimpleBatchGenerator(test_images, test_labels, batch_size, False, False)

        start_time = time.time()
        with infer_session.as_default() as sess:
            with sess.graph.as_default():
                inferences = swl_tf_util.infer_by_neural_net_and_batch_generator(sess, nnInferrer, testBatchGenerator, infer_saver, checkpoint_dir_path, False)
        print('\tTotal inference time = {}'.format(time.time() - start_time))

    if inferences is not None:
        inferences = np.vstack(inferences)
        if num_classes >= 2:
            inferences = np.argmax(inferences, -1)
            groundtruths = np.argmax(test_labels, -1)
        else:
            inferences = np.around(inferences)
            groundtruths = test_labels
        correct_estimation_count = np.count_nonzero(np.equal(inferences, groundtruths))
        print('\tAccuracy = {} / {} = {}'.format(correct_estimation_count, groundtruths.size, correct_estimation_count / groundtruths.size))
    else:
        print('[SWL] Warning: Invalid inference results.')

    #--------------------
    # Closes sessions.

    if does_need_training:
        train_session.close()
        del train_session
    eval_session.close()
    del eval_session
    infer_session.close()
    del infer_session
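# NOTE [assumption] >> A minimal sketch of the load_data() helper called by main() above; the actual helper is defined elsewhere in this repository and may differ.
#   It assumes tf.keras.datasets.mnist and one-hot labels, matching input_shape = (None, 28, 28, 1) and output_shape = (None, num_classes).
def load_data(image_shape):
    # Loads MNIST and reshapes images to (samples,) + image_shape, scaled to [0, 1].
    (train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.mnist.load_data()
    train_images = train_images.reshape((-1,) + image_shape).astype(np.float32) / 255.0
    test_images = test_images.reshape((-1,) + image_shape).astype(np.float32) / 255.0
    # One-hot encodes labels to match output_shape = (None, num_classes).
    train_labels = tf.keras.utils.to_categorical(train_labels, 10).astype(np.float32)
    test_labels = tf.keras.utils.to_categorical(test_labels, 10).astype(np.float32)
    return train_images, train_labels, test_images, test_labels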
def simple_npz_file_batch_generator_from_image_files_and_loader_example():
    num_examples = 256
    npy_input_filepaths, output_seqs = generate_image_file_dataset('./image_files', num_examples)

    num_loaded_files = 57
    num_epochs = 7
    batch_size = 12
    shuffle = True
    is_time_major = False

    batch_dir_path_prefix = './batch_dir'
    num_batch_dirs = 5
    dirMgr = WorkingDirectoryManager(batch_dir_path_prefix, num_batch_dirs)

    batch_info_csv_filename = 'batch_info.csv'

    #augmenter = augment_identically
    #augmenter = IdentityAugmenter()
    augmenter = ImgaugAugmenter()
    is_output_augmented = False

    #--------------------
    for epoch in range(num_epochs):
        print('>>>>> Epoch #{}.'.format(epoch))

        while True:
            dir_path = dirMgr.requestDirectory()
            if dir_path is not None:
                break
            else:
                time.sleep(0.1)
        print('\t>>>>> Directory: {}.'.format(dir_path))

        #fileBatchGenerator = NpzFileBatchGeneratorFromImageFiles(npy_input_filepaths, output_seqs, num_loaded_files, batch_size, shuffle, is_time_major)
        fileBatchGenerator = NpzFileBatchGeneratorFromImageFiles(npy_input_filepaths, output_seqs, num_loaded_files, batch_size, shuffle, is_time_major, augmenter=augmenter, is_output_augmented=is_output_augmented, batch_info_csv_filename=batch_info_csv_filename)
        num_saved_examples = fileBatchGenerator.saveBatches(dir_path)  # Generates and saves batches.

        fileBatchLoader = NpzFileBatchLoader(batch_info_csv_filename=batch_info_csv_filename)
        batches = fileBatchLoader.loadBatches(dir_path)  # Loads batches.

        #dirMgr.returnDirectory(dir_path)  # If dir_path is returned before completing a job, dir_path can be used in a different job.

        num_loaded_examples = 0
        for idx, (batch_data, num_batch_examples) in enumerate(batches):
            # Can run in an individual thread or process.

            # Augment each batch (inputs & outputs).
            # Train with each batch (inputs & outputs).

            #print('\t{}: {}, {}, {}'.format(idx, num_batch_examples, batch_data[0].shape, batch_data[1].shape))
            print('\t{}: {}, {}-{}, {}-{}'.format(idx, num_batch_examples,
                batch_data[0].shape, np.max(np.reshape(batch_data[0], (batch_data[0].shape[0], -1)), axis=-1),
                batch_data[1].shape, np.max(np.reshape(batch_data[1], (batch_data[1].shape[0], -1)), axis=-1)))
            num_loaded_examples += num_batch_examples

        print('#saved examples =', num_saved_examples)
        print('#loaded examples =', num_loaded_examples)

        dirMgr.returnDirectory(dir_path)