Example 1
def training_worker_proc(train_session, nnTrainer, trainDirMgr, valDirMgr,
                         batch_info_csv_filename, num_epochs,
                         does_resume_training, train_saver, output_dir_path,
                         checkpoint_dir_path, train_summary_dir_path,
                         val_summary_dir_path, is_time_major,
                         is_sparse_output):
    print('\t{}: Start training worker process.'.format(os.getpid()))

    trainFileBatchLoader = NpzFileBatchLoader(batch_info_csv_filename,
                                              data_processing_functor=None)
    valFileBatchLoader = NpzFileBatchLoader(batch_info_csv_filename,
                                            data_processing_functor=None)

    #--------------------
    start_time = time.time()
    with train_session.as_default() as sess:
        with sess.graph.as_default():
            swl_tf_util.train_neural_net_by_file_batch_loader(
                sess, nnTrainer, trainFileBatchLoader, valFileBatchLoader,
                trainDirMgr, valDirMgr, num_epochs, does_resume_training,
                train_saver, output_dir_path, checkpoint_dir_path,
                train_summary_dir_path, val_summary_dir_path, is_time_major,
                is_sparse_output)
    print('\tTotal training time = {}'.format(time.time() - start_time))

    print('\t{}: End training worker process.'.format(os.getpid()))
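This worker runs the whole training loop on its own thread while augmentation processes fill the batch directories; Example 3 below starts it exactly this way. A minimal launch sketch, reusing the variable names from that example:

import threading

# Assumes train_session, nnTrainer, the directory managers, and the path/flag
# variables are already set up as in Example 3 (is_time_major=False there).
training_worker_thread = threading.Thread(
    target=training_worker_proc,
    args=(train_session, nnTrainer, trainDirMgr, valDirMgr,
          batch_info_csv_filename, num_epochs, does_resume_training,
          train_saver, output_dir_path, checkpoint_dir_path,
          train_summary_dir_path, val_summary_dir_path, False,
          is_sparse_output))
training_worker_thread.start()
# ... drive the augmentation worker pool here (see Example 3) ...
training_worker_thread.join()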
Example 2
def training_worker_proc(dirMgr, batch_info_csv_filename, num_epochs):
    print('\t{}: Start training worker process.'.format(os.getpid()))

    for epoch in range(num_epochs):
        print('\t{}: Request a working directory: epoch {}.'.format(
            os.getpid(), epoch))
        while True:
            """
			global_lock.acquire()
			try:
				dir_path = dirMgr.requestDirectory()
			finally:
				global_lock.release()
			"""
            with global_lock:
                dir_path = dirMgr.requestDirectory()

            if dir_path is not None:
                break
            else:
                time.sleep(0.1)
        print('\t{}: Got a working directory: {}.'.format(
            os.getpid(), dir_path))

        #--------------------
        fileBatchLoader = NpzFileBatchLoader(
            batch_info_csv_filename=batch_info_csv_filename)
        batches = fileBatchLoader.loadBatches(dir_path)  # Loads batches.
        for idx, (batch_data, num_batch_examples) in enumerate(batches):
            # Train with each batch (inputs & outputs).
            #print('\t{}: {}, {}, {}'.format(idx, num_batch_examples, batch_data[0].shape, batch_data[1].shape))
            print('\t{}: {}, {}-{}, {}-{}'.format(
                idx, num_batch_examples, batch_data[0].shape,
                np.max(np.reshape(batch_data[0], (batch_data[0].shape[0], -1)),
                       axis=-1), batch_data[1].shape,
                np.max(np.reshape(batch_data[1], (batch_data[1].shape[0], -1)),
                       axis=-1)))

        #--------------------
        """
		global_lock.acquire()
		try:
			dirMgr.returnDirectory(dir_path)
		finally:
			global_lock.release()
		"""
        with global_lock:
            dirMgr.returnDirectory(dir_path)
        print('\t{}: Returned a directory: {}.'.format(os.getpid(), dir_path))

    print('\t{}: End training worker process.'.format(os.getpid()))
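global_lock is not defined in this snippet: it is expected to be installed in every pool worker by an initializer. A minimal sketch of that pattern, assuming the initialize_lock initializer that Examples 3 and 4 pass to mp.Pool (the exact body is an assumption):

import multiprocessing as mp

def initialize_lock(lock):
    # Runs once in each pool worker; publishes the shared lock as the
    # module-level global that the worker procedures read.
    global global_lock
    global_lock = lock

# Usage (as in Examples 3 and 4 below):
#   lock = mp.Lock()
#   with mp.Pool(processes=num_processes, initializer=initialize_lock,
#                initargs=(lock,)) as pool:
#       ...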
Example 3
def main():
	#np.random.seed(7)

	#--------------------
	# Sets parameters.

	does_need_training = True
	does_resume_training = False

	output_dir_prefix = 'synth90k_crnn'
	output_dir_suffix = datetime.datetime.now().strftime('%Y%m%dT%H%M%S')
	#output_dir_suffix = '20180302T155710'

	initial_epoch = 0

	# When outputs are not sparse, CRNN model's output shape = (samples, 32, num_classes) and dataset's output shape = (samples, 23, num_classes).
	is_sparse_output = True  # Fixed.
	#is_time_major = False  # Fixed.

	# NOTE [info] >> Places with the same parameters.
	#	class Synth90kLabelConverter in ${SWL_PYTHON_HOME}/test/language_processing/synth90k_dataset_test.py.
	#	class Synth90kPreprocessor.

	image_height, image_width, image_channel = 32, 128, 1
	max_label_len = 23  # Max length of words in lexicon.

	# Label: 0~9 + a~z + A~Z.
	#label_characters = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
	# Label: 0~9 + a~z.
	label_characters = '0123456789abcdefghijklmnopqrstuvwxyz'

	SOS = '<SOS>'  # All strings will start with the Start-Of-String token.
	EOS = '<EOS>'  # All strings will end with the End-Of-String token.
	#extended_label_list = [SOS] + list(label_characters) + [EOS]
	extended_label_list = list(label_characters) + [EOS]
	#extended_label_list = list(label_characters)

	label_int2char = extended_label_list
	label_char2int = {c:i for i, c in enumerate(extended_label_list)}

	num_labels = len(extended_label_list)
	num_classes = num_labels + 1  # extended labels + blank label.
	# NOTE [info] >> The largest value (num_classes - 1) is reserved for the blank label.
	blank_label = num_classes - 1
	label_eos_token = label_char2int[EOS]
	#label_eos_token = blank_label

	batch_size = 256  # Number of samples per gradient update.
	num_epochs = 100  # Number of times to iterate over training data.
	shuffle = True

	augmenter = ImgaugAugmenter()
	#augmenter = create_imgaug_augmenter()  # If this imgaug augmenter is used, data are augmented in background augmentation processes (faster).
	is_output_augmented = False

	#use_multiprocessing = True  # Fixed. Batch generators & loaders are used in case of multiprocessing.
	#use_file_batch_loader = True  # Fixed. It is not related to multiprocessing.
	num_loaded_files_at_a_time = 5

	num_processes = 5
	train_batch_dir_path_prefix = './train_batch_dir'
	num_train_batch_dirs = 10
	val_batch_dir_path_prefix = './val_batch_dir'
	num_val_batch_dirs = 1
	test_batch_dir_path_prefix = './test_batch_dir'
	num_test_batch_dirs = 1
	batch_info_csv_filename = 'batch_info.csv'

	sess_config = tf.ConfigProto()
	#sess_config.device_count = {'GPU': 2}
	#sess_config.allow_soft_placement = True
	sess_config.log_device_placement = True
	sess_config.gpu_options.allow_growth = True
	#sess_config.gpu_options.per_process_gpu_memory_fraction = 0.4  # Only allocate 40% of the total memory of each GPU.

	#--------------------
	# Prepares multiprocessing.

	# set_start_method() should not be used more than once in the program.
	#mp.set_start_method('spawn')

	BaseManager.register('WorkingDirectoryManager', WorkingDirectoryManager)
	BaseManager.register('TwoStepWorkingDirectoryManager', TwoStepWorkingDirectoryManager)
	BaseManager.register('NpzFileBatchGeneratorFromNpyFiles', NpzFileBatchGeneratorFromNpyFiles)
	#BaseManager.register('NpzFileBatchLoader', NpzFileBatchLoader)
	manager = BaseManager()
	manager.start()

	lock = mp.Lock()
	#lock = mp.Manager().Lock()  # TypeError: can't pickle _thread.lock objects.

	#--------------------
	# Prepares directories.

	output_dir_path = os.path.join('.', '{}_{}'.format(output_dir_prefix, output_dir_suffix))
	checkpoint_dir_path = os.path.join(output_dir_path, 'tf_checkpoint')
	inference_dir_path = os.path.join(output_dir_path, 'inference')
	train_summary_dir_path = os.path.join(output_dir_path, 'train_log')
	val_summary_dir_path = os.path.join(output_dir_path, 'val_log')

	swl_util.make_dir(checkpoint_dir_path)
	swl_util.make_dir(inference_dir_path)
	swl_util.make_dir(train_summary_dir_path)
	swl_util.make_dir(val_summary_dir_path)

	#--------------------
	# Prepares data.

	# NOTE [info] >> Generate synth90k dataset using swl.language_processing.synth90k_dataset.save_synth90k_dataset_to_npy_files().
	#	Refer to ${SWL_PYTHON_HOME}/test/language_processing/synth90k_dataset_test.py.

	synth90k_base_dir_path = './synth90k_npy'
	train_input_filepaths, train_output_filepaths, val_input_filepaths, val_output_filepaths, test_input_filepaths, test_output_filepaths = load_data(synth90k_base_dir_path)

	#--------------------
	# Creates models, sessions, and graphs.

	# Creates graphs.
	if does_need_training:
		train_graph = tf.Graph()
		eval_graph = tf.Graph()
	infer_graph = tf.Graph()

	if does_need_training:
		with train_graph.as_default():
			#K.set_learning_phase(1)  # Sets the learning phase to 'train'. (Required)

			# Creates a model.
			modelForTraining = create_synth90k_crnn(image_height, image_width, image_channel, num_classes, label_eos_token, is_sparse_output)
			modelForTraining.create_training_model()

			# Creates a trainer.
			nnTrainer = SimpleCrnnTrainer(modelForTraining, initial_epoch)

			# Creates a saver.
			#	Keeps at most the 5 latest checkpoints and additionally retains one checkpoint every 2 hours.
			train_saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2)

			initializer = tf.global_variables_initializer()

		with eval_graph.as_default():
			#K.set_learning_phase(0)  # Sets the learning phase to 'test'. (Required)

			# Creates a model.
			modelForEvaluation = create_synth90k_crnn(image_height, image_width, image_channel, num_classes, label_eos_token, is_sparse_output)
			modelForEvaluation.create_evaluation_model()

			# Creates an evaluator.
			nnEvaluator = NeuralNetEvaluator(modelForEvaluation)

			# Creates a saver.
			eval_saver = tf.train.Saver()

	with infer_graph.as_default():
		#K.set_learning_phase(0)  # Sets the learning phase to 'test'. (Required)

		# Creates a model.
		modelForInference = create_synth90k_crnn(image_height, image_width, image_channel, num_classes, label_eos_token, is_sparse_output)
		modelForInference.create_inference_model()

		# Creates an inferrer.
		nnInferrer = NeuralNetInferrer(modelForInference)

		# Creates a saver.
		infer_saver = tf.train.Saver()

	# Creates sessions.
	if does_need_training:
		train_session = tf.Session(graph=train_graph, config=sess_config)
		eval_session = tf.Session(graph=eval_graph, config=sess_config)
	infer_session = tf.Session(graph=infer_graph, config=sess_config)

	# Initializes.
	if does_need_training:
		train_session.run(initializer)

	#%%------------------------------------------------------------------
	# Trains and evaluates.

	if does_need_training:
		valDirMgr = WorkingDirectoryManager(val_batch_dir_path_prefix, num_val_batch_dirs)

		print('\tWaiting for a validation batch directory...')
		while True:
			val_dir_path = valDirMgr.requestDirectory()
			if val_dir_path is not None:
				break
			else:
				time.sleep(0.1)
		print('\tGot a validation batch directory: {}.'.format(val_dir_path))

		valFileBatchGenerator = NpzFileBatchGeneratorFromNpyFiles(val_input_filepaths, val_output_filepaths, num_loaded_files_at_a_time, batch_size, False, False, batch_info_csv_filename=batch_info_csv_filename)
		num_saved_examples = valFileBatchGenerator.saveBatches(val_dir_path)  # Generates and saves batches.
		print('\t#saved examples = {}.'.format(num_saved_examples))

		valDirMgr.returnDirectory(val_dir_path)

		#--------------------
		# Multiprocessing (augmentation) + multithreading (training).

		trainDirMgr = TwoStepWorkingDirectoryManager(train_batch_dir_path_prefix, num_train_batch_dirs)

		training_worker_thread = threading.Thread(target=training_worker_proc, args=(train_session, nnTrainer, trainDirMgr, valDirMgr, batch_info_csv_filename, num_epochs, does_resume_training, train_saver, output_dir_path, checkpoint_dir_path, train_summary_dir_path, val_summary_dir_path, False, is_sparse_output))
		training_worker_thread.start()

		trainDirMgr_mp = manager.TwoStepWorkingDirectoryManager(train_batch_dir_path_prefix, num_train_batch_dirs)
		#valDirMgr_mp = manager.WorkingDirectoryManager(val_batch_dir_path_prefix, num_val_batch_dirs)

		#trainFileBatchGenerator_mp = manager.NpzFileBatchGeneratorFromNpyFiles(train_input_filepaths, train_output_filepaths, num_loaded_files_at_a_time, batch_size, shuffle, False, augmenter=augmenter, is_output_augmented=is_output_augmented, batch_info_csv_filename=batch_info_csv_filename)
		#trainFileBatchLoader_mp = manager.NpzFileBatchLoader(batch_info_csv_filename, data_processing_functor=Synth90kPreprocessor(is_sparse_output))
		#valFileBatchLoader_mp = manager.NpzFileBatchLoader(batch_info_csv_filename, data_processing_functor=Synth90kPreprocessor(is_sparse_output))

		#timeout = 10
		timeout = None
		with mp.Pool(processes=num_processes, initializer=initialize_lock, initargs=(lock,)) as pool:
			data_augmentation_results = pool.map_async(partial(augmentation_worker_proc, augmenter, is_output_augmented, batch_info_csv_filename, trainDirMgr_mp, train_input_filepaths, train_output_filepaths, num_loaded_files_at_a_time, batch_size, shuffle, False), [epoch for epoch in range(num_epochs)])

			data_augmentation_results.get(timeout)

		training_worker_thread.join()

		#--------------------
		valFileBatchLoader = NpzFileBatchLoader(batch_info_csv_filename, data_processing_functor=Synth90kPreprocessor(is_sparse_output))

		start_time = time.time()
		with eval_session.as_default() as sess:
			with sess.graph.as_default():
				swl_tf_util.evaluate_neural_net_by_file_batch_loader(sess, nnEvaluator, valFileBatchLoader, valDirMgr, eval_saver, checkpoint_dir_path, False, False)
		print('\tTotal evaluation time = {}'.format(time.time() - start_time))

	#%%------------------------------------------------------------------
	# Infers.

	testDirMgr = WorkingDirectoryManager(test_batch_dir_path_prefix, num_test_batch_dirs)

	#--------------------
	print('\tWaiting for a test batch directory...')
	while True:
		test_dir_path = testDirMgr.requestDirectory()
		if test_dir_path is not None:
			break
		else:
			time.sleep(0.1)
	print('\tGot a test batch directory: {}.'.format(test_dir_path))

	testFileBatchGenerator = NpzFileBatchGeneratorFromNpyFiles(test_input_filepaths, test_output_filepaths, num_loaded_files_at_a_time, batch_size, False, False, batch_info_csv_filename=batch_info_csv_filename)
	num_saved_examples = testFileBatchGenerator.saveBatches(test_dir_path)  # Generates and saves batches.
	print('\t#saved examples = {}.'.format(num_saved_examples))

	testDirMgr.returnDirectory(test_dir_path)

	#--------------------
	testFileBatchLoader = NpzFileBatchLoader(batch_info_csv_filename, data_processing_functor=Synth90kPreprocessor(is_sparse_output))

	start_time = time.time()
	with infer_session.as_default() as sess:
		with sess.graph.as_default():
			inferences = swl_tf_util.infer_by_neural_net_and_file_batch_loader(sess, nnInferrer, testFileBatchLoader, testDirMgr, infer_saver, checkpoint_dir_path, False)
	print('\tTotal inference time = {}'.format(time.time() - start_time))

	#--------------------
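	# NOTE [check] >> test_labels is not defined in this example; the ground
	#	truths would have to be loaded from test_output_filepaths before the
	#	accuracy below can be computed.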
	if inferences is not None:
		if num_classes >= 2:
			inferences = np.argmax(inferences, -1)
			groundtruths = np.argmax(test_labels, -1)
		else:
			inferences = np.around(inferences)
			groundtruths = test_labels
		correct_estimation_count = np.count_nonzero(np.equal(inferences, groundtruths))
		print('\tAccuracy = {} / {} = {}'.format(correct_estimation_count, groundtruths.size, correct_estimation_count / groundtruths.size))
	else:
		print('[SWL] Warning: Invalid inference results.')

	#--------------------
	# Closes sessions.

	if does_need_training:
		train_session.close()
		del train_session
		eval_session.close()
		del eval_session
	infer_session.close()
	del infer_session
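The augmentation worker itself is not shown. Its parameter list can be read off the partial() call above, with map_async() appending the epoch index as the last argument. A plausible skeleton under those assumptions, reusing time, global_lock, and the generator class as they appear elsewhere in this example; the real TwoStepWorkingDirectoryManager presumably distinguishes directories being filled from directories ready for training, which this sketch glosses over with the simpler request/return interface of Example 2:

def augmentation_worker_proc(augmenter, is_output_augmented, batch_info_csv_filename,
		dirMgr, input_filepaths, output_filepaths, num_loaded_files_at_a_time,
		batch_size, shuffle, is_time_major, epoch):
	# Request a working directory (dirMgr is a BaseManager proxy, guarded by global_lock).
	while True:
		with global_lock:
			dir_path = dirMgr.requestDirectory()
		if dir_path is not None:
			break
		time.sleep(0.1)

	# Generate augmented batches into the directory, then hand it over to the training thread.
	fileBatchGenerator = NpzFileBatchGeneratorFromNpyFiles(
		input_filepaths, output_filepaths, num_loaded_files_at_a_time, batch_size,
		shuffle, is_time_major, augmenter=augmenter, is_output_augmented=is_output_augmented,
		batch_info_csv_filename=batch_info_csv_filename)
	fileBatchGenerator.saveBatches(dir_path)

	with global_lock:
		dirMgr.returnDirectory(dir_path)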
Example 4
def main():
    #np.random.seed(7)

    #--------------------
    # Sets parameters.

    does_need_training = True
    does_resume_training = False

    output_dir_prefix = 'mnist_cnn'
    output_dir_suffix = datetime.datetime.now().strftime('%Y%m%dT%H%M%S')
    #output_dir_suffix = '20190127T001424'

    initial_epoch = 0

    num_classes = 10
    input_shape = (None, 28, 28, 1)  # 784 = 28 * 28.
    output_shape = (None, num_classes)

    batch_size = 128  # Number of samples per gradient update.
    num_epochs = 30  # Number of times to iterate over training data.
    shuffle = True

    augmenter = ImgaugAugmenter()
    is_output_augmented = False

    use_multiprocessing = True  # Batch generators & loaders are used in case of multiprocessing.
    use_file_batch_loader = True  # It is not related to multiprocessing.

    num_processes = 5
    train_batch_dir_path_prefix = './train_batch_dir'
    #train_num_batch_dirs = 5
    val_batch_dir_path_prefix = './val_batch_dir'
    val_num_batch_dirs = 1
    test_batch_dir_path_prefix = './test_batch_dir'
    test_num_batch_dirs = 1
    batch_info_csv_filename = 'batch_info.csv'

    sess_config = tf.ConfigProto()
    #sess_config.device_count = {'GPU': 2}
    #sess_config.allow_soft_placement = True
    sess_config.log_device_placement = True
    sess_config.gpu_options.allow_growth = True
    #sess_config.gpu_options.per_process_gpu_memory_fraction = 0.4  # Only allocate 40% of the total memory of each GPU.

    #--------------------
    # Prepares multiprocessing.

    if use_multiprocessing:
        # set_start_method() should not be used more than once in the program.
        #mp.set_start_method('spawn')

        BaseManager.register('WorkingDirectoryManager',
                             WorkingDirectoryManager)
        BaseManager.register('TwoStepWorkingDirectoryManager',
                             TwoStepWorkingDirectoryManager)
        BaseManager.register('NpzFileBatchGenerator', NpzFileBatchGenerator)
        #BaseManager.register('NpzFileBatchLoader', NpzFileBatchLoader)
        manager = BaseManager()
        manager.start()

        lock = mp.Lock()
        #lock = mp.Manager().Lock()  # TypeError: can't pickle _thread.lock objects.

    #--------------------
    # Prepares directories.

    output_dir_path = os.path.join(
        '.', '{}_{}'.format(output_dir_prefix, output_dir_suffix))
    checkpoint_dir_path = os.path.join(output_dir_path, 'tf_checkpoint')
    inference_dir_path = os.path.join(output_dir_path, 'inference')
    train_summary_dir_path = os.path.join(output_dir_path, 'train_log')
    val_summary_dir_path = os.path.join(output_dir_path, 'val_log')

    swl_util.make_dir(checkpoint_dir_path)
    swl_util.make_dir(inference_dir_path)
    swl_util.make_dir(train_summary_dir_path)
    swl_util.make_dir(val_summary_dir_path)

    #--------------------
    # Prepares data.

    train_images, train_labels, test_images, test_labels = load_data(
        input_shape[1:])

    #--------------------
    # Creates models, sessions, and graphs.

    # Creates graphs.
    if does_need_training:
        train_graph = tf.Graph()
        eval_graph = tf.Graph()
    infer_graph = tf.Graph()

    if does_need_training:
        with train_graph.as_default():
            # Creates a model.
            modelForTraining = create_mnist_cnn(input_shape, output_shape)
            modelForTraining.create_training_model()

            # Creates a trainer.
            nnTrainer = SimpleNeuralNetTrainer(modelForTraining, initial_epoch)

            # Creates a saver.
            #	Keeps at most the 5 latest checkpoints and additionally retains one checkpoint every 2 hours.
            train_saver = tf.train.Saver(max_to_keep=5,
                                         keep_checkpoint_every_n_hours=2)

            initializer = tf.global_variables_initializer()

        with eval_graph.as_default():
            # Creates a model.
            modelForEvaluation = create_mnist_cnn(input_shape, output_shape)
            modelForEvaluation.create_evaluation_model()

            # Creates an evaluator.
            nnEvaluator = NeuralNetEvaluator(modelForEvaluation)

            # Creates a saver.
            eval_saver = tf.train.Saver()

    with infer_graph.as_default():
        # Creates a model.
        modelForInference = create_mnist_cnn(input_shape, output_shape)
        modelForInference.create_inference_model()

        # Creates an inferrer.
        nnInferrer = NeuralNetInferrer(modelForInference)

        # Creates a saver.
        infer_saver = tf.train.Saver()

    # Creates sessions.
    if does_need_training:
        train_session = tf.Session(graph=train_graph, config=sess_config)
        eval_session = tf.Session(graph=eval_graph, config=sess_config)
    infer_session = tf.Session(graph=infer_graph, config=sess_config)

    # Initializes.
    if does_need_training:
        train_session.run(initializer)

    #%%------------------------------------------------------------------
    # Trains and evaluates.

    if does_need_training:
        if use_file_batch_loader or use_multiprocessing:
            valDirMgr = WorkingDirectoryManager(val_batch_dir_path_prefix,
                                                val_num_batch_dirs)

            while True:
                val_dir_path = valDirMgr.requestDirectory()
                if val_dir_path is not None:
                    break
                else:
                    time.sleep(0.1)
            print(
                '\tGot a validation batch directory: {}.'.format(val_dir_path))

            valFileBatchGenerator = NpzFileBatchGenerator(
                test_images,
                test_labels,
                batch_size,
                False,
                False,
                batch_info_csv_filename=batch_info_csv_filename)
            valFileBatchGenerator.saveBatches(
                val_dir_path)  # Generates and saves batches.

            valDirMgr.returnDirectory(val_dir_path)

        if use_multiprocessing:
            train_num_batch_dirs = 5
            trainDirMgr_mp = manager.TwoStepWorkingDirectoryManager(
                train_batch_dir_path_prefix, train_num_batch_dirs)
            valDirMgr_mp = manager.WorkingDirectoryManager(
                val_batch_dir_path_prefix, val_num_batch_dirs)

            #trainFileBatchGenerator_mp = manager.NpzFileBatchGenerator(train_images, train_labels, batch_size, shuffle, False, augmenter=augmenter, is_output_augmented=is_output_augmented, batch_info_csv_filename=batch_info_csv_filename)
            #trainFileBatchLoader_mp = manager.NpzFileBatchLoader(batch_info_csv_filename, data_processing_functor=None)
            #valFileBatchLoader_mp = manager.NpzFileBatchLoader(batch_info_csv_filename, data_processing_functor=None)

            #--------------------
            if False:
                # Multiprocessing only.

                # FIXME [fix] >> This code does not work.
                #	TensorFlow session and saver cannot be passed to a worker procedure when using multiprocessing.pool.apply_async().

                #timeout = 10
                timeout = None
                with mp.Pool(processes=num_processes,
                             initializer=initialize_lock,
                             initargs=(lock, )) as pool:
                    training_results = pool.apply_async(
                        training_worker_proc,
                        args=(train_session, nnTrainer, trainDirMgr_mp,
                              valDirMgr_mp, batch_info_csv_filename,
                              num_epochs, does_resume_training, train_saver,
                              output_dir_path, checkpoint_dir_path,
                              train_summary_dir_path, val_summary_dir_path,
                              False, False))
                    data_augmentation_results = pool.map_async(
                        partial(augmentation_worker_proc, augmenter,
                                is_output_augmented, batch_info_csv_filename,
                                trainDirMgr_mp, train_images, train_labels,
                                batch_size, shuffle, False),
                        [epoch for epoch in range(num_epochs)])

                    training_results.get(timeout)
                    data_augmentation_results.get(timeout)
            else:
                # Multiprocessing (augmentation) + multithreading (training).

                training_worker_thread = threading.Thread(
                    target=training_worker_proc,
                    args=(train_session, nnTrainer, trainDirMgr_mp,
                          valDirMgr_mp, batch_info_csv_filename, num_epochs,
                          does_resume_training, train_saver, output_dir_path,
                          checkpoint_dir_path, train_summary_dir_path,
                          val_summary_dir_path, False, False))
                training_worker_thread.start()

                #timeout = 10
                timeout = None
                with mp.Pool(processes=num_processes,
                             initializer=initialize_lock,
                             initargs=(lock, )) as pool:
                    data_augmentation_results = pool.map_async(
                        partial(augmentation_worker_proc, augmenter,
                                is_output_augmented, batch_info_csv_filename,
                                trainDirMgr_mp, train_images, train_labels,
                                batch_size, shuffle, False),
                        [epoch for epoch in range(num_epochs)])

                    data_augmentation_results.get(timeout)

                training_worker_thread.join()
        elif use_file_batch_loader:
            train_num_batch_dirs = num_epochs
            trainDirMgr = WorkingDirectoryManager(train_batch_dir_path_prefix,
                                                  train_num_batch_dirs)

            # TODO [improve] >> Not-so-good implementation.
            #	Usually training runs for many more epochs, so too many batches would have to be generated before training.
            for _ in range(train_num_batch_dirs):
                while True:
                    train_dir_path = trainDirMgr.requestDirectory()
                    if train_dir_path is not None:
                        break
                    else:
                        time.sleep(0.1)
                print('\tGot a train batch directory: {}.'.format(
                    train_dir_path))

                trainFileBatchGenerator = NpzFileBatchGenerator(
                    train_images,
                    train_labels,
                    batch_size,
                    shuffle,
                    False,
                    batch_info_csv_filename=batch_info_csv_filename)
                trainFileBatchGenerator.saveBatches(
                    train_dir_path)  # Generates and saves batches.

                trainDirMgr.returnDirectory(train_dir_path)

            #--------------------
            trainFileBatchLoader = NpzFileBatchLoader(
                batch_info_csv_filename, data_processing_functor=None)
            valFileBatchLoader = NpzFileBatchLoader(
                batch_info_csv_filename, data_processing_functor=None)

            start_time = time.time()
            with train_session.as_default() as sess:
                with sess.graph.as_default():
                    swl_tf_util.train_neural_net_by_file_batch_loader(
                        sess, nnTrainer, trainFileBatchLoader,
                        valFileBatchLoader, trainDirMgr, valDirMgr, num_epochs,
                        does_resume_training, train_saver, output_dir_path,
                        checkpoint_dir_path, train_summary_dir_path,
                        val_summary_dir_path, False, False)
            print('\tTotal training time = {}'.format(time.time() -
                                                      start_time))
        else:
            trainBatchGenerator = SimpleBatchGenerator(train_images,
                                                       train_labels,
                                                       batch_size, shuffle,
                                                       False, augmenter,
                                                       is_output_augmented)
            valBatchGenerator = SimpleBatchGenerator(test_images, test_labels,
                                                     batch_size, False, False)

            start_time = time.time()
            with train_session.as_default() as sess:
                with sess.graph.as_default():
                    swl_tf_util.train_neural_net_by_batch_generator(
                        sess, nnTrainer, trainBatchGenerator,
                        valBatchGenerator, num_epochs, does_resume_training,
                        train_saver, output_dir_path, checkpoint_dir_path,
                        train_summary_dir_path, val_summary_dir_path, False,
                        False)
            print('\tTotal training time = {}'.format(time.time() -
                                                      start_time))

        #--------------------
        if use_file_batch_loader:
            valFileBatchLoader = NpzFileBatchLoader(
                batch_info_csv_filename, data_processing_functor=None)

            start_time = time.time()
            with eval_session.as_default() as sess:
                with sess.graph.as_default():
                    swl_tf_util.evaluate_neural_net_by_file_batch_loader(
                        sess, nnEvaluator, valFileBatchLoader, valDirMgr,
                        eval_saver, checkpoint_dir_path, False, False)
            print('\tTotal evaluation time = {}'.format(time.time() -
                                                        start_time))
        else:
            valBatchGenerator = SimpleBatchGenerator(test_images, test_labels,
                                                     batch_size, False, False)

            start_time = time.time()
            with eval_session.as_default() as sess:
                with sess.graph.as_default():
                    swl_tf_util.evaluate_neural_net_by_batch_generator(
                        sess, nnEvaluator, valBatchGenerator, eval_saver,
                        checkpoint_dir_path, False, False)
            print('\tTotal evaluation time = {}'.format(time.time() -
                                                        start_time))

    #%%------------------------------------------------------------------
    # Infers.

    if use_file_batch_loader:
        testDirMgr = WorkingDirectoryManager(test_batch_dir_path_prefix,
                                             test_num_batch_dirs)

        #--------------------
        while True:
            test_dir_path = testDirMgr.requestDirectory()
            if test_dir_path is not None:
                break
            else:
                time.sleep(0.1)
        print('\tGot a test batch directory: {}.'.format(test_dir_path))

        testFileBatchGenerator = NpzFileBatchGenerator(
            test_images,
            test_labels,
            batch_size,
            False,
            False,
            batch_info_csv_filename=batch_info_csv_filename)
        testFileBatchGenerator.saveBatches(
            test_dir_path)  # Generates and saves batches.

        testDirMgr.returnDirectory(test_dir_path)

        #--------------------
        testFileBatchLoader = NpzFileBatchLoader(batch_info_csv_filename,
                                                 data_processing_functor=None)

        start_time = time.time()
        with infer_session.as_default() as sess:
            with sess.graph.as_default():
                inferences = swl_tf_util.infer_by_neural_net_and_file_batch_loader(
                    sess, nnInferrer, testFileBatchLoader, testDirMgr,
                    infer_saver, checkpoint_dir_path, False)
        print('\tTotal inference time = {}'.format(time.time() - start_time))
    else:
        testBatchGenerator = SimpleBatchGenerator(test_images, test_labels,
                                                  batch_size, False, False)

        start_time = time.time()
        with infer_session.as_default() as sess:
            with sess.graph.as_default():
                inferences = swl_tf_util.infer_by_neural_net_and_batch_generator(
                    sess, nnInferrer, testBatchGenerator, infer_saver,
                    checkpoint_dir_path, False)
        print('\tTotal inference time = {}'.format(time.time() - start_time))

    if inferences is not None:
        inferences = np.vstack(inferences)
        if num_classes >= 2:
            inferences = np.argmax(inferences, -1)
            groundtruths = np.argmax(test_labels, -1)
        else:
            inferences = np.around(inferences)
            groundtruths = test_labels
        correct_estimation_count = np.count_nonzero(
            np.equal(inferences, groundtruths))
        print('\tAccuracy = {} / {} = {}'.format(
            correct_estimation_count, groundtruths.size,
            correct_estimation_count / groundtruths.size))
    else:
        print('[SWL] Warning: Invalid inference results.')

    #--------------------
    # Closes sessions.

    if does_need_training:
        train_session.close()
        del train_session
        eval_session.close()
        del eval_session
    infer_session.close()
    del infer_session
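The FIXME in the "multiprocessing only" branch is why the thread-based variant exists: a tf.Session wraps native handles that cannot be pickled, and every argument to pool.apply_async() must be picklable. A tiny illustration of the failure (a sketch for TF 1.x; the exact exception message may vary):

import pickle

import tensorflow as tf

sess = tf.Session()
try:
    pickle.dumps(sess)  # Arguments sent to pool workers are pickled.
except Exception as ex:
    print('Cannot pickle a tf.Session:', ex)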
Example 5
def simple_npz_file_batch_generator_from_image_files_and_loader_example():
    num_examples = 256
    npy_input_filepaths, output_seqs = generate_image_file_dataset(
        './image_files', num_examples)
    num_loaded_files = 57

    num_epochs = 7
    batch_size = 12
    shuffle = True
    is_time_major = False

    batch_dir_path_prefix = './batch_dir'
    num_batch_dirs = 5
    dirMgr = WorkingDirectoryManager(batch_dir_path_prefix, num_batch_dirs)

    batch_info_csv_filename = 'batch_info.csv'
    #augmenter = augment_identically
    #augmenter = IdentityAugmenter()
    augmenter = ImgaugAugmenter()
    is_output_augmented = False

    #--------------------
    for epoch in range(num_epochs):
        print('>>>>> Epoch #{}.'.format(epoch))

        while True:
            dir_path = dirMgr.requestDirectory()
            if dir_path is not None:
                break
            else:
                time.sleep(0.1)

        print('\t>>>>> Directory: {}.'.format(dir_path))

        #fileBatchGenerator = NpzFileBatchGeneratorFromImageFiles(npy_input_filepaths, output_seqs, num_loaded_files, batch_size, shuffle, is_time_major)
        fileBatchGenerator = NpzFileBatchGeneratorFromImageFiles(
            npy_input_filepaths,
            output_seqs,
            num_loaded_files,
            batch_size,
            shuffle,
            is_time_major,
            augmenter=augmenter,
            is_output_augmented=is_output_augmented,
            batch_info_csv_filename=batch_info_csv_filename)
        num_saved_examples = fileBatchGenerator.saveBatches(
            dir_path)  # Generates and saves batches.

        fileBatchLoader = NpzFileBatchLoader(
            batch_info_csv_filename=batch_info_csv_filename)
        batches = fileBatchLoader.loadBatches(dir_path)  # Loads batches.

        #dirMgr.returnDirectory(dir_path)  # If dir_path is returned before the job is finished, it may be reused by a different job.

        num_loaded_examples = 0
        for idx, (batch_data, num_batch_examples) in enumerate(batches):
            # Can run in an individual thread or process.
            # Augment each batch (inputs & outputs).
            # Train with each batch (inputs & outputs).
            #print('\t{}: {}, {}, {}'.format(idx, num_batch_examples, batch_data[0].shape, batch_data[1].shape))
            print('\t{}: {}, {}-{}, {}-{}'.format(
                idx, num_batch_examples, batch_data[0].shape,
                np.max(np.reshape(batch_data[0], (batch_data[0].shape[0], -1)),
                       axis=-1), batch_data[1].shape,
                np.max(np.reshape(batch_data[1], (batch_data[1].shape[0], -1)),
                       axis=-1)))
            num_loaded_examples += num_batch_examples

        print('#saved examples =', num_saved_examples)
        print('#loaded examples =', num_loaded_examples)

        dirMgr.returnDirectory(dir_path)
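The commented-out IdentityAugmenter above is a convenient stand-in when the pipeline should run without real augmentation. A minimal sketch, assuming the augmenter is invoked as augmenter(inputs, outputs, is_output_augmented) like the other augmenters here (the real class lives in the SWL library and may differ):

class IdentityAugmenter(object):
    def __call__(self, inputs, outputs, is_output_augmented=False):
        # Pass inputs and outputs through unchanged.
        return inputs, outputs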