def test_train(gpu_number):
    """Smoke-test the training pipeline: build the full graph and run a few steps.

    Args:
        gpu_number: string passed to ``visible_device_list`` to select the GPU.
    """
    # Initialize the input class to get the configuration.
    config = configMain()
    manager = DatasetManager(config.config_input)

    # Batch tensors that are going to be used around.
    batch_tensor = manager.train.get_batch_tensor()
    # Built even though unused below, so the validation queue has its ops.
    batch_tensor_val = manager.validation.get_batch_tensor()

    config_gpu = tf.ConfigProto()
    config_gpu.gpu_options.visible_device_list = gpu_number
    sess = tf.Session(config=config_gpu)
    manager.start_training_queueing(sess)
    manager.start_validation_queueing(sess)

    training_manager = TrainManager(config.config_train)
    training_manager.build_network()
    training_manager.build_loss()
    training_manager.build_optimization()

    # Initializing session and the variables that control the session.
    sess.run(tf.global_variables_initializer())
    # FIX: tf.all_variables() is deprecated/removed in TF >= 1.x; use
    # tf.global_variables(), matching the other entry points in this file.
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=0)

    # Load a previous model if it is configured to restore.
    cpkt = 0
    if config.config_train.restore:
        cpkt = restore_session(sess, saver, config.models_path)

    # Run a few training steps.
    for i in range(10):
        training_manager.run_train_step(batch_tensor, sess, i)
def evaluate(gpu_number, experiment_name):
    """Build the experiment's graph and run its training loop with timing output.

    NOTE(review): despite its name this function runs training steps; it mirrors
    ``train`` closely — confirm whether evaluation-only behavior was intended.

    Args:
        gpu_number: string passed to ``visible_device_list`` to select the GPU.
        experiment_name: name of the experiment configuration module to import.
    """
    # Initialize the input class to get the configuration.
    conf_module = __import__(experiment_name)
    config = conf_module.configMain()
    manager = DatasetManager(conf_module.configInput())

    # Batch tensors that are going to be used around.
    batch_tensor = manager.train.get_batch_tensor()
    batch_tensor_val = manager.validation.get_batch_tensor()

    config_gpu = tf.ConfigProto()
    config_gpu.gpu_options.visible_device_list = gpu_number
    sess = tf.Session(config=config_gpu)
    manager.start_training_queueing(sess)
    manager.start_validation_queueing(sess)

    training_manager = TrainManager(conf_module.configTrain())
    training_manager.build_network()
    training_manager.build_loss()
    training_manager.build_optimization()

    # Initializing session and the variables that control the session.
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=0)

    # Load a previous model if it is configured to restore.
    cpkt = 0
    if config.restore:
        cpkt = restore_session(sess, saver, config.models_path)

    # Resume from the last iteration trained (0 if nothing was restored).
    initialIteration = get_last_iteration(cpkt)

    output_manager = OutputManager(conf_module.configOutput(), training_manager,
                                   sess, batch_tensor_val)

    for i in range(initialIteration, config.number_iterations):
        # BUG FIX: start_time was never assigned before computing duration,
        # which raised NameError on the first iteration.
        start_time = time.time()

        # Run the training step and monitor its execution time.
        training_manager.run_train_step(batch_tensor, sess, i)
        duration = time.time() - start_time

        # With the current trained net, let the output manager print and save
        # all the outputs.
        output_manager.print_outputs(i, duration)
def load_system(config):
    """Prepare *config* for inference and return a TrainManager with its network built.

    Forces single-sample, non-training mode before constructing the manager.
    """
    # Inference settings: one sample at a time, no training-specific behavior.
    config.batch_size = 1
    config.is_training = False

    manager = TrainManager(config)
    manager.build_network()
    return manager
def load_system(config):
    """Prepare *config* for inference and return a TrainManager with its network built.

    Forces single-sample, non-training mode before constructing the manager.
    """
    # Inference settings: one sample at a time, no training-specific behavior.
    config.batch_size = 1
    config.is_training = False

    training_manager = TrainManager(config)
    training_manager.build_network()
    # FIX: removed a stray leftover string literal ("Initializing Session ...")
    # that was a no-op statement and misleading — no session is created here.
    return training_manager
def load_system(config):
    """Prepare *config* for inference and return a TrainManager with its network built.

    Selects the ERFNet one-hot segmentation network when the config declares
    ``seg_network_erfnet_one_hot``; otherwise builds the standard network.
    """
    # Inference settings: one sample at a time, no training-specific behavior.
    config.batch_size = 1
    config.is_training = False

    training_manager = TrainManager(config, None)

    if hasattr(config, 'seg_network_erfnet_one_hot'):
        training_manager.build_seg_network_erfnet_one_hot()
        # FIX: corrected "Bulding" typo in the log message.
        print("Building: seg_network_erfnet_one_hot")
    else:
        training_manager.build_network()
        print("Building: standard_network")

    return training_manager
def train(gpu_number, experiment_name):
    """Train the network described by *experiment_name* on the selected GPU.

    Checkpoints every 6000 iterations and dumps the first batch's inputs to
    ``batch_values.txt`` once, to allow checking whether the data is balanced.

    Args:
        gpu_number: string passed to ``visible_device_list`` to select the GPU.
        experiment_name: name of the experiment configuration module to import.
    """
    # Initialize the input class to get the configuration.
    conf_module = __import__(experiment_name)
    config = conf_module.configMain()
    manager = DatasetManager(conf_module.configInput())

    # Batch tensors that are going to be used around.
    batch_tensor = manager.train.get_batch_tensor()
    batch_tensor_val = manager.validation.get_batch_tensor()

    config_gpu = tf.ConfigProto()
    config_gpu.gpu_options.visible_device_list = gpu_number
    sess = tf.Session(config=config_gpu)
    manager.start_training_queueing(sess)
    manager.start_validation_queueing(sess)

    training_manager = TrainManager(conf_module.configTrain())
    # FIX: converted Python 2 `print 'x'` statements to the function form used
    # by the rest of this file (also Python 3 compatible).
    print('building network')
    training_manager.build_network()
    print('building loss')
    training_manager.build_loss()
    print('building optimization')
    training_manager.build_optimization()

    # Initializing session and the variables that control the session.
    print('initializing variables')
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=0)

    # Load a previous model if it is configured to restore.
    print('restoring checkpoint')
    cpkt = 0
    if config.restore:
        cpkt = restore_session(sess, saver, config.models_path)

    # Resume from the last iteration trained (0 if nothing was restored).
    initialIteration = get_last_iteration(cpkt)

    output_manager = OutputManager(conf_module.configOutput(), training_manager,
                                   sess, batch_tensor_val)

    print_flag = True  # dump the first batch exactly once
    for i in range(initialIteration, config.number_iterations):
        start_time = time.time()

        # Save the model every 6000 iterations (comment previously said 300,
        # contradicting the code — the code's 6000 is kept).
        if i % 6000 == 0:
            save_model(saver, sess, config.models_path, i)

        # Run the training step and monitor its execution time.
        training_manager.run_train_step(batch_tensor, sess, i)
        duration = time.time() - start_time

        # With the current trained net, let the output manager print and save
        # all the outputs.
        output_manager.print_outputs(i, duration)

        if print_flag:
            # Print the entire batch (only once) to check if it is balanced.
            np.savetxt(
                'batch_values.txt',
                output_manager._logger._last_batch_inputs,
                newline=' ',
            )
            print_flag = False
def train(experiment_name, memory_fraction):
    """Train the experiment's network, optionally on top of a perception stack.

    Checkpoints every 3000 iterations; when a pretrained segmentation model is
    configured, its variables are restored separately and excluded from the
    control-network checkpoint so the segmentation model can be reused as-is.

    Args:
        experiment_name: name of the experiment configuration module to import.
        memory_fraction: fraction of GPU memory this process may allocate.
    """
    # Initialize the input classes to get the configuration.
    conf_module = __import__(experiment_name)
    config_main = conf_module.configMain()
    config_input = conf_module.configInput()

    if config_input.use_perception_stack:
        # Enable only the perception sub-modules that have replicates > 0.
        use_mode = {}
        for key in config_input.perception_num_replicates:
            if config_input.perception_num_replicates[key] > 0:
                # Each perception batch size must evenly divide the training
                # batch size so batches can be split across the replicas.
                assert (config_input.batch_size %
                        config_input.perception_batch_sizes[key] == 0)
                use_mode[key] = True
            else:
                use_mode[key] = False

        all_params = use_mode.copy()
        if hasattr(config_input, "perception_other_params"):
            all_params.update(config_input.perception_other_params)

        perception_interface = Perceptions(
            batch_size=config_input.perception_batch_sizes,
            gpu_assignment=config_input.perception_gpus,
            compute_methods={},
            viz_methods={},
            num_replicates=config_input.perception_num_replicates,
            path_config=config_input.perception_paths,
            **all_params
        )
        # Give the perception replicas time to come up before queueing data.
        time.sleep(config_input.perception_initialization_sleep)
    else:
        perception_interface = None

    config_gpu = tf.ConfigProto()
    config_gpu.gpu_options.per_process_gpu_memory_fraction = float(memory_fraction)
    sess = tf.Session(config=config_gpu)

    dataset_manager = DatasetManager(conf_module.configInput(), perception_interface)
    batch_tensor = dataset_manager.train.get_batch_tensor()
    batch_tensor_val = dataset_manager.validation.get_batch_tensor()
    dataset_manager.start_training_queueing(sess)
    dataset_manager.start_validation_queueing(sess)

    # FIX: instantiate the training config once and reuse it; the original
    # built a fresh configTrain() for each of its three uses below.
    config_train = conf_module.configTrain()
    training_manager = TrainManager(config_train, None,
                                    placeholder_input=False,
                                    batch_tensor=batch_tensor)
    if hasattr(config_train, 'seg_network_erfnet_one_hot'):
        # FIX: corrected "Bulding" typo in the log messages.
        print("Building: seg_network_erfnet_one_hot")
        training_manager.build_seg_network_erfnet_one_hot()
    else:
        print("Building: standard_network")
        training_manager.build_network()

    training_manager.build_loss()
    training_manager.build_optimization()

    sess.run(tf.global_variables_initializer())

    # Leave the segmentation model's variables out of the main checkpoint,
    # since we want to be able to rerun that model sometime.
    if config_main.segmentation_model is not None:  # FIX: `is not None`, not `!= None`
        print("the segmentation model name is: ",
              config_main.segmentation_model_name)
        variables_to_restore = slim.get_variables(
            scope=str(config_main.segmentation_model_name))
        saver = tf.train.Saver(variables_to_restore, max_to_keep=0)
        restore_session(sess, saver, config_main.segmentation_model)
        # Everything except the segmentation scope belongs to the control net.
        variables_to_restore = list(set(tf.global_variables()) - set(
            slim.get_variables(scope=str(config_main.segmentation_model_name))))
    else:
        variables_to_restore = tf.global_variables()

    saver = tf.train.Saver(variables_to_restore, max_to_keep=0)
    cpkt = restore_session(sess, saver, config_main.models_path)
    if not cpkt and hasattr(config_main, "reload_other_models"):
        # Warm-start from another experiment's weights, restarting at iter 0.
        cpkt = restore_session(sess, saver, config_main.reload_other_models)
        initialIteration = 0
    else:
        initialIteration = get_last_iteration(cpkt)

    # Saver covering all variables, for full-graph checkpoints.
    all_saver = tf.train.Saver(tf.global_variables(), max_to_keep=0)

    # Creates a manager for the screen output and also validation outputs.
    if config_main.output_is_on:
        output_manager = OutputManager(conf_module.configOutput(),
                                       training_manager, config_train,
                                       sess, batch_tensor_val)

    for i in range(initialIteration, config_main.number_iterations):
        start_time = time.time()

        # Save the model every 3000 iterations.
        if i % 3000 == 0:
            if config_main.segmentation_model is not None:
                # Control-net-only checkpoint (segmentation vars excluded).
                save_model(saver, sess, config_main.models_path + '/ctrl', i)
            save_model(all_saver, sess, config_main.models_path, i)

        training_manager.run_train_step(batch_tensor, sess, i)
        duration = time.time() - start_time

        # With the current trained net, let the output manager print and save
        # all the outputs.
        if config_main.output_is_on:
            output_manager.print_outputs(i, duration)