def continue_training(logdir):
    """
    Continues training of a model.

    This will load model files and weights found in logdir and continue
    an aborted training.

    Parameters
    ----------
    logdir : string
        Directory with logs.
    """
    hypes = utils.load_hypes_from_logdir(logdir)
    modules = utils.load_modules_from_logdir(logdir)

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Session() as sess:

        # build the graph based on the loaded modules
        with tf.name_scope("Queues"):
            queue = modules['input'].create_queues(hypes, 'train')

        tv_graph = core.build_training_graph(hypes, queue, modules)

        # prepare the tv session
        tv_sess = core.start_tv_session(hypes)
        sess = tv_sess['sess']
        saver = tv_sess['saver']

        logging_file = os.path.join(logdir, 'output.log')
        utils.create_filewrite_handler(logging_file, mode='a')

        logging.info("Continue training.")

        cur_step = core.load_weights(logdir, sess, saver)
        if cur_step is None:
            logging.warning("Loaded global_step is None.")
            logging.warning("This could mean,"
                            " that no weights have been loaded.")
            logging.warning("Starting Training with step 0.")
            cur_step = 0

        with tf.name_scope('Validation'):
            tf.get_variable_scope().reuse_variables()
            image_pl = tf.placeholder(tf.float32)
            image = tf.expand_dims(image_pl, 0)
            image.set_shape([1, None, None, 3])
            inf_out = core.build_inference_graph(hypes, modules,
                                                 image=image)
            tv_graph['image_pl'] = image_pl
            tv_graph['inf_out'] = inf_out

        # Start the data load
        modules['input'].start_enqueuing_threads(hypes, queue, 'train', sess)

        # And then after everything is built, start the training loop.
        run_training(hypes, modules, tv_graph, tv_sess, cur_step)

        # stop the input threads
        tv_sess['coord'].request_stop()
        tv_sess['coord'].join(tv_sess['threads'])
def continue_training(logdir):
    """
    Continues training of a model.

    This will load model files and weights found in logdir and continue
    an aborted training.

    Parameters
    ----------
    logdir : string
        Directory with logs.
    """
    hypes = utils.load_hypes_from_logdir(logdir)
    modules = utils.load_modules_from_logdir(logdir)
    data_input, arch, objective, solver = modules

    # append output to output.log
    logging_file = os.path.join(logdir, 'output.log')
    utils.create_filewrite_handler(logging_file, mode='a')

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Graph().as_default() as graph:

        # build the graph based on the loaded modules
        graph_ops = build_training_graph(hypes, modules)
        q = graph_ops[0]

        # prepare the tv session
        sess_coll = core.start_tv_session(hypes)
        sess, saver, summary_op, summary_writer, coord, threads = sess_coll

        if hasattr(objective, 'evaluate'):
            with tf.name_scope('Validation'):
                image_pl, label_pl = _create_input_placeholder()
                image = tf.expand_dims(image_pl, 0)
                softmax = core.build_inference_graph(hypes, modules,
                                                     image=image,
                                                     label=label_pl)

        # Load weights from logdir
        cur_step = core.load_weights(logdir, sess, saver)

        # Start the data load
        _start_enqueuing_threads(hypes, q, sess, data_input)

        # And then after everything is built, start the training loop.
        start_time = time.time()
        for step in xrange(cur_step + 1, hypes['solver']['max_steps']):
            start_time = run_training_step(hypes, step, start_time,
                                           graph_ops, sess_coll,
                                           objective, image_pl, softmax)

        # stop the input threads
        coord.request_stop()
        coord.join(threads)
def continue_training(logdir):
    """
    Continues training of a model.

    This will load model files and weights found in logdir and continue
    an aborted training.

    Parameters
    ----------
    logdir : string
        Directory with logs.
    """
    hypes = utils.load_hypes_from_logdir(logdir)
    modules = utils.load_modules_from_logdir(logdir)
    data_input, arch, objective, solver = modules

    # append output to output.log
    logging_file = os.path.join(logdir, 'output.log')
    utils.create_filewrite_handler(logging_file, mode='a')

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Graph().as_default() as graph:

        # build the graph based on the loaded modules
        graph_ops = build_training_graph(hypes, modules)
        q = graph_ops[0]

        # prepare the tv session
        sess_coll = core.start_tv_session(hypes)
        sess, saver, summary_op, summary_writer, coord, threads = sess_coll

        if hasattr(objective, 'evaluate'):
            with tf.name_scope('Validation'):
                image_pl, label_pl = _create_input_placeholder()
                image = tf.expand_dims(image_pl, 0)
                softmax = core.build_inference_graph(hypes, modules,
                                                     image=image,
                                                     label=label_pl)

        # Load weights from logdir
        cur_step = core.load_weights(logdir, sess, saver)

        # Start the data load
        _start_enqueuing_threads(hypes, q, sess, data_input)

        # And then after everything is built, start the training loop.
        start_time = time.time()
        for step in xrange(cur_step + 1, hypes['solver']['max_steps']):
            start_time = run_training_step(hypes, step, start_time,
                                           graph_ops, sess_coll,
                                           modules, image_pl, softmax)

        # stop the input threads
        coord.request_stop()
        coord.join(threads)
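
# Note: `_create_input_placeholder` is called above but not defined in this
# section. A minimal sketch of what it is assumed to return: two untyped
# float32 placeholders (one for the image, one for the label), with shapes
# left open so images of arbitrary resolution can be fed.
def _create_input_placeholder():
    image_pl = tf.placeholder(tf.float32)
    label_pl = tf.placeholder(tf.float32)
    return image_pl, label_pl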
def do_training(hypes):
    """
    Train model for a number of steps.

    This trains the model for at most hypes['solver']['max_steps'].
    It shows an update every utils.cfg.step_show steps and writes
    the model to hypes['dirs']['output_dir'] every utils.cfg.step_eval
    steps.

    Parameters
    ----------
    hypes : dict
        Hyperparameters
    """
    # Load the data input, architecture, objective and solver modules.
    modules = utils.load_modules_from_hypes(hypes)

    # set to allocate memory on GPU as needed
    # For more details, look at
    # https://stackoverflow.com/questions/36927607/how-can-i-solve-ran-out-of-gpu-memory-in-tensorflow
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Session(config=config) as sess:

        # build the graph based on the loaded modules
        with tf.name_scope("Queues"):
            queue = modules['input'].create_queues(hypes, 'train')

        tv_graph = core.build_training_graph(hypes, queue, modules)

        # prepare the tv session
        tv_sess = core.start_tv_session(hypes)

        with tf.name_scope('Validation'):
            tf.get_variable_scope().reuse_variables()
            image_pl = tf.placeholder(tf.float32)
            image = tf.expand_dims(image_pl, 0)
            image.set_shape([1, None, None, 3])
            inf_out = core.build_inference_graph(hypes, modules,
                                                 image=image)
            tv_graph['image_pl'] = image_pl
            tv_graph['inf_out'] = inf_out

        # Start the data load
        modules['input'].start_enqueuing_threads(hypes, queue, 'train', sess)

        # And then after everything is built, start the training loop.
        train.run_training(hypes, modules, tv_graph, tv_sess)

        # stop the input threads
        tv_sess['coord'].request_stop()
        tv_sess['coord'].join(tv_sess['threads'])
def do_training(hypes):
    """
    Train model for a number of steps.

    This trains the model for at most hypes['solver']['max_steps'].
    It shows an update every utils.cfg.step_show steps and writes
    the model to hypes['dirs']['output_dir'] every utils.cfg.step_eval
    steps.

    Parameters
    ----------
    hypes : dict
        Hyperparameters
    """
    # Load the data input, architecture, objective and solver modules.
    modules = utils.load_modules_from_hypes(hypes)
    data_input, arch, objective, solver = modules

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Graph().as_default():

        # build the graph based on the loaded modules
        graph_ops = build_training_graph(hypes, modules)
        q = graph_ops[0]

        # prepare the tv session
        sess_coll = core.start_tv_session(hypes)
        sess, saver, summary_op, summary_writer, coord, threads = sess_coll

        with tf.name_scope('Validation'):
            image_pl, label_pl = _create_input_placeholder()
            image = tf.expand_dims(image_pl, 0)
            softmax = core.build_inference_graph(hypes, modules,
                                                 image=image,
                                                 label=label_pl)

        # Start the data load
        _start_enqueuing_threads(hypes, q, sess, data_input)

        # And then after everything is built, start the training loop.
        start_time = time.time()
        for step in xrange(hypes['solver']['max_steps']):
            start_time = run_training_step(hypes, step, start_time,
                                           graph_ops, sess_coll,
                                           modules, image_pl, softmax)
            if hasattr(solver, 'update_learning_rate'):
                solver.update_learning_rate(hypes, step)

        # stop the input threads
        coord.request_stop()
        coord.join(threads)
def do_training(hypes):
    """
    Train model for a number of steps.

    This trains the model for at most hypes['solver']['max_steps'].
    It shows an update every utils.cfg.step_show steps and writes
    the model to hypes['dirs']['output_dir'] every utils.cfg.step_eval
    steps.

    Parameters
    ----------
    hypes : dict
        Hyperparameters
    """
    # Load the data input, architecture, objective and solver modules.
    modules = utils.load_modules_from_hypes(hypes)
    data_input, arch, objective, solver = modules

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Graph().as_default():

        # build the graph based on the loaded modules
        graph_ops = build_training_graph(hypes, modules)
        q = graph_ops[0]

        # prepare the tv session
        sess_coll = core.start_tv_session(hypes)
        sess, saver, summary_op, summary_writer, coord, threads = sess_coll

        with tf.name_scope('Validation'):
            image_pl, label_pl = _create_input_placeholder()
            image = tf.expand_dims(image_pl, 0)
            softmax = core.build_inference_graph(hypes, modules,
                                                 image=image,
                                                 label=label_pl)

        # Start the data load
        _start_enqueuing_threads(hypes, q, sess, data_input)

        # And then after everything is built, start the training loop.
        start_time = time.time()
        for step in xrange(hypes['solver']['max_steps']):
            start_time = run_training_step(hypes, step, start_time,
                                           graph_ops, sess_coll,
                                           objective, image_pl, softmax)
            if hasattr(solver, 'update_learning_rate'):
                solver.update_learning_rate(hypes, step)

        # stop the input threads
        coord.request_stop()
        coord.join(threads)
def do_training(hypes):
    """
    Train model for a number of steps.

    This trains the model for at most hypes['solver']['max_steps'].
    It shows an update every utils.cfg.step_show steps and writes
    the model to hypes['dirs']['output_dir'] every utils.cfg.step_eval
    steps.

    Parameters
    ----------
    hypes : dict
        Hyperparameters
    """
    modules = utils.load_modules_from_hypes(hypes)

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Session() as sess:

        # build the graph based on the loaded modules
        logging.info("..Creating queues")
        with tf.name_scope("Queues"):
            queue = modules['input'].create_queues(hypes, 'train')

        logging.info("..Building training graph")
        tv_graph = core.build_training_graph(hypes, queue, modules)

        # prepare the tv session
        tv_sess = core.start_tv_session(hypes)

        with tf.name_scope('Validation'):
            tf.get_variable_scope().reuse_variables()
            image_pl = tf.placeholder(tf.float32)
            image = tf.expand_dims(image_pl, 0)
            image.set_shape([1, None, None, 3])
            inf_out = core.build_inference_graph(hypes, modules,
                                                 image=image)
            tv_graph['image_pl'] = image_pl
            tv_graph['inf_out'] = inf_out

        # Start the data load
        logging.info("..Enqueuing files")
        modules['input'].start_enqueuing_threads(hypes, queue, 'train', sess)

        # And then after everything is built, start the training loop.
        logging.info("..Initializing the training")
        run_training(hypes, modules, tv_graph, tv_sess)

        # stop the input threads
        tv_sess['coord'].request_stop()
        tv_sess['coord'].join(tv_sess['threads'])
def do_training(hypes):
    """
    Train model for a number of steps.

    This trains the model for at most hypes['solver']['max_steps'].
    It shows an update every utils.cfg.step_show steps and writes
    the model to hypes['dirs']['output_dir'] every utils.cfg.step_eval
    steps.

    Parameters
    ----------
    hypes : dict
        Hyperparameters
    """
    # Load the data input, architecture, objective and solver modules.
    modules = utils.load_modules_from_hypes(hypes)

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Session() as sess:

        # build the graph based on the loaded modules
        with tf.name_scope("Queues"):
            queue = modules['input'].create_queues(hypes, 'train')

        tv_graph = core.build_training_graph(hypes, queue, modules)

        # prepare the tv session
        tv_sess = core.start_tv_session(hypes)

        with tf.name_scope('Validation'):
            tf.get_variable_scope().reuse_variables()
            image_pl = tf.placeholder(tf.float32)
            image = tf.expand_dims(image_pl, 0)
            image.set_shape([1, None, None, 3])
            inf_out = core.build_inference_graph(hypes, modules,
                                                 image=image)
            tv_graph['image_pl'] = image_pl
            tv_graph['inf_out'] = inf_out

        # Start the data load
        modules['input'].start_enqueuing_threads(hypes, queue, 'train', sess)

        # And then after everything is built, start the training loop.
        run_training(hypes, modules, tv_graph, tv_sess)

        # stop the input threads
        tv_sess['coord'].request_stop()
        tv_sess['coord'].join(tv_sess['threads'])
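
# For illustration only: a minimal sketch of the `hypes` dictionary that
# `do_training` consumes, restricted to the keys referenced in this section.
# The values below are placeholders, not recommended defaults; real hypes
# files contain many more entries (architecture, data, solver settings).
_example_hypes = {
    "solver": {"max_steps": 100000},
    "dirs": {"output_dir": "RUNS/example_run",
             "data_dir": "DATA"},
}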
def do_analyze(logdir):
    """
    Analyze a trained model.

    This will load model files and weights found in logdir and run
    a basic analysis.

    Parameters
    ----------
    logdir : string
        Directory with logs.
    """
    hypes = utils.load_hypes_from_logdir(logdir)
    modules = utils.load_modules_from_logdir(logdir)
    data_input, arch, objective, solver = modules

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Graph().as_default():

        # prepare the tv session
        with tf.name_scope('Validation'):
            image_pl, label_pl = _create_input_placeholder()
            image = tf.expand_dims(image_pl, 0)
            softmax = core.build_inference_graph(hypes, modules,
                                                 image=image,
                                                 label=label_pl)

        sess_coll = core.start_tv_session(hypes)
        sess, saver, summary_op, summary_writer, coord, threads = sess_coll

        core.load_weights(logdir, sess, saver)

        eval_dict, images = objective.tensor_eval(hypes, sess, image_pl,
                                                  softmax)

        logging_file = os.path.join(logdir, "eval/analysis.log")
        utils.create_filewrite_handler(logging_file)

        utils.print_eval_dict(eval_dict)
        _write_images_to_logdir(images, logdir)

    return
def do_analyze(logdir):
    """
    Analyze a trained model.

    This will load model files and weights found in logdir and run
    a basic analysis.

    Parameters
    ----------
    logdir : string
        Directory with logs.
    """
    hypes = utils.load_hypes_from_logdir(logdir)
    modules = utils.load_modules_from_logdir(logdir)
    data_input, arch, objective, solver = modules

    logging_file = os.path.join(logdir, "eval/analysis.log")
    utils.create_filewrite_handler(logging_file)

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Graph().as_default():

        # build the graph based on the loaded modules
        graph_ops = core.build_graph(hypes, modules, train=False)
        q, train_op, loss, eval_lists = graph_ops
        q = graph_ops[0]

        # prepare the tv session
        sess_coll = core.start_tv_session(hypes)
        sess, saver, summary_op, summary_writer, coord, threads = sess_coll

        core.load_weights(logdir, sess, saver)

        # Start the data load
        data_input.start_enqueuing_threads(hypes, q['val'], 'val', sess,
                                           hypes['dirs']['data_dir'])

        return core.do_eval(hypes, eval_lists, 'val', sess)
def build_united_model(meta_hypes):
    logging.info("Initialize training folder")

    subhypes = {}
    subgraph = {}
    submodules = {}
    subqueues = {}

    subgraph['debug_ops'] = {}

    base_path = meta_hypes['dirs']['base_path']

    first_iter = True

    for model in meta_hypes['model_list']:
        subhypes_file = os.path.join(base_path, meta_hypes['models'][model])
        with open(subhypes_file, 'r') as f:
            logging.info("f: %s", f)
            subhypes[model] = json.load(f)

        hypes = subhypes[model]
        utils.set_dirs(hypes, subhypes_file)
        hypes['dirs']['output_dir'] = meta_hypes['dirs']['output_dir']
        hypes['dirs']['data_dir'] = meta_hypes['dirs']['data_dir']
        train.initialize_training_folder(hypes, files_dir=model,
                                         logging=first_iter)
        meta_hypes['dirs']['image_dir'] = hypes['dirs']['image_dir']

        submodules[model] = utils.load_modules_from_hypes(
            hypes, postfix="_%s" % model)
        modules = submodules[model]

        logging.info("Build %s computation Graph.", model)
        with tf.name_scope("Queues_%s" % model):
            subqueues[model] = modules['input'].create_queues(hypes, 'train')

        logging.info('Building Model: %s' % model)

        subgraph[model] = build_training_graph(hypes, subqueues[model],
                                               modules, first_iter)

        first_iter = False

    if len(meta_hypes['models']) == 2:
        _recombine_2_losses(meta_hypes, subgraph, subhypes, submodules)
    else:
        _recombine_3_losses(meta_hypes, subgraph, subhypes, submodules)

    hypes = subhypes[meta_hypes['model_list'][0]]

    tv_sess = core.start_tv_session(hypes)
    sess = tv_sess['sess']

    for model in meta_hypes['model_list']:
        hypes = subhypes[model]
        modules = submodules[model]
        optimizer = modules['solver']

        with tf.name_scope('Validation_%s' % model):
            tf.get_variable_scope().reuse_variables()

            image_pl = tf.placeholder(tf.float32)
            image = tf.expand_dims(image_pl, 0)
            inf_out = core.build_inference_graph(hypes, modules,
                                                 image=image)
            subgraph[model]['image_pl'] = image_pl
            subgraph[model]['inf_out'] = inf_out

        # Start the data load
        modules['input'].start_enqueuing_threads(hypes, subqueues[model],
                                                 'train', sess)

    target_file = os.path.join(meta_hypes['dirs']['output_dir'], 'hypes.json')
    with open(target_file, 'w') as outfile:
        json.dump(meta_hypes, outfile, indent=2, sort_keys=True)

    return subhypes, submodules, subgraph, tv_sess
def do_training(hypes):
    """
    Train model for a number of steps.

    This trains the model for at most hypes['solver']['max_steps'].
    It shows an update every utils.cfg.step_show steps and writes
    the model to hypes['dirs']['output_dir'] every utils.cfg.step_eval
    steps.

    Parameters
    ----------
    hypes : dict
        Hyperparameters
    """
    # Load the data input, architecture, objective and solver modules.
    modules = utils.load_modules_from_hypes(hypes)

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Session() as sess:

        # build the graph based on the loaded modules
        with tf.name_scope("Queues"):
            queue = modules['input'].create_queues(hypes, 'train')

        regression_weights = tf.placeholder(dtype=tf.float32, shape=(3,))
        hypes['solver']['regression_weights'] = regression_weights
        tv_graph = core.build_training_graph(hypes, queue, modules)

        # prepare the tv session
        tv_sess = core.start_tv_session(hypes)

        with tf.name_scope('Validation'):
            tf.get_variable_scope().reuse_variables()
            image_pl = tf.placeholder(tf.float32)
            calib = tf.placeholder(tf.float32,
                                   shape=[1, hypes['grid_height'],
                                          hypes['grid_width'], 3, 4])
            xy_scale = tf.placeholder(tf.float32,
                                      shape=[1, hypes['grid_height'],
                                             hypes['grid_width'], 2])
            image = tf.expand_dims(image_pl, 0)
            image.set_shape([1, 384, 1248, 3])
            inf_out, encoder_out = core.build_inference_graph(
                hypes, modules, image, calib, xy_scale)
            tv_graph['image_pl'] = image_pl
            tv_graph['inf_out'] = inf_out
            tv_graph['calib_pl'] = calib
            tv_graph['xy_scale_pl'] = xy_scale
            tv_graph['encoder_out'] = encoder_out

        all_variables = tf.get_collection_ref(tf.GraphKeys.GLOBAL_VARIABLES)
        sess.run(tf.variables_initializer(all_variables))

        # Restore pre-trained weights; variables whose names contain
        # "beta" or "Adam" are excluded from the restore.
        var_list = [var for var in all_variables
                    if "beta" not in var.name and 'Adam' not in var.name]
        saver = tf.train.Saver(var_list=var_list)
        saver.restore(sess, hypes['pretrained'])

        # Start the data load
        modules['input'].start_enqueuing_threads(hypes, queue, 'train', sess)

        # And then after everything is built, start the training loop.
        run_training(hypes, modules, tv_graph, tv_sess)

        # stop the input threads
        tv_sess['coord'].request_stop()
        tv_sess['coord'].join(tv_sess['threads'])
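
# For illustration only: a hypothetical helper showing feed values whose
# shapes match the validation placeholders created above (the image is fed
# unbatched and expanded to [1, 384, 1248, 3] by tf.expand_dims; calib and
# xy_scale are already batched). Assumes numpy is imported as np, as
# elsewhere in this section.
def _make_dummy_validation_feed(hypes, image_pl, calib_pl, xy_scale_pl):
    gh, gw = hypes['grid_height'], hypes['grid_width']
    return {
        image_pl: np.zeros((384, 1248, 3), dtype=np.float32),
        calib_pl: np.zeros((1, gh, gw, 3, 4), dtype=np.float32),
        xy_scale_pl: np.zeros((1, gh, gw, 2), dtype=np.float32),
    }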
def main(_):
    utils.set_gpus_to_use()

    try:
        import tensorvision.train
        import tensorflow_fcn.utils
    except ImportError:
        logging.error("Could not import the submodules.")
        logging.error("Please execute:"
                      "'git submodule update --init --recursive'")
        exit(1)

    with open(tf.app.flags.FLAGS.hypes, 'r') as f:
        logging.info("f: %s", f)
        hypes = json.load(f)

    utils.load_plugins()

    if 'TV_DIR_RUNS' in os.environ:
        runs_dir = os.path.join(os.environ['TV_DIR_RUNS'], 'KittiSeg')
    else:
        runs_dir = 'RUNS'

    utils.set_dirs(hypes, tf.app.flags.FLAGS.hypes)
    utils._add_paths_to_sys(hypes)
    train.maybe_download_and_extract(hypes)
    maybe_download_and_extract(runs_dir)

    logging.info("Trimming weights.")
    logdir = os.path.join(runs_dir, FLAGS.RUN)
    modules = utils.load_modules_from_hypes(hypes)

    with tf.Graph().as_default():

        # build the graph based on the loaded modules
        with tf.name_scope("Queues"):
            queue = modules['input'].create_queues(hypes, 'train')

        tv_graph = core.build_training_graph(hypes, queue, modules)

        # prepare the tv session
        with tf.Session().as_default():
            tv_sess = core.start_tv_session(hypes)
            sess = tv_sess['sess']
            saver = tv_sess['saver']

            cur_step = core.load_weights(logdir, sess, saver)
            if cur_step is None:
                logging.warning("Loaded global_step is None.")
                logging.warning("This could mean,"
                                " that no weights have been loaded.")
                logging.warning("Starting Training with step 0.")
                cur_step = 0

            with tf.name_scope('Validation'):
                tf.get_variable_scope().reuse_variables()
                image_pl = tf.placeholder(tf.float32)
                image = tf.expand_dims(image_pl, 0)
                image.set_shape([1, None, None, 3])
                inf_out = core.build_inference_graph(hypes, modules,
                                                     image=image)
                tv_graph['image_pl'] = image_pl
                tv_graph['inf_out'] = inf_out

            # prepare the tv session
            image_pl = tf.placeholder(tf.float32)
            image = tf.expand_dims(image_pl, 0)
            image.set_shape([1, None, None, 3])
            inf_out = core.build_inference_graph(hypes, modules, image=image)

            # Create a session for running Ops on the Graph.
            trim_dir = 'RUNS/trimmed'
            shutil.copytree(logdir, trim_dir)
            shutil.copy(tf.app.flags.FLAGS.hypes,
                        os.path.join(trim_dir, 'model_files', 'hypes.json'))
            sess = tf.Session()
            saver = tf.train.Saver()
            core.load_weights(trim_dir, sess, saver)

            # Zero out the lowest-L1 kernels in every layer selected for
            # pruning.
            for weight in tf.contrib.model_pruning.get_masks():
                if any([layer in weight.name
                        for layer in hypes['layer_pruning']['layers']]):
                    weight_value = tv_sess['sess'].run(weight)
                    kernel_count = int(
                        weight_value.shape[3] *
                        hypes['layer_pruning']['layer_sparsity'])
                    l1_values = np.sum(np.abs(weight_value), axis=(0, 1, 2))
                    toss_kernels = l1_values.argsort()[:kernel_count]
                    weight_value[:, :, :, toss_kernels] = 0
                    assign_op = tf.assign(weight, tf.constant(weight_value))
                    tv_sess['sess'].run(assign_op)

            checkpoint_path = os.path.join(trim_dir, 'model.ckpt')
            tv_sess['saver'].save(sess, checkpoint_path, global_step=cur_step)

    train.continue_training(trim_dir)
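
# For illustration only: the pruning rule applied in `main` above, isolated
# as a small, self-contained NumPy function. Given a conv kernel of shape
# (height, width, in_channels, out_channels), it zeroes the given fraction
# of output kernels with the smallest L1 norm. The function name and the
# sparsity value in the example call are hypothetical.
def _prune_kernels_by_l1(weight_value, layer_sparsity):
    kernel_count = int(weight_value.shape[3] * layer_sparsity)
    l1_values = np.sum(np.abs(weight_value), axis=(0, 1, 2))
    toss_kernels = l1_values.argsort()[:kernel_count]
    pruned = weight_value.copy()
    pruned[:, :, :, toss_kernels] = 0
    return pruned


# Example: zero out half of the 64 output kernels of a random 3x3x16x64 bank.
# w = np.random.randn(3, 3, 16, 64).astype(np.float32)
# w_pruned = _prune_kernels_by_l1(w, 0.5)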
def load_united_model(logdir):
    subhypes = {}
    subgraph = {}
    submodules = {}
    subqueues = {}

    subgraph['debug_ops'] = {}

    first_iter = True

    meta_hypes = utils.load_hypes_from_logdir(logdir, subdir="",
                                              base_path='hypes')
    for model in meta_hypes['model_list']:
        subhypes[model] = utils.load_hypes_from_logdir(logdir, subdir=model)
        hypes = subhypes[model]
        hypes['dirs']['output_dir'] = meta_hypes['dirs']['output_dir']
        hypes['dirs']['image_dir'] = meta_hypes['dirs']['image_dir']
        hypes['dirs']['data_dir'] = meta_hypes['dirs']['data_dir']
        submodules[model] = utils.load_modules_from_logdir(logdir,
                                                           dirname=model,
                                                           postfix=model)
        modules = submodules[model]

        logging.info("Build %s computation Graph.", model)
        with tf.name_scope("Queues_%s" % model):
            subqueues[model] = modules['input'].create_queues(hypes, 'train')

        logging.info('Building Model: %s' % model)

        subgraph[model] = build_training_graph(hypes, subqueues[model],
                                               modules, first_iter)

        first_iter = False

    if len(meta_hypes['model_list']) == 2:
        _recombine_2_losses(meta_hypes, subgraph, subhypes, submodules)
    else:
        _recombine_3_losses(meta_hypes, subgraph, subhypes, submodules)

    hypes = subhypes[meta_hypes['model_list'][0]]

    tv_sess = core.start_tv_session(hypes)
    sess = tv_sess['sess']
    saver = tv_sess['saver']

    cur_step = core.load_weights(logdir, sess, saver)

    for model in meta_hypes['model_list']:
        hypes = subhypes[model]
        modules = submodules[model]
        optimizer = modules['solver']

        with tf.name_scope('Validation_%s' % model):
            tf.get_variable_scope().reuse_variables()

            image_pl = tf.placeholder(tf.float32)
            image = tf.expand_dims(image_pl, 0)
            inf_out = core.build_inference_graph(hypes, modules,
                                                 image=image)
            subgraph[model]['image_pl'] = image_pl
            subgraph[model]['inf_out'] = inf_out

        # Start the data load
        modules['input'].start_enqueuing_threads(hypes, subqueues[model],
                                                 'train', sess)

    target_file = os.path.join(meta_hypes['dirs']['output_dir'], 'hypes.json')
    with open(target_file, 'w') as outfile:
        json.dump(meta_hypes, outfile, indent=2, sort_keys=True)

    return meta_hypes, subhypes, submodules, subgraph, tv_sess, cur_step
def build_united_model(meta_hypes):
    logging.info("Initialize training folder")

    subhypes = {}
    subgraph = {}
    submodules = {}
    subqueues = {}

    subgraph['debug_ops'] = {}

    base_path = meta_hypes['dirs']['base_path']

    first_iter = True

    for model in meta_hypes['model_list']:
        subhypes_file = os.path.join(base_path, meta_hypes['models'][model])
        with open(subhypes_file, 'r') as f:
            logging.info("f: %s", f)
            subhypes[model] = json.load(f)

        hypes = subhypes[model]
        utils.set_dirs(hypes, subhypes_file)

        # output directories
        hypes['dirs']['output_dir'] = meta_hypes['dirs']['output_dir']
        # data directories
        hypes['dirs']['data_dir'] = meta_hypes['dirs']['data_dir']

        # initialize the training folders
        train.initialize_training_folder(hypes, files_dir=model,
                                         logging=first_iter)

        # get the image directory from hypes
        meta_hypes['dirs']['image_dir'] = hypes['dirs']['image_dir']

        # load the modules from hypes
        submodules[model] = utils.load_modules_from_hypes(
            hypes, postfix="_%s" % model)
        modules = submodules[model]

        logging.info("Build %s computation Graph.", model)
        with tf.name_scope("Queues_%s" % model):
            subqueues[model] = modules['input'].create_queues(hypes, 'train')

        logging.info('Building Model: %s' % model)

        subgraph[model] = build_training_graph(hypes, subqueues[model],
                                               modules, first_iter)

        first_iter = False

    # if the model list contains detection and segmentation
    if len(meta_hypes['models']) == 2:
        # calculate the combined losses
        _recombine_2_losses(meta_hypes, subgraph, subhypes, submodules)
    # if the model list contains detection, segmentation and classification
    else:
        # calculate the combined losses
        _recombine_3_losses(meta_hypes, subgraph, subhypes, submodules)

    hypes = subhypes[meta_hypes['model_list'][0]]

    tv_sess = core.start_tv_session(hypes)
    sess = tv_sess['sess']

    # for each of the models in the model list, expand the image dimension
    for model in meta_hypes['model_list']:
        hypes = subhypes[model]
        modules = submodules[model]
        optimizer = modules['solver']

        with tf.name_scope('Validation_%s' % model):
            tf.get_variable_scope().reuse_variables()

            # returns a tensor that may be used as a handle for feeding a value
            image_pl = tf.placeholder(tf.float32)
            # expand the shape of the array by inserting a new axis at position 0
            image = tf.expand_dims(image_pl, 0)
            inf_out = core.build_inference_graph(hypes, modules,
                                                 image=image)
            subgraph[model]['image_pl'] = image_pl
            subgraph[model]['inf_out'] = inf_out

        # Start the data load
        modules['input'].start_enqueuing_threads(hypes, subqueues[model],
                                                 'train', sess)

    # join paths for the output result
    target_file = os.path.join(meta_hypes['dirs']['output_dir'], 'hypes.json')
    with open(target_file, 'w') as outfile:
        json.dump(meta_hypes, outfile, indent=2, sort_keys=True)

    return subhypes, submodules, subgraph, tv_sess
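
# For illustration only: a minimal sketch of the meta-hypes JSON consumed by
# `build_united_model` and `load_united_model`, restricted to the keys read
# above. Model names and file paths are placeholders.
_example_meta_hypes = {
    "model_list": ["segmentation", "detection"],
    "models": {"segmentation": "seg_hypes.json",
               "detection": "det_hypes.json"},
    "dirs": {"base_path": "hypes",
             "output_dir": "RUNS/united",
             "data_dir": "DATA"},
}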
def do_evaling(hypes):
    """
    Evaluate a trained model.

    This restores the latest checkpoint found in hypes['dirs']['ckpt_dir']
    and runs the evaluation loop.

    Parameters
    ----------
    hypes : dict
        Hyperparameters
    """
    # Load the data input, architecture, objective and solver modules.
    modules = utils.load_modules_from_hypes(hypes)

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Session() as sess:

        # build the graph based on the loaded modules
        with tf.name_scope("Queues"):
            queue = modules['input'].create_queues(hypes, 'train')

        tv_graph = core.build_training_graph(hypes, queue, modules)

        # prepare the tv session
        tv_sess = core.start_tv_session(hypes)

        with tf.name_scope('Validation'):
            tf.get_variable_scope().reuse_variables()
            image_pl = tf.placeholder(tf.float32)
            calib = tf.placeholder(tf.float32,
                                   shape=[1, hypes['grid_height'],
                                          hypes['grid_width'], 3, 4])
            xy_scale = tf.placeholder(tf.float32,
                                      shape=[1, hypes['grid_height'],
                                             hypes['grid_width'], 2])
            image = tf.expand_dims(image_pl, 0)
            image.set_shape([1, 384, 1248, 3])
            inf_out = core.build_inference_graph(hypes, modules,
                                                 image, calib, xy_scale)
            tv_graph['image_pl'] = image_pl
            tv_graph['inf_out'] = inf_out
            tv_graph['calib_pl'] = calib
            tv_graph['xy_scale_pl'] = xy_scale

        all_variables = tf.get_collection_ref(tf.GraphKeys.GLOBAL_VARIABLES)
        sess.run(tf.variables_initializer(all_variables))

        # Start the data load
        modules['input'].start_enqueuing_threads(hypes, queue, 'train', sess)

        # Restore the latest checkpoint.
        saver = tf.train.Saver()
        print("Reading checkpoints...")
        ckpt = tf.train.get_checkpoint_state(hypes["dirs"]["ckpt_dir"])
        if ckpt and ckpt.model_checkpoint_path:
            global_step = ckpt.model_checkpoint_path.split('/')[-1] \
                              .split('-')[-1]
            saver.restore(sess, ckpt.model_checkpoint_path)
            print('Loading success, global_step is %s' % global_step)

        # And then after everything is built, start the evaluation loop.
        run_evaling(hypes, modules, tv_graph, tv_sess)

        # stop the input threads
        tv_sess['coord'].request_stop()
        tv_sess['coord'].join(tv_sess['threads'])
def do_inference(hypes, modules, logdir):
    """
    Analyze a trained model.

    This will load model files and weights found in logdir and run
    a basic analysis.

    Parameters
    ----------
    logdir : string
        Folder with logs.
    """
    data_input, arch, objective, solver = modules

    data_dir = hypes['dirs']['data_dir']
    if 'TV_DIR_DATA' in os.environ:
        data_dir = os.environ['TV_DIR_DATA']

    hypes['dirs']['data_dir'] = data_dir
    hypes['dirs']['output_dir'] = logdir

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Graph().as_default():

        image_pl, label_pl = _create_input_placeholder()
        image = tf.expand_dims(image_pl, 0)
        if 'whitening' not in hypes['arch'] or \
                hypes['arch']['whitening']:
            image = tf.image.per_image_whitening(image)
            logging.info('Whitening is enabled.')
        else:
            logging.info('Whitening is disabled.')

        # build the graph based on the loaded modules
        softmax = build_inference_graph(hypes, modules,
                                        image=image, label=label_pl)

        # prepare the tv session
        sess_coll = core.start_tv_session(hypes)
        sess, saver, summary_op, summary_writer, coord, threads = sess_coll

        _load_weights(logdir, sess, saver)

        _prepare_output_folder(hypes, logdir)

        val_json = os.path.join(hypes['dirs']['eval_out'], 'val.json')

        if FLAGS.inspect:
            if not os.path.exists(val_json):
                logging.error("File does not exist: %s", val_json)
                logging.error("Please run kitti_eval in normal mode first.")
                exit(1)
            else:
                with open(val_json, 'r') as f:
                    eval_dict = json.load(f)
                logging.debug(eval_dict)
                from IPython import embed
                embed()
                exit(0)

        logging.info("Doing evaluation with Validation Data")
        val_file = os.path.join(hypes['dirs']['data_dir'],
                                hypes['data']['val_file'])

        eval_dict = eval_dataset(hypes, val_file, True, sess, image_pl,
                                 softmax)

        with open(val_json, 'w') as outfile:
            # json.dump(eval_dict, outfile, indent=2)
            logging.info("Dumping currently not supported")

        logging.info("Successfully evaluated Dataset. Output is written to %s",
                     val_json)

        logging_file = os.path.join(hypes['dirs']['eval_out'], 'eval.log')
        filewriter = _get_filewrite_handler(logging_file)
        rootlog = logging.getLogger('')
        rootlog.addHandler(filewriter)

        logging.info('Statistics on Validation Data.')
        logging.info('MaxF1 : %4.2f', 100 * eval_dict['MaxF'])
        logging.info('BestThresh : %4.2f', 100 * eval_dict['BestThresh'])
        logging.info('Avg Precision : %4.2f', 100 * eval_dict['AvgPrec'])
        logging.info('')
        ind5 = np.where(eval_dict['thresh'] >= 0.5)[0][0]
        logging.info('Precision @ 0.5: %4.2f',
                     100 * eval_dict['precision'][ind5])
        logging.info('Recall @ 0.5: %4.2f', 100 * eval_dict['recall'][ind5])
        logging.info('TPR @ 0.5: %4.2f', 100 * eval_dict['recall'][ind5])
        logging.info('TNR @ 0.5: %4.2f', 100 * eval_dict['TNR'][ind5])

        if FLAGS.kitti_eval:
            do_kitti_eval_with_training_data(hypes, sess, image_pl, softmax)

        rootlog.removeHandler(filewriter)
        ana.do_analyze(FLAGS.logdir)
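
# For illustration only: how the "@ 0.5" statistics above are read off the
# threshold sweep. np.where(thresh >= 0.5)[0][0] picks the index of the
# first threshold at or above 0.5; the numbers below are made up.
def _demo_threshold_lookup():
    thresh = np.array([0.0, 0.25, 0.5, 0.75, 1.0])
    precision = np.array([0.50, 0.70, 0.85, 0.90, 1.00])
    ind5 = np.where(thresh >= 0.5)[0][0]   # -> 2
    return 100 * precision[ind5]           # -> 85.0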
def load_united_model(logdir):
    subhypes = {}
    subgraph = {}
    submodules = {}
    subqueues = {}

    subgraph['debug_ops'] = {}

    first_iter = True

    # load the hypes
    meta_hypes = utils.load_hypes_from_logdir(logdir, subdir="",
                                              base_path='hypes')

    # get the models from model_list in hypes
    for model in meta_hypes['model_list']:
        subhypes[model] = utils.load_hypes_from_logdir(logdir, subdir=model)
        hypes = subhypes[model]

        # get the output directory
        hypes['dirs']['output_dir'] = meta_hypes['dirs']['output_dir']
        # image input directory
        hypes['dirs']['image_dir'] = meta_hypes['dirs']['image_dir']
        # training data directory
        hypes['dirs']['data_dir'] = meta_hypes['dirs']['data_dir']

        submodules[model] = utils.load_modules_from_logdir(logdir,
                                                           dirname=model,
                                                           postfix=model)
        modules = submodules[model]

        logging.info("Build %s computation Graph.", model)
        # build the computational graph
        with tf.name_scope("Queues_%s" % model):
            subqueues[model] = modules['input'].create_queues(hypes, 'train')

        logging.info('Building Model: %s' % model)

        # build the model
        subgraph[model] = build_training_graph(hypes, subqueues[model],
                                               modules, first_iter)

        first_iter = False

    # if the model list contains detection and segmentation
    if len(meta_hypes['model_list']) == 2:
        # combine the two losses
        _recombine_2_losses(meta_hypes, subgraph, subhypes, submodules)
    else:
        _recombine_3_losses(meta_hypes, subgraph, subhypes, submodules)

    hypes = subhypes[meta_hypes['model_list'][0]]

    # launch the graph in a session via the context manager
    tv_sess = core.start_tv_session(hypes)
    sess = tv_sess['sess']
    saver = tv_sess['saver']

    # load weights
    cur_step = core.load_weights(logdir, sess, saver)

    # for each of the models in the model list, expand the image dimension
    for model in meta_hypes['model_list']:
        hypes = subhypes[model]
        modules = submodules[model]
        optimizer = modules['solver']

        with tf.name_scope('Validation_%s' % model):
            tf.get_variable_scope().reuse_variables()

            # returns a tensor that may be used as a handle for feeding a value
            image_pl = tf.placeholder(tf.float32)
            # expand the shape of the array by inserting a new axis at position 0
            image = tf.expand_dims(image_pl, 0)
            inf_out = core.build_inference_graph(hypes, modules,
                                                 image=image)
            subgraph[model]['image_pl'] = image_pl
            subgraph[model]['inf_out'] = inf_out

        # Start the data load
        modules['input'].start_enqueuing_threads(hypes, subqueues[model],
                                                 'train', sess)

    target_file = os.path.join(meta_hypes['dirs']['output_dir'], 'hypes.json')
    with open(target_file, 'w') as outfile:
        json.dump(meta_hypes, outfile, indent=2, sort_keys=True)

    return meta_hypes, subhypes, submodules, subgraph, tv_sess, cur_step