Exemple #1
0
def continue_training(logdir):
    """
    Continues training of a model.

    This will load model files and weights found in logdir and continues
    an aborted training.

    Parameters
    ----------
    logdir : string
        Directory with logs.
    """
    hypes = utils.load_hypes_from_logdir(logdir)
    modules = utils.load_modules_from_logdir(logdir)

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Session() as sess:

        # build the graph based on the loaded modules
        with tf.name_scope("Queues"):
            queue = modules['input'].create_queues(hypes, 'train')

        tv_graph = core.build_training_graph(hypes, queue, modules)

        # prepaire the tv session
        tv_sess = core.start_tv_session(hypes)
        sess = tv_sess['sess']
        saver = tv_sess['saver']

        logging_file = os.path.join(logdir, 'output.log')
        utils.create_filewrite_handler(logging_file, mode='a')

        logging.info("Continue training.")

        cur_step = core.load_weights(logdir, sess, saver)
        if cur_step is None:
            logging.warning("Loaded global_step is None.")
            logging.warning("This could mean,"
                            " that no weights have been loaded.")
            logging.warning("Starting Training with step 0.")
            cur_step = 0

        with tf.name_scope('Validation'):
            tf.get_variable_scope().reuse_variables()
            image_pl = tf.placeholder(tf.float32)
            image = tf.expand_dims(image_pl, 0)
            image.set_shape([1, None, None, 3])
            inf_out = core.build_inference_graph(hypes, modules, image=image)
            tv_graph['image_pl'] = image_pl
            tv_graph['inf_out'] = inf_out

        # Start the data load
        modules['input'].start_enqueuing_threads(hypes, queue, 'train', sess)

        # And then after everything is built, start the training loop.
        run_training(hypes, modules, tv_graph, tv_sess, cur_step)

        # stopping input Threads
        tv_sess['coord'].request_stop()
        tv_sess['coord'].join(tv_sess['threads'])
Exemple #2
0
def continue_training(logdir):
    """
    Continues training of a model.

    This will load model files and weights found in logdir and continues
    an aborted training.

    Parameters
    ----------
    logdir : string
        Directory with logs.
    """
    hypes = utils.load_hypes_from_logdir(logdir)
    modules = utils.load_modules_from_logdir(logdir)
    data_input, arch, objective, solver = modules

    # append output to output.log
    logging_file = os.path.join(logdir, 'output.log')
    utils.create_filewrite_handler(logging_file, mode='a')

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Graph().as_default() as graph:

        # build the graph based on the loaded modules
        graph_ops = build_training_graph(hypes, modules)
        q = graph_ops[0]

        # prepaire the tv session
        sess_coll = core.start_tv_session(hypes)
        sess, saver, summary_op, summary_writer, coord, threads = sess_coll

        if hasattr(objective, 'evaluate'):
            with tf.name_scope('Validation'):
                image_pl, label_pl = _create_input_placeholder()
                image = tf.expand_dims(image_pl, 0)
                softmax = core.build_inference_graph(hypes, modules,
                                                     image=image,
                                                     label=label_pl)

        # Load weights from logdir
        cur_step = core.load_weights(logdir, sess, saver)

        # Start the data load
        _start_enqueuing_threads(hypes, q, sess, data_input)

        # And then after everything is built, start the training loop.
        start_time = time.time()
        for step in xrange(cur_step+1, hypes['solver']['max_steps']):
            start_time = run_training_step(hypes, step, start_time,
                                           graph_ops, sess_coll, objective,
                                           image_pl, softmax)

        # stopping input Threads
        coord.request_stop()
        coord.join(threads)
Exemple #3
0
def continue_training(logdir):
    """
    Continues training of a model.

    This will load model files and weights found in logdir and continues
    an aborted training.

    Parameters
    ----------
    logdir : string
        Directory with logs.
    """
    hypes = utils.load_hypes_from_logdir(logdir)
    modules = utils.load_modules_from_logdir(logdir)
    data_input, arch, objective, solver = modules

    # append output to output.log
    logging_file = os.path.join(logdir, 'output.log')
    utils.create_filewrite_handler(logging_file, mode='a')

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Graph().as_default() as graph:

        # build the graph based on the loaded modules
        graph_ops = build_training_graph(hypes, modules)
        q = graph_ops[0]

        # prepaire the tv session
        sess_coll = core.start_tv_session(hypes)
        sess, saver, summary_op, summary_writer, coord, threads = sess_coll

        if hasattr(objective, 'evaluate'):
            with tf.name_scope('Validation'):
                image_pl, label_pl = _create_input_placeholder()
                image = tf.expand_dims(image_pl, 0)
                softmax = core.build_inference_graph(hypes, modules,
                                                     image=image,
                                                     label=label_pl)

        # Load weights from logdir
        cur_step = core.load_weights(logdir, sess, saver)

        # Start the data load
        _start_enqueuing_threads(hypes, q, sess, data_input)

        # And then after everything is built, start the training loop.
        start_time = time.time()
        for step in xrange(cur_step+1, hypes['solver']['max_steps']):
            start_time = run_training_step(hypes, step, start_time,
                                           graph_ops, sess_coll, modules,
                                           image_pl, softmax)

        # stopping input Threads
        coord.request_stop()
        coord.join(threads)
Exemple #4
0
def do_training(hypes):
    """
    Train model for a number of steps.

    This trains the model for at most hypes['solver']['max_steps'].
    It shows an update every utils.cfg.step_show steps and writes
    the model to hypes['dirs']['output_dir'] every utils.cfg.step_eval
    steps.

    Paramters
    ---------
    hypes : dict
        Hyperparameters
    """
    # Get the sets of images and labels for training, validation, and
    # test on MNIST.

    modules = utils.load_modules_from_hypes(hypes)

    # set to allocate memory on GPU as needed
    # For more details, look at
    # https://stackoverflow.com/questions/36927607/how-can-i-solve-ran-out-of-gpu-memory-in-tensorflow
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Session(config=config) as sess:

        # build the graph based on the loaded modules
        with tf.name_scope("Queues"):
            queue = modules['input'].create_queues(hypes, 'train')

        tv_graph = core.build_training_graph(hypes, queue, modules)

        # prepaire the tv session
        tv_sess = core.start_tv_session(hypes)

        with tf.name_scope('Validation'):
            tf.get_variable_scope().reuse_variables()
            image_pl = tf.placeholder(tf.float32)
            image = tf.expand_dims(image_pl, 0)
            image.set_shape([1, None, None, 3])
            inf_out = core.build_inference_graph(hypes, modules, image=image)
            tv_graph['image_pl'] = image_pl
            tv_graph['inf_out'] = inf_out

        # Start the data load
        modules['input'].start_enqueuing_threads(hypes, queue, 'train', sess)

        # And then after everything is built, start the training loop.
        train.run_training(hypes, modules, tv_graph, tv_sess)

        # stopping input Threads
        tv_sess['coord'].request_stop()
        tv_sess['coord'].join(tv_sess['threads'])
Exemple #5
0
def do_training(hypes):
    """
    Train model for a number of steps.

    This trains the model for at most hypes['solver']['max_steps'].
    It shows an update every utils.cfg.step_show steps and writes
    the model to hypes['dirs']['output_dir'] every utils.cfg.step_eval
    steps.

    Paramters
    ---------
    hypes : dict
        Hyperparameters
    """
    # Get the sets of images and labels for training, validation, and
    # test on MNIST.

    modules = utils.load_modules_from_hypes(hypes)
    data_input, arch, objective, solver = modules

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Graph().as_default():

        # build the graph based on the loaded modules
        graph_ops = build_training_graph(hypes, modules)
        q = graph_ops[0]

        # prepaire the tv session
        sess_coll = core.start_tv_session(hypes)
        sess, saver, summary_op, summary_writer, coord, threads = sess_coll

        with tf.name_scope('Validation'):
            image_pl, label_pl = _create_input_placeholder()
            image = tf.expand_dims(image_pl, 0)
            softmax = core.build_inference_graph(hypes, modules,
                                                 image=image,
                                                 label=label_pl)

        # Start the data load
        _start_enqueuing_threads(hypes, q, sess, data_input)

        # And then after everything is built, start the training loop.
        start_time = time.time()
        for step in xrange(hypes['solver']['max_steps']):
            start_time = run_training_step(hypes, step, start_time,
                                           graph_ops, sess_coll, modules,
                                           image_pl, softmax)
            if hasattr(solver, 'update_learning_rate'):
                solver.update_learning_rate(hypes, step)

        # stopping input Threads
        coord.request_stop()
        coord.join(threads)
Exemple #6
0
def do_training(hypes):
    """
    Train model for a number of steps.

    This trains the model for at most hypes['solver']['max_steps'].
    It shows an update every utils.cfg.step_show steps and writes
    the model to hypes['dirs']['output_dir'] every utils.cfg.step_eval
    steps.

    Paramters
    ---------
    hypes : dict
        Hyperparameters
    """
    # Get the sets of images and labels for training, validation, and
    # test on MNIST.

    modules = utils.load_modules_from_hypes(hypes)
    data_input, arch, objective, solver = modules

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Graph().as_default():

        # build the graph based on the loaded modules
        graph_ops = build_training_graph(hypes, modules)
        q = graph_ops[0]

        # prepaire the tv session
        sess_coll = core.start_tv_session(hypes)
        sess, saver, summary_op, summary_writer, coord, threads = sess_coll

        with tf.name_scope('Validation'):
            image_pl, label_pl = _create_input_placeholder()
            image = tf.expand_dims(image_pl, 0)
            softmax = core.build_inference_graph(hypes, modules,
                                                 image=image,
                                                 label=label_pl)

        # Start the data load
        _start_enqueuing_threads(hypes, q, sess, data_input)

        # And then after everything is built, start the training loop.
        start_time = time.time()
        for step in xrange(hypes['solver']['max_steps']):
            start_time = run_training_step(hypes, step, start_time,
                                           graph_ops, sess_coll, objective,
                                           image_pl, softmax)
            if hasattr(solver, 'update_learning_rate'):
                solver.update_learning_rate(hypes, step)

        # stopping input Threads
        coord.request_stop()
        coord.join(threads)
Exemple #7
0
def do_training(hypes):
    """
    Train model for a number of steps.

    This trains the model for at most hypes['solver']['max_steps'].
    It shows an update every utils.cfg.step_show steps and writes
    the model to hypes['dirs']['output_dir'] every utils.cfg.step_eval
    steps.

    Paramters
    ---------
    hypes : dict
        Hyperparameters
    """

    modules = utils.load_modules_from_hypes(hypes)

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Session() as sess:

        # build the graph based on the loaded modules
        logging.info("..Creating queues")
        with tf.name_scope("Queues"):
            queue = modules['input'].create_queues(hypes, 'train')

        logging.info("..Building training graph")
        tv_graph = core.build_training_graph(hypes, queue, modules)

        # prepare the tv session
        tv_sess = core.start_tv_session(hypes)

        with tf.name_scope('Validation'):
            tf.get_variable_scope().reuse_variables()
            image_pl = tf.placeholder(tf.float32)
            image = tf.expand_dims(image_pl, 0)
            image.set_shape([1, None, None, 3])
            inf_out = core.build_inference_graph(hypes, modules, image=image)
            tv_graph['image_pl'] = image_pl
            tv_graph['inf_out'] = inf_out

        # Start the data load
        logging.info("..Enqueuing files")
        modules['input'].start_enqueuing_threads(hypes, queue, 'train', sess)

        # And then after everything is built, start the training loop.
        logging.info("..Initializing the training")
        run_training(hypes, modules, tv_graph, tv_sess)

        # stopping input Threads
        tv_sess['coord'].request_stop()
        tv_sess['coord'].join(tv_sess['threads'])
Exemple #8
0
def do_training(hypes):
    """
    Train model for a number of steps.

    This trains the model for at most hypes['solver']['max_steps'].
    It shows an update every utils.cfg.step_show steps and writes
    the model to hypes['dirs']['output_dir'] every utils.cfg.step_eval
    steps.

    Paramters
    ---------
    hypes : dict
        Hyperparameters
    """
    # Get the sets of images and labels for training, validation, and
    # test on MNIST.

    modules = utils.load_modules_from_hypes(hypes)

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Session() as sess:

        # build the graph based on the loaded modules
        with tf.name_scope("Queues"):
            queue = modules['input'].create_queues(hypes, 'train')

        tv_graph = core.build_training_graph(hypes, queue, modules)

        # prepaire the tv session
        tv_sess = core.start_tv_session(hypes)

        with tf.name_scope('Validation'):
            tf.get_variable_scope().reuse_variables()
            image_pl = tf.placeholder(tf.float32)
            image = tf.expand_dims(image_pl, 0)
            image.set_shape([1, None, None, 3])
            inf_out = core.build_inference_graph(hypes, modules,
                                                 image=image)
            tv_graph['image_pl'] = image_pl
            tv_graph['inf_out'] = inf_out

        # Start the data load
        modules['input'].start_enqueuing_threads(hypes, queue, 'train', sess)

        # And then after everything is built, start the training loop.
        run_training(hypes, modules, tv_graph, tv_sess)

        # stopping input Threads
        tv_sess['coord'].request_stop()
        tv_sess['coord'].join(tv_sess['threads'])
def do_analyze(logdir):
    """
    Analyze a trained model.

    This will load model files and weights found in logdir and run a basic
    analysis.

    Parameters
    ----------
    logdir : string
        Directory with logs.
    """
    hypes = utils.load_hypes_from_logdir(logdir)
    modules = utils.load_modules_from_logdir(logdir)
    data_input, arch, objective, solver = modules

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Graph().as_default():

        # prepaire the tv session

        with tf.name_scope('Validation'):
            image_pl, label_pl = _create_input_placeholder()
            image = tf.expand_dims(image_pl, 0)
            softmax = core.build_inference_graph(hypes, modules,
                                                 image=image,
                                                 label=label_pl)

        sess_coll = core.start_tv_session(hypes)
        sess, saver, summary_op, summary_writer, coord, threads = sess_coll

        core.load_weights(logdir, sess, saver)

        eval_dict, images = objective.tensor_eval(hypes, sess, image_pl,
                                                  softmax)

        logging_file = os.path.join(logdir, "eval/analysis.log")
        utils.create_filewrite_handler(logging_file)

        utils.print_eval_dict(eval_dict)
        _write_images_to_logdir(images, logdir)
    return
Exemple #10
0
def do_analyze(logdir):
    """
    Analyze a trained model.

    This will load model files and weights found in logdir and run a basic
    analysis.

    Parameters
    ----------
    logdir : string
        Directory with logs.
    """
    hypes = utils.load_hypes_from_logdir(logdir)
    modules = utils.load_modules_from_logdir(logdir)
    data_input, arch, objective, solver = modules

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Graph().as_default():

        # prepaire the tv session

        with tf.name_scope('Validation'):
            image_pl, label_pl = _create_input_placeholder()
            image = tf.expand_dims(image_pl, 0)
            softmax = core.build_inference_graph(hypes, modules,
                                                 image=image,
                                                 label=label_pl)

        sess_coll = core.start_tv_session(hypes)
        sess, saver, summary_op, summary_writer, coord, threads = sess_coll

        core.load_weights(logdir, sess, saver)

        eval_dict, images = objective.tensor_eval(hypes, sess, image_pl,
                                                  softmax)

        logging_file = os.path.join(logdir, "eval/analysis.log")
        utils.create_filewrite_handler(logging_file)

        utils.print_eval_dict(eval_dict)
        _write_images_to_logdir(images, logdir)
    return
def do_analyze(logdir):
    """
    Analyze a trained model.

    This will load model files and weights found in logdir and run a basic
    analysis.

    Parameters
    ----------
    logdir : string
        Directory with logs.
    """
    hypes = utils.load_hypes_from_logdir(logdir)
    modules = utils.load_modules_from_logdir(logdir)
    data_input, arch, objective, solver = modules

    logging_file = os.path.join(logdir, "eval/analysis.log")
    utils.create_filewrite_handler(logging_file)

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Graph().as_default():

        # build the graph based on the loaded modules
        graph_ops = core.build_graph(hypes, modules, train=False)
        q, train_op, loss, eval_lists = graph_ops
        q = graph_ops[0]

        # prepaire the tv session
        sess_coll = core.start_tv_session(hypes)
        sess, saver, summary_op, summary_writer, coord, threads = sess_coll

        core.load_weights(logdir, sess, saver)
        # Start the data load
        data_input.start_enqueuing_threads(hypes, q['val'], 'val', sess,
                                           hypes['dirs']['data_dir'])

    return core.do_eval(hypes, eval_lists, 'val', sess)
def build_united_model(meta_hypes):

    logging.info("Initialize training folder")

    subhypes = {}
    subgraph = {}
    submodules = {}
    subqueues = {}

    subgraph['debug_ops'] = {}

    base_path = meta_hypes['dirs']['base_path']
    first_iter = True

    for model in meta_hypes['model_list']:
        subhypes_file = os.path.join(base_path, meta_hypes['models'][model])
        with open(subhypes_file, 'r') as f:
            logging.info("f: %s", f)
            subhypes[model] = json.load(f)

        hypes = subhypes[model]
        utils.set_dirs(hypes, subhypes_file)
        hypes['dirs']['output_dir'] = meta_hypes['dirs']['output_dir']
        hypes['dirs']['data_dir'] = meta_hypes['dirs']['data_dir']
        train.initialize_training_folder(hypes,
                                         files_dir=model,
                                         logging=first_iter)
        meta_hypes['dirs']['image_dir'] = hypes['dirs']['image_dir']
        submodules[model] = utils.load_modules_from_hypes(hypes,
                                                          postfix="_%s" %
                                                          model)
        modules = submodules[model]

        logging.info("Build %s computation Graph.", model)
        with tf.name_scope("Queues_%s" % model):
            subqueues[model] = modules['input'].create_queues(hypes, 'train')

        logging.info('Building Model: %s' % model)

        subgraph[model] = build_training_graph(hypes, subqueues[model],
                                               modules, first_iter)

        first_iter = False

    if len(meta_hypes['models']) == 2:
        _recombine_2_losses(meta_hypes, subgraph, subhypes, submodules)
    else:
        _recombine_3_losses(meta_hypes, subgraph, subhypes, submodules)

    hypes = subhypes[meta_hypes['model_list'][0]]

    tv_sess = core.start_tv_session(hypes)
    sess = tv_sess['sess']
    for model in meta_hypes['model_list']:
        hypes = subhypes[model]
        modules = submodules[model]
        optimizer = modules['solver']

        with tf.name_scope('Validation_%s' % model):
            tf.get_variable_scope().reuse_variables()
            image_pl = tf.placeholder(tf.float32)
            image = tf.expand_dims(image_pl, 0)
            inf_out = core.build_inference_graph(hypes, modules, image=image)
            subgraph[model]['image_pl'] = image_pl
            subgraph[model]['inf_out'] = inf_out

        # Start the data load
        modules['input'].start_enqueuing_threads(hypes, subqueues[model],
                                                 'train', sess)

    target_file = os.path.join(meta_hypes['dirs']['output_dir'], 'hypes.json')
    with open(target_file, 'w') as outfile:
        json.dump(meta_hypes, outfile, indent=2, sort_keys=True)

    return subhypes, submodules, subgraph, tv_sess
Exemple #13
0
def do_training(hypes):
    """
    Train model for a number of steps.

    This trains the model for at most hypes['solver']['max_steps'].
    It shows an update every utils.cfg.step_show steps and writes
    the model to hypes['dirs']['output_dir'] every utils.cfg.step_eval
    steps.

    Paramters
    ---------
    hypes : dict
        Hyperparameters
    """
    # Get the sets of images and labels for training, validation, and
    # test on MNIST.

    modules = utils.load_modules_from_hypes(hypes)

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Session() as sess:

        # build the graph based on the loaded modules
        with tf.name_scope("Queues"):
            queue = modules['input'].create_queues(hypes, 'train')
        
        regression_weights = tf.placeholder(dtype=tf.float32, shape=(3,))
        hypes['solver']['regression_weights'] = regression_weights

        tv_graph = core.build_training_graph(hypes, queue, modules)

        # prepaire the tv session
        tv_sess = core.start_tv_session(hypes)

        with tf.name_scope('Validation'):
            tf.get_variable_scope().reuse_variables()
            image_pl = tf.placeholder(tf.float32)
            calib = tf.placeholder(tf.float32, shape=[1, hypes['grid_height'], hypes['grid_width'], 3, 4])
            xy_scale = tf.placeholder(tf.float32, shape=[1, hypes['grid_height'], hypes['grid_width'], 2])
            image = tf.expand_dims(image_pl, 0)
            image.set_shape([1, 384, 1248, 3])
            inf_out, encoder_out = core.build_inference_graph(hypes, modules,
                                                 image, calib, xy_scale)
            tv_graph['image_pl'] = image_pl
            tv_graph['inf_out'] = inf_out
            tv_graph['calib_pl'] = calib
            tv_graph['xy_scale_pl'] = xy_scale
            tv_graph['encoder_out'] = encoder_out


        all_variables = tf.get_collection_ref(tf.GraphKeys.GLOBAL_VARIABLES)
        sess.run(tf.variables_initializer(all_variables))
        var_list = [var for var in all_variables if "beta" not in var.name and 'Adam' not in var.name]    
  
        saver = tf.train.Saver(var_list=var_list)
        saver.restore(sess, hypes['pretrained'])
     
        # Start the data load
        modules['input'].start_enqueuing_threads(hypes, queue, 'train', sess)

        # And then after everything is built, start the training loop.
        run_training(hypes, modules, tv_graph, tv_sess)

        # stopping input Threads
        tv_sess['coord'].request_stop()
        tv_sess['coord'].join(tv_sess['threads'])
Exemple #14
0
def main(_):
    utils.set_gpus_to_use()

    try:
        import tensorvision.train
        import tensorflow_fcn.utils
    except ImportError:
        logging.error("Could not import the submodules.")
        logging.error("Please execute:"
                      "'git submodule update --init --recursive'")
        exit(1)

    with open(tf.app.flags.FLAGS.hypes, 'r') as f:
        logging.info("f: %s", f)
        hypes = json.load(f)
    utils.load_plugins()

    if 'TV_DIR_RUNS' in os.environ:
        runs_dir = os.path.join(os.environ['TV_DIR_RUNS'], 'KittiSeg')
    else:
        runs_dir = 'RUNS'

    utils.set_dirs(hypes, tf.app.flags.FLAGS.hypes)
    utils._add_paths_to_sys(hypes)
    train.maybe_download_and_extract(hypes)
    maybe_download_and_extract(runs_dir)

    logging.info("Trimming weights.")
    logdir = os.path.join(runs_dir, FLAGS.RUN)
    modules = utils.load_modules_from_hypes(hypes)

    with tf.Graph().as_default():

        # build the graph based on the loaded modules
        with tf.name_scope("Queues"):
            queue = modules['input'].create_queues(hypes, 'train')

        tv_graph = core.build_training_graph(hypes, queue, modules)

        # prepare the tv session
        with tf.Session().as_default():
            tv_sess = core.start_tv_session(hypes)
        sess = tv_sess['sess']
        saver = tv_sess['saver']

        cur_step = core.load_weights(logdir, sess, saver)
        if cur_step is None:
            logging.warning("Loaded global_step is None.")
            logging.warning("This could mean,"
                            " that no weights have been loaded.")
            logging.warning("Starting Training with step 0.")
            cur_step = 0

        with tf.name_scope('Validation'):
            tf.get_variable_scope().reuse_variables()
            image_pl = tf.placeholder(tf.float32)
            image = tf.expand_dims(image_pl, 0)
            image.set_shape([1, None, None, 3])
            inf_out = core.build_inference_graph(hypes, modules, image=image)
            tv_graph['image_pl'] = image_pl
            tv_graph['inf_out'] = inf_out

        # prepaire the tv session
        image_pl = tf.placeholder(tf.float32)
        image = tf.expand_dims(image_pl, 0)
        image.set_shape([1, None, None, 3])
        inf_out = core.build_inference_graph(hypes, modules, image=image)

        # Create a session for running Ops on the Graph.
        trim_dir = 'RUNS/trimmed'
        shutil.copytree(logdir, trim_dir)
        shutil.copy(tf.app.flags.FLAGS.hypes,
                    os.path.join(trim_dir, 'model_files', 'hypes.json'))
        sess = tf.Session()
        saver = tf.train.Saver()
        core.load_weights(trim_dir, sess, saver)

        for weight in tf.contrib.model_pruning.get_masks():
            if any([
                    layer in weight.name
                    for layer in hypes['layer_pruning']['layers']
            ]):
                weight_value = tv_sess['sess'].run(weight)
                kernel_count = int(weight_value.shape[3] *
                                   hypes['layer_pruning']['layer_sparsity'])

                l1_values = np.sum(np.abs(weight_value), axis=(0, 1, 2))
                toss_kernels = l1_values.argsort()[:kernel_count]
                weight_value[:, :, :, toss_kernels] = 0
                assign_op = tf.assign(weight, tf.constant(weight_value))
                tv_sess['sess'].run(assign_op)

        checkpoint_path = os.path.join(trim_dir, 'model.ckpt')
        tv_sess['saver'].save(sess, checkpoint_path, global_step=cur_step)

    train.continue_training(trim_dir)
def load_united_model(logdir):
    subhypes = {}
    subgraph = {}
    submodules = {}
    subqueues = {}

    subgraph['debug_ops'] = {}

    first_iter = True

    meta_hypes = utils.load_hypes_from_logdir(logdir,
                                              subdir="",
                                              base_path='hypes')
    for model in meta_hypes['model_list']:
        subhypes[model] = utils.load_hypes_from_logdir(logdir, subdir=model)
        hypes = subhypes[model]
        hypes['dirs']['output_dir'] = meta_hypes['dirs']['output_dir']
        hypes['dirs']['image_dir'] = meta_hypes['dirs']['image_dir']
        hypes['dirs']['data_dir'] = meta_hypes['dirs']['data_dir']
        submodules[model] = utils.load_modules_from_logdir(logdir,
                                                           dirname=model,
                                                           postfix=model)

        modules = submodules[model]

        logging.info("Build %s computation Graph.", model)
        with tf.name_scope("Queues_%s" % model):
            subqueues[model] = modules['input'].create_queues(hypes, 'train')

        logging.info('Building Model: %s' % model)

        subgraph[model] = build_training_graph(hypes, subqueues[model],
                                               modules, first_iter)

        first_iter = False

    if len(meta_hypes['model_list']) == 2:
        _recombine_2_losses(meta_hypes, subgraph, subhypes, submodules)
    else:
        _recombine_3_losses(meta_hypes, subgraph, subhypes, submodules)

    hypes = subhypes[meta_hypes['model_list'][0]]

    tv_sess = core.start_tv_session(hypes)
    sess = tv_sess['sess']
    saver = tv_sess['saver']

    cur_step = core.load_weights(logdir, sess, saver)
    for model in meta_hypes['model_list']:
        hypes = subhypes[model]
        modules = submodules[model]
        optimizer = modules['solver']

        with tf.name_scope('Validation_%s' % model):
            tf.get_variable_scope().reuse_variables()
            image_pl = tf.placeholder(tf.float32)
            image = tf.expand_dims(image_pl, 0)
            inf_out = core.build_inference_graph(hypes, modules, image=image)
            subgraph[model]['image_pl'] = image_pl
            subgraph[model]['inf_out'] = inf_out

        # Start the data load
        modules['input'].start_enqueuing_threads(hypes, subqueues[model],
                                                 'train', sess)

    target_file = os.path.join(meta_hypes['dirs']['output_dir'], 'hypes.json')
    with open(target_file, 'w') as outfile:
        json.dump(meta_hypes, outfile, indent=2, sort_keys=True)

    return meta_hypes, subhypes, submodules, subgraph, tv_sess, cur_step
Exemple #16
0
def continue_training(logdir):
    """
    Continues training of a model.

    This will load model files and weights found in logdir and continues
    an aborted training.

    Parameters
    ----------
    logdir : string
        Directory with logs.
    """
    hypes = utils.load_hypes_from_logdir(logdir)
    modules = utils.load_modules_from_logdir(logdir)

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Session() as sess:

        # build the graph based on the loaded modules
        with tf.name_scope("Queues"):
            queue = modules['input'].create_queues(hypes, 'train')

        tv_graph = core.build_training_graph(hypes, queue, modules)

        # prepaire the tv session
        tv_sess = core.start_tv_session(hypes)
        sess = tv_sess['sess']
        saver = tv_sess['saver']

        logging_file = os.path.join(logdir, 'output.log')
        utils.create_filewrite_handler(logging_file, mode='a')

        logging.info("Continue training.")

        cur_step = core.load_weights(logdir, sess, saver)
        if cur_step is None:
            logging.warning("Loaded global_step is None.")
            logging.warning("This could mean,"
                            " that no weights have been loaded.")
            logging.warning("Starting Training with step 0.")
            cur_step = 0

        with tf.name_scope('Validation'):
            tf.get_variable_scope().reuse_variables()
            image_pl = tf.placeholder(tf.float32)
            image = tf.expand_dims(image_pl, 0)
            image.set_shape([1, None, None, 3])
            inf_out = core.build_inference_graph(hypes, modules,
                                                 image=image)
            tv_graph['image_pl'] = image_pl
            tv_graph['inf_out'] = inf_out

        # Start the data load
        modules['input'].start_enqueuing_threads(hypes, queue, 'train', sess)

        # And then after everything is built, start the training loop.
        run_training(hypes, modules, tv_graph, tv_sess, cur_step)

        # stopping input Threads
        tv_sess['coord'].request_stop()
        tv_sess['coord'].join(tv_sess['threads'])
Exemple #17
0
def build_united_model(meta_hypes):

    logging.info("Initialize training folder")

    subhypes = {}
    subgraph = {}
    submodules = {}
    subqueues = {}

    subgraph['debug_ops'] = {}

    base_path = meta_hypes['dirs']['base_path']
    first_iter = True

    for model in meta_hypes['model_list']:
        subhypes_file = os.path.join(base_path, meta_hypes['models'][model])
        with open(subhypes_file, 'r') as f:
            logging.info("f: %s", f)
            subhypes[model] = json.load(f)

        hypes = subhypes[model]
        utils.set_dirs(hypes, subhypes_file)
        #output directories
        hypes['dirs']['output_dir'] = meta_hypes['dirs']['output_dir']
        #data directories
        hypes['dirs']['data_dir'] = meta_hypes['dirs']['data_dir']
        #initialize the training folders
        train.initialize_training_folder(hypes, files_dir=model,
                                         logging=first_iter)
        #get the image directory from hypes
        meta_hypes['dirs']['image_dir'] = hypes['dirs']['image_dir']
        #load the modules from hypes
        submodules[model] = utils.load_modules_from_hypes(
            hypes, postfix="_%s" % model)
        modules = submodules[model]

        logging.info("Build %s computation Graph.", model)
        with tf.name_scope("Queues_%s" % model):
            subqueues[model] = modules['input'].create_queues(hypes, 'train')

        logging.info('Building Model: %s' % model)

        subgraph[model] = build_training_graph(hypes,
                                               subqueues[model],
                                               modules,
                                               first_iter)

        first_iter = False
    #if model list is having detection and segmentation
    if len(meta_hypes['models']) == 2:
        #calculate the losses 
        _recombine_2_losses(meta_hypes, subgraph, subhypes, submodules)
    #if model list is having detection, segmentation and classification
    else:
        #calculate combined losses
        _recombine_3_losses(meta_hypes, subgraph, subhypes, submodules)

    hypes = subhypes[meta_hypes['model_list'][0]]

    tv_sess = core.start_tv_session(hypes)
    sess = tv_sess['sess']
    #for each of the models in model list expand the image dimension
    for model in meta_hypes['model_list']:
        hypes = subhypes[model]
        modules = submodules[model]
        optimizer = modules['solver']

        with tf.name_scope('Validation_%s' % model):
            tf.get_variable_scope().reuse_variables()
            #returns the tensor that may be used as handle for feeding a value
            image_pl = tf.placeholder(tf.float32)
            #expand the shape of the array by inserting new axes in 0th positon
            image = tf.expand_dims(image_pl, 0)
            inf_out = core.build_inference_graph(hypes, modules,
                                                 image=image)
            subgraph[model]['image_pl'] = image_pl
            subgraph[model]['inf_out'] = inf_out

        # Start the data load
        modules['input'].start_enqueuing_threads(hypes, subqueues[model],
                                                 'train', sess)

    #join paths for the output result
    target_file = os.path.join(meta_hypes['dirs']['output_dir'], 'hypes.json')
    with open(target_file, 'w') as outfile:
        json.dump(meta_hypes, outfile, indent=2, sort_keys=True)

    return subhypes, submodules, subgraph, tv_sess
Exemple #18
0
def do_evaling(hypes):
    """
    eval model for a number of steps.

    This trains the model for at most hypes['solver']['max_steps'].
    It shows an update every utils.cfg.step_show steps and writes
    the model to hypes['dirs']['output_dir'] every utils.cfg.step_eval
    steps.

    Paramters
    ---------
    hypes : dict
        Hyperparameters
    """
    # Get the sets of images and labels for training, validation, and
    # test on MNIST.

    modules = utils.load_modules_from_hypes(hypes)

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Session() as sess:
        # build the graph based on the loaded modules
        with tf.name_scope("Queues"):
            queue = modules['input'].create_queues(hypes, 'train')

        tv_graph = core.build_training_graph(hypes, queue, modules)

        # prepaire the tv session
        tv_sess = core.start_tv_session(hypes)

        with tf.name_scope('Validation'):
            tf.get_variable_scope().reuse_variables()
            image_pl = tf.placeholder(tf.float32)
            calib = tf.placeholder(
                tf.float32,
                shape=[1, hypes['grid_height'], hypes['grid_width'], 3, 4])
            xy_scale = tf.placeholder(
                tf.float32,
                shape=[1, hypes['grid_height'], hypes['grid_width'], 2])
            image = tf.expand_dims(image_pl, 0)
            image.set_shape([1, 384, 1248, 3])
            inf_out = core.build_inference_graph(hypes, modules, image, calib,
                                                 xy_scale)
            tv_graph['image_pl'] = image_pl
            tv_graph['inf_out'] = inf_out
            tv_graph['calib_pl'] = calib
            tv_graph['xy_scale_pl'] = xy_scale

        all_variables = tf.get_collection_ref(tf.GraphKeys.GLOBAL_VARIABLES)
        sess.run(tf.variables_initializer(all_variables))

        # Start the data load
        modules['input'].start_enqueuing_threads(hypes, queue, 'train', sess)
        saver = tf.train.Saver()
        print("Reading checkpoints...")
        ckpt = tf.train.get_checkpoint_state(hypes["dirs"]["ckpt_dir"])
        if ckpt and ckpt.model_checkpoint_path:
            global_step = ckpt.model_checkpoint_path.split('/')[-1].split(
                '-')[-1]
            saver.restore(sess, ckpt.model_checkpoint_path)
            print('Loading success, global_step is %s' % global_step)

        # And then after everything is built, start the training loop.
        run_evaling(hypes, modules, tv_graph, tv_sess)

        # stopping input Threads
        tv_sess['coord'].request_stop()
        tv_sess['coord'].join(tv_sess['threads'])
Exemple #19
0
def do_inference(hypes, modules, logdir):
    """
    Analyze a trained model.

    This will load model files and weights found in logdir and run a basic
    analysis.

    Paramters
    ---------
    logdir : string
        folder with logs
    """
    data_input, arch, objective, solver = modules

    data_dir = hypes['dirs']['data_dir']
    if 'TV_DIR_DATA' in os.environ:
        data_dir = os.environ['TV_DIR_DATA']
        hypes['dirs']['data_dir'] = data_dir
        hypes['dirs']['output_dir'] = logdir

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Graph().as_default():

        image_pl, label_pl = _create_input_placeholder()

        image = tf.expand_dims(image_pl, 0)

        if 'whitening' not in hypes['arch'] or \
                hypes['arch']['whitening']:
            image = tf.image.per_image_whitening(image)
            logging.info('Whitening is enabled.')
        else:
            logging.info('Whitening is disabled.')

        # build the graph based on the loaded modules
        softmax = build_inference_graph(hypes,
                                        modules,
                                        image=image,
                                        label=label_pl)

        # prepaire the tv session
        sess_coll = core.start_tv_session(hypes)
        sess, saver, summary_op, summary_writer, coord, threads = sess_coll

        _load_weights(logdir, sess, saver)

    _prepare_output_folder(hypes, logdir)

    val_json = os.path.join(hypes['dirs']['eval_out'], 'val.json')

    if FLAGS.inspect:
        if not os.path.exists(val_json):
            logging.error("File does not exist: %s", val_json)
            logging.error("Please run kitti_eval in normal mode first.")
            exit(1)
        else:
            with open(val_json, 'r') as f:
                eval_dict = json.load(f)
                logging.debug(eval_dict)
                from IPython import embed
                embed()
                exit(0)

    logging.info("Doing evaluation with Validation Data")
    val_file = os.path.join(hypes['dirs']['data_dir'],
                            hypes['data']['val_file'])
    eval_dict = eval_dataset(hypes, val_file, True, sess, image_pl, softmax)

    with open(val_json, 'w') as outfile:
        # json.dump(eval_dict, outfile, indent=2)
        logging.info("Dumping currently not supported")

    logging.info("Succesfully evaluated Dataset. Output is written to %s",
                 val_json)

    logging_file = os.path.join(hypes['dirs']['eval_out'], 'eval.log')
    filewriter = _get_filewrite_handler(logging_file)
    rootlog = logging.getLogger('')
    rootlog.addHandler(filewriter)

    logging.info('Statistics on Validation Data.')

    logging.info('MaxF1          : %4.2f', 100 * eval_dict['MaxF'])
    logging.info('BestThresh     : %4.2f', 100 * eval_dict['BestThresh'])
    logging.info('Avg Precision  : %4.2f', 100 * eval_dict['AvgPrec'])
    logging.info('')
    ind5 = np.where(eval_dict['thresh'] >= 0.5)[0][0]
    logging.info('Precision @ 0.5: %4.2f', 100 * eval_dict['precision'][ind5])
    logging.info('Recall    @ 0.5: %4.2f', 100 * eval_dict['recall'][ind5])
    logging.info('TPR       @ 0.5: %4.2f', 100 * eval_dict['recall'][ind5])
    logging.info('TNR       @ 0.5: %4.2f', 100 * eval_dict['TNR'][ind5])

    if FLAGS.kitti_eval:
        do_kitti_eval_with_training_data(hypes, sess, image_pl, softmax)

    rootlog.removeHandler(filewriter)

    ana.do_analyze(FLAGS.logdir)
Exemple #20
0
def load_united_model(logdir):
    subhypes = {}
    subgraph = {}
    submodules = {}
    subqueues = {}

    subgraph['debug_ops'] = {}

    first_iter = True
    #load the hypes
    meta_hypes = utils.load_hypes_from_logdir(logdir, subdir="",
                                              base_path='hypes')
    #get the models from model_list in hypes
    for model in meta_hypes['model_list']:
        subhypes[model] = utils.load_hypes_from_logdir(logdir, subdir=model)
        hypes = subhypes[model]
        #get the output directory
        hypes['dirs']['output_dir'] = meta_hypes['dirs']['output_dir']
        #image input directory
        hypes['dirs']['image_dir'] = meta_hypes['dirs']['image_dir']
        #training data directory
        hypes['dirs']['data_dir'] = meta_hypes['dirs']['data_dir']
        submodules[model] = utils.load_modules_from_logdir(logdir,
                                                           dirname=model,
                                                           postfix=model)

        modules = submodules[model]

        logging.info("Build %s computation Graph.", model)
        #build the computational graph
        with tf.name_scope("Queues_%s" % model):
            subqueues[model] = modules['input'].create_queues(hypes, 'train')

        logging.info('Building Model: %s' % model)
        #build the model
        subgraph[model] = build_training_graph(hypes,
                                               subqueues[model],
                                               modules,
                                               first_iter)

        first_iter = False
    #if model list is having detection and segmentation
    if len(meta_hypes['model_list']) == 2:
        #call the function to calculate the losses
        _recombine_2_losses(meta_hypes, subgraph, subhypes, submodules)
    else:
        _recombine_3_losses(meta_hypes, subgraph, subhypes, submodules)

    hypes = subhypes[meta_hypes['model_list'][0]]
    #using the context manager launch the graph in session
    tv_sess = core.start_tv_session(hypes)
    sess = tv_sess['sess']
    saver = tv_sess['saver']
    #load weights
    cur_step = core.load_weights(logdir, sess, saver)
    #for each of the models in model list expand the image dimension
    for model in meta_hypes['model_list']:
        hypes = subhypes[model]
        modules = submodules[model]
        optimizer = modules['solver']

        with tf.name_scope('Validation_%s' % model):
            tf.get_variable_scope().reuse_variables()
            #returns the tensor that may be used as handle for feeding a value
            image_pl = tf.placeholder(tf.float32)
            #expand the shape of the array by inserting new axes in 0th positon
            image = tf.expand_dims(image_pl, 0)
            inf_out = core.build_inference_graph(hypes, modules,
                                                 image=image)
            subgraph[model]['image_pl'] = image_pl
            subgraph[model]['inf_out'] = inf_out

        # Start the data load
        modules['input'].start_enqueuing_threads(hypes, subqueues[model],
                                                 'train', sess)

    target_file = os.path.join(meta_hypes['dirs']['output_dir'], 'hypes.json')
    with open(target_file, 'w') as outfile:
        json.dump(meta_hypes, outfile, indent=2, sort_keys=True)

    return meta_hypes, subhypes, submodules, subgraph, tv_sess, cur_step