Example #1
def test_train(gpu_number):
    """ Initialize the input class to get the configuration """
    config = configMain()

    manager = DatasetManager(config.config_input)
    """ Get the batch tensor that is going to be used around """
    batch_tensor = manager.train.get_batch_tensor()
    batch_tensor_val = manager.validation.get_batch_tensor()
    config_gpu = tf.ConfigProto()
    config_gpu.gpu_options.visible_device_list = gpu_number
    sess = tf.Session(config=config_gpu)
    manager.start_training_queueing(sess)
    manager.start_validation_queueing(sess)

    training_manager = TrainManager(config.config_train)

    training_manager.build_network()

    training_manager.build_loss()

    training_manager.build_optimization()
    """ Initializing Session as variables that control the session """
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=0)
    """Load a previous model if it is configured to restore """
    cpkt = 0
    if config.config_train.restore:
        cpkt = restore_session(sess, saver, config.models_path)

    for i in range(10):  # Run a few training steps

        training_manager.run_train_step(batch_tensor, sess, i)
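Example #1 relies on project classes (configMain, DatasetManager, TrainManager, restore_session) that are not shown in these snippets; the GPU selection itself is plain TensorFlow 1.x. A minimal standalone sketch of just that part, with an illustrative device string, is:

import tensorflow as tf

# Restrict the session to a chosen GPU; the device list is a string of GPU indices
config_gpu = tf.ConfigProto()
config_gpu.gpu_options.visible_device_list = "0"  # illustrative: first physical GPU only
config_gpu.gpu_options.allow_growth = True        # optional: allocate GPU memory on demand
sess = tf.Session(config=config_gpu)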
Example #2
def evaluate(gpu_number, experiment_name):
    """ Initialize the input class to get the configuration """

    conf_module = __import__(experiment_name)

    config = conf_module.configMain()
    manager = DatasetManager(conf_module.configInput())
    """ Get the batch tensor that is going to be used around """
    batch_tensor = manager.train.get_batch_tensor()
    batch_tensor_val = manager.validation.get_batch_tensor()
    config_gpu = tf.ConfigProto()
    config_gpu.gpu_options.visible_device_list = gpu_number
    sess = tf.Session(config=config_gpu)

    manager.start_training_queueing(sess)
    manager.start_validation_queueing(sess)

    training_manager = TrainManager(conf_module.configTrain())

    training_manager.build_network()

    training_manager.build_loss()

    training_manager.build_optimization()
    """ Initializing Session as variables that control the session """
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=0)
    """Load a previous model if it is configured to restore """
    cpkt = 0
    if config.restore:
        cpkt = restore_session(sess, saver, config.models_path)

    # """Training"""

    # """ Get the Last Iteration Trained """

    initialIteration = get_last_iteration(cpkt)

    output_manager = OutputManager(conf_module.configOutput(),
                                   training_manager, sess, batch_tensor_val)
    # output_manager = output_class.get_output_manager()

    # # CREATE HERE THE TF SESSION

    for i in range(initialIteration, config.number_iterations):

        #   """ Get the training batch """

        #   """ Run the training step and monitor its execution time """

        #   #print "NEXT STEP GETING"

        training_manager.run_train_step(batch_tensor, sess, i)
        #   #print "RUNNED STEP"

        duration = time.time() - start_time

        #   """ With the current trained net, let the outputmanager print and save all the outputs """
        output_manager.print_outputs(i, duration)
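restore_session and get_last_iteration are project helpers that these examples call but do not define. Under TensorFlow 1.x, restore_session is typically a thin wrapper around tf.train.get_checkpoint_state and Saver.restore; a hedged sketch, consistent with cpkt being falsy when nothing was restored, is:

import tensorflow as tf

def restore_session(sess, saver, models_path):
    """ Restore the latest checkpoint found in models_path, if there is one """
    ckpt = tf.train.get_checkpoint_state(models_path)
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
        return ckpt
    return 0  # nothing restored; callers treat this as "start from scratch"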
Example #3
def load_system(config):
    config.batch_size = 1
    config.is_training = False

    training_manager = TrainManager(config)

    training_manager.build_network()

    return training_manager
Example #4
def load_system(config):
    config.batch_size = 1
    config.is_training = False

    training_manager = TrainManager(config)

    training_manager.build_network()
    """ Initializing Session as variables that control the session """

    return training_manager
Example #5
def load_system(config):
    config.batch_size = 1
    config.is_training = False

    training_manager = TrainManager(config, None)
    if hasattr(config, 'seg_network_erfnet_one_hot'):
        training_manager.build_seg_network_erfnet_one_hot()
        print("Bulding: seg_network_erfnet_one_hot")
    else:
        training_manager.build_network()
        print("Bulding: standard_network")

    return training_manager
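These load_system variants build an inference-only graph (batch size 1, is_training = False); the trained weights are restored separately. A hypothetical usage sketch, following the conventions of the other examples (the experiment name and paths are illustrative), is:

import tensorflow as tf

experiment_name = 'my_experiment'                     # illustrative experiment module name
conf_module = __import__(experiment_name)
config_main = conf_module.configMain()

training_manager = load_system(conf_module.configTrain())   # builds the graph at batch size 1

sess = tf.Session()
saver = tf.train.Saver(tf.global_variables(), max_to_keep=0)
restore_session(sess, saver, config_main.models_path)       # load the trained weights into the graph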
Example #6
def train(gpu_number, experiment_name):
    """ Initialize the input class to get the configuration """

    conf_module = __import__(experiment_name)

    config = conf_module.configMain()
    manager = DatasetManager(conf_module.configInput())
    """ Get the batch tensor that is going to be used around """
    batch_tensor = manager.train.get_batch_tensor()
    batch_tensor_val = manager.validation.get_batch_tensor()
    config_gpu = tf.ConfigProto()
    config_gpu.gpu_options.visible_device_list = gpu_number
    sess = tf.Session(config=config_gpu)

    manager.start_training_queueing(sess)
    manager.start_validation_queueing(sess)

    training_manager = TrainManager(conf_module.configTrain())

    print('building network')
    training_manager.build_network()

    print('building loss')
    training_manager.build_loss()

    print('building optimization')
    training_manager.build_optimization()
    """ Initializing Session as variables that control the session """
    print('initializing variables')
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=0)
    """Load a previous model if it is configured to restore """
    print('restoring checkpoint')
    cpkt = 0
    if config.restore:
        cpkt = restore_session(sess, saver, config.models_path)

    # """Training"""

    # """ Get the Last Iteration Trained """

    initialIteration = get_last_iteration(cpkt)

    output_manager = OutputManager(conf_module.configOutput(),
                                   training_manager, sess, batch_tensor_val)
    # output_manager = output_class.get_output_manager()

    # # CREATE HERE THE TF SESSION

    print_flag = True

    for i in range(initialIteration, config.number_iterations):

        #   """ Get the training batch """

        #   start_time = time.time()
        #   """Save the model every 300 iterations"""
        start_time = time.time()
        if i % 6000 == 0:

            save_model(saver, sess, config.models_path, i)

        training_manager.run_train_step(batch_tensor, sess, i)
        #   #print "RUNNED STEP"

        duration = time.time() - start_time

        #   """ With the current trained net, let the outputmanager print and save all the outputs """
        output_manager.print_outputs(i, duration)

        if print_flag:
            # print the entire batch (only once) to check that it is balanced
            np.savetxt(
                'batch_values.txt',
                output_manager._logger._last_batch_inputs,
                newline=' ',
            )
            print_flag = False
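save_model is likewise project code that is not shown here. Under TensorFlow 1.x it usually wraps Saver.save with a global step; a minimal sketch (the checkpoint file name is an assumption) is:

import os
import tensorflow as tf

def save_model(saver, sess, models_path, iteration):
    """ Write a checkpoint tagged with the current iteration number """
    if not os.path.exists(models_path):
        os.makedirs(models_path)
    saver.save(sess, os.path.join(models_path, 'model.ckpt'), global_step=iteration)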
Example #7
def train(experiment_name, memory_fraction):
    """ Initialize the input class to get the configuration """
    conf_module = __import__(experiment_name)
    config_main = conf_module.configMain()
    config_input = conf_module.configInput()

    if config_input.use_perception_stack:
        use_mode = {}
        for key in config_input.perception_num_replicates:
            if config_input.perception_num_replicates[key] > 0:
                assert (config_input.batch_size % config_input.perception_batch_sizes[key] == 0)
                use_mode[key] = True
            else:
                use_mode[key] = False

        all_params = use_mode.copy()
        if hasattr(config_input, "perception_other_params"):
            all_params.update(config_input.perception_other_params)

        perception_interface = Perceptions(
            batch_size=config_input.perception_batch_sizes,
            gpu_assignment=config_input.perception_gpus,
            compute_methods={},
            viz_methods={},
            num_replicates=config_input.perception_num_replicates,
            path_config=config_input.perception_paths,
            **all_params
        )
        time.sleep(config_input.perception_initialization_sleep)

    else:
        perception_interface = None

    # Limit the fraction of GPU memory this TensorFlow process may allocate
    config_gpu = tf.ConfigProto()
    config_gpu.gpu_options.per_process_gpu_memory_fraction = float(memory_fraction)
    sess = tf.Session(config=config_gpu)


    dataset_manager = DatasetManager(conf_module.configInput(), perception_interface)
    batch_tensor = dataset_manager.train.get_batch_tensor()
    batch_tensor_val = dataset_manager.validation.get_batch_tensor()
    dataset_manager.start_training_queueing(sess)
    dataset_manager.start_validation_queueing(sess)

    training_manager = TrainManager(conf_module.configTrain(), None, placeholder_input=False, batch_tensor=batch_tensor)
    if hasattr(conf_module.configTrain(), 'seg_network_erfnet_one_hot'):
        print("Bulding: seg_network_erfnet_one_hot")
        training_manager.build_seg_network_erfnet_one_hot()
    else:
        print("Bulding: standard_network")
        training_manager.build_network()
    training_manager.build_loss()
    training_manager.build_optimization()

    sess.run(tf.global_variables_initializer())

    # Leave the segmentation model in the graph, since we may want to rerun that model later
    if config_main.segmentation_model is not None:
        print("the segmentation model name is: ", config_main.segmentation_model_name)
        variables_to_restore = slim.get_variables(
            scope=str(config_main.segmentation_model_name))
        saver = tf.train.Saver(variables_to_restore, max_to_keep=0)
        restore_session(sess, saver, config_main.segmentation_model)

        variables_to_restore = list(set(tf.global_variables()) - set(
            slim.get_variables(scope=str(config_main.segmentation_model_name))))
    else:
        variables_to_restore = tf.global_variables()

    saver = tf.train.Saver(variables_to_restore, max_to_keep=0)
    cpkt = restore_session(sess, saver, config_main.models_path)
    if not cpkt and hasattr(config_main, "reload_other_models"):
        cpkt = restore_session(sess, saver, config_main.reload_other_models)
        initialIteration = 0
    else:
        initialIteration = get_last_iteration(cpkt)

    all_saver = tf.train.Saver(tf.global_variables(), max_to_keep=0)

    # Creates a manager to handle the screen output and also the validation outputs
    if config_main.output_is_on:
        output_manager = OutputManager(conf_module.configOutput(), training_manager, conf_module.configTrain(), sess,
                                       batch_tensor_val)

    # Creates a test manager that connects to a server and tests there constantly

    for i in range(initialIteration, config_main.number_iterations):
        start_time = time.time()
        if i % 3000 == 0:
            if config_main.segmentation_model is not None:
                save_model(saver, sess, config_main.models_path + '/ctrl', i)
            save_model(all_saver, sess, config_main.models_path, i)

        #print("running a step")
        training_manager.run_train_step(batch_tensor, sess, i)
        #print("finished a step")

        duration = time.time() - start_time

        #   """ With the current trained net, let the outputmanager print and save all the outputs """
        if config_main.output_is_on:
            output_manager.print_outputs(i, duration)
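get_last_iteration is also a project helper that is not shown. Assuming checkpoints are named with their global step (as Saver.save with global_step produces) and that restore_session returns a falsy value when nothing was loaded, a hedged sketch is:

def get_last_iteration(ckpt):
    """ Recover the global step from the checkpoint path, e.g. '.../model.ckpt-6000' -> 6000 """
    if not ckpt:
        return 1  # assumption: start from iteration 1 when there is no checkpoint
    return int(ckpt.model_checkpoint_path.split('-')[-1])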