Example no. 1
0
    def test_path_drop_weights(self):
        """Verify that a dropped image path leaves the image feature maps unchanged.

        Builds an RpnModel with path-drop forced on, runs one training step,
        and asserts that model.img_feature_maps is identical before and after
        the weight update.
        """
        # Tests the effect of path-drop on network's feature maps.
        # It sets up a minimal-training process to check the
        # feature before and after running the 'train_op' while
        # path-drop is in effect.

        train_val_test = 'train'
        # overwrite the training iterations
        self.train_config.max_iterations = 2
        self.train_config.overwrite_checkpoints = True

        # Overwrite path drop probabilities.
        # The 0.0 entry forces that path to be dropped every step; given the
        # assertion below it presumably corresponds to the image path --
        # TODO(review): confirm the [img, bev] ordering against RpnModel.
        model_config = config_builder.proto_to_obj(self.model_config)
        model_config.path_drop_probabilities = [0.0, 0.8]

        with tf.Graph().as_default():
            # Set a graph-level seed so the run is deterministic
            tf.set_random_seed(1245)
            model = RpnModel(model_config,
                             train_val_test=train_val_test,
                             dataset=self.dataset)
            prediction_dict = model.build()
            losses_dict, total_loss = model.loss(prediction_dict)

            global_summaries = set([])
            # Optimizer
            training_optimizer = optimizer_builder.build(
                self.train_config.optimizer,
                global_summaries)
            train_op = slim.learning.create_train_op(
                total_loss,
                training_optimizer)

            init_op = tf.global_variables_initializer()

            with tf.Session() as sess:
                sess.run(init_op)
                # max_iterations is 2, so this loop runs a single step.
                for step in range(1, self.train_config.max_iterations):
                    feed_dict = model.create_feed_dict()
                    if step == 1:
                        # Snapshot the feature maps before any weight update.
                        current_feature_maps = sess.run(model.img_feature_maps,
                                                        feed_dict=feed_dict)
                        exp_feature_maps = current_feature_maps
                    train_op_loss = sess.run(train_op, feed_dict=feed_dict)
                    print('Step {}, Total Loss {:0.3f} '.
                          format(step, train_op_loss))

                    # Re-evaluate the feature maps after the update.
                    updated_feature_maps = sess.run(model.img_feature_maps,
                                                    feed_dict=feed_dict)
            # The feature maps should have remained the same since
            # the image path was dropped
            np.testing.assert_array_almost_equal(
                updated_feature_maps, exp_feature_maps, decimal=4)
Example no. 2
0
def set_up_model_train_mode(pipeline_config_path, data_split):
    """Build a detection model and its training op from a pipeline config.

    Args:
        pipeline_config_path: Path to the pipeline .config file.
        data_split: Split name forwarded to the model as train_val_test.

    Returns:
        A (model, train_op) tuple.

    Raises:
        ValueError: If the configured model_name is not recognized.
    """
    configs = config_builder.get_configs_from_pipeline_file(
        pipeline_config_path, is_training=True)
    model_config, train_config, _, dataset_config = configs

    kitti_dataset = DatasetBuilder.build_kitti_dataset(dataset_config,
                                                       use_defaults=False)

    # Dispatch table from config name to model class.
    model_classes = {
        'rpn_model': RpnModel,
        'avod_model': AvodModel,
        'avod_ssd_model': AvodSSDModel,
    }
    model_class = model_classes.get(model_config.model_name)
    if model_class is None:
        raise ValueError('Invalid model_name')
    model = model_class(model_config,
                        train_val_test=data_split,
                        dataset=kitti_dataset)

    prediction_dict = model.build()
    _, total_loss = model.loss(prediction_dict)

    # The optimizer builder needs a summary set and a global-step variable.
    global_summaries = set([])
    global_step_tensor = tf.Variable(0, trainable=False)
    training_optimizer = optimizer_builder.build(train_config.optimizer,
                                                 global_summaries,
                                                 global_step_tensor)

    # Set up the train op
    train_op = slim.learning.create_train_op(total_loss, training_optimizer)

    return model, train_op
Example no. 3
0
def train(model, train_config):
    """Training function for detection models.

    Builds the model graph, loss, optimizer and train op, then runs the main
    training loop with periodic checkpointing and summary writing.

    Args:
        model: The detection model object.
        train_config: a train_*pb2 protobuf.
            training i.e. loading RPN weights onto AVOD model.
    """
    # Get model configurations
    model_config = model.model_config

    # Create a variable tensor to hold the global step
    global_step_tensor = tf.Variable(
        0, trainable=False, name='global_step')

    #############################
    # Get training configurations
    #############################
    max_iterations = train_config.max_iterations
    summary_interval = train_config.summary_interval
    checkpoint_interval = train_config.checkpoint_interval
    max_checkpoints = train_config.max_checkpoints_to_keep

    paths_config = model_config.paths_config
    logdir = paths_config.logdir
    if not os.path.exists(logdir):
        os.makedirs(logdir)

    checkpoint_dir = paths_config.checkpoint_dir
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    checkpoint_path = checkpoint_dir + '/' + \
        model_config.checkpoint_name

    global_summaries = set([])

    # The model should return a dictionary of predictions
    prediction_dict = model.build()

    summary_histograms = train_config.summary_histograms
    summary_img_images = train_config.summary_img_images
    summary_bev_images = train_config.summary_bev_images

    ##############################
    # Setup loss
    ##############################
    losses_dict, total_loss = model.loss(prediction_dict)

    # Optimizer
    training_optimizer = optimizer_builder.build(
        train_config.optimizer,
        global_summaries,
        global_step_tensor)

    # Create the train op
    with tf.variable_scope('train_op'):
        train_op = slim.learning.create_train_op(
            total_loss,
            training_optimizer,
            clip_gradient_norm=1.0,
            global_step=global_step_tensor)

    # Save checkpoints regularly.
    saver = tf.train.Saver(max_to_keep=max_checkpoints,
                           pad_step_number=True)

    # Add the result of the train_op to the summary.
    # create_train_op returns the total-loss tensor, so this records the
    # training loss each time summaries are evaluated.
    tf.summary.scalar("training_loss", train_op)

    # Add maximum memory usage summary op
    # This op can only be run on device with gpu
    # so it's skipped on travis
    is_travis = 'TRAVIS' in os.environ
    if not is_travis:
        tf.summary.scalar('max_bytes',
                          tf.contrib.memory_stats.MaxBytesInUse())

    summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))
    summary_merged = summary_utils.summaries_to_keep(
        summaries,
        global_summaries,
        histograms=summary_histograms,
        input_imgs=summary_img_images,
        input_bevs=summary_bev_images)

    allow_gpu_mem_growth = train_config.allow_gpu_mem_growth
    if allow_gpu_mem_growth:
        # GPU memory config
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = allow_gpu_mem_growth
        sess = tf.Session(config=config)
    else:
        sess = tf.Session()

    # Create unique folder name using datetime for summary writer
    datetime_str = str(datetime.datetime.now())
    logdir = logdir + '/train'
    train_writer = tf.summary.FileWriter(logdir + '/' + datetime_str,
                                         sess.graph)

    # Create init op
    init = tf.global_variables_initializer()

    # Continue from last saved checkpoint
    if not train_config.overwrite_checkpoints:
        trainer_utils.load_checkpoints(checkpoint_dir,
                                       saver)
        if len(saver.last_checkpoints) > 0:
            checkpoint_to_restore = saver.last_checkpoints[-1]
            saver.restore(sess, checkpoint_to_restore)
        else:
            # Initialize the variables
            sess.run(init)
    else:
        # Initialize the variables
        sess.run(init)

    # Read the global step if restored
    global_step = tf.train.global_step(sess,
                                       global_step_tensor)
    print('Starting from step {} / {}'.format(
        global_step, max_iterations))

    # Main Training Loop
    last_time = time.time()
    for step in range(global_step, max_iterations + 1):

        # Save checkpoint
        if step % checkpoint_interval == 0:
            global_step = tf.train.global_step(sess,
                                               global_step_tensor)

            saver.save(sess,
                       save_path=checkpoint_path,
                       global_step=global_step)

            print('Step {} / {}, Checkpoint saved to {}-{:08d}'.format(
                step, max_iterations,
                checkpoint_path, global_step))

        # Create feed_dict for inferencing
        feed_dict = model.create_feed_dict()

        # Write summaries and train op
        if step % summary_interval == 0:
            current_time = time.time()
            time_elapsed = current_time - last_time
            last_time = current_time

            train_op_loss, summary_out = sess.run(
                [train_op, summary_merged], feed_dict=feed_dict)

            print('Step {}, Total Loss {:0.3f}, Time Elapsed {:0.3f} s'.format(
                step, train_op_loss, time_elapsed))
            train_writer.add_summary(summary_out, step)

        else:
            # Run the train op only
            sess.run(train_op, feed_dict=feed_dict)

    # Close the summary writers
    train_writer.close()
Example no. 4
0
def train(model, train_config):
    """Training function for detection models.

    Builds the graph and train op, optionally restores a pretrained or saved
    checkpoint, then runs the training loop while printing detailed per-head
    loss and accuracy metrics at every summary interval.

    Args:
        model: The detection model object.
        train_config: a train_*pb2 protobuf.
            training i.e. loading RPN weights onto AVOD model.
    """
    # Get model configurations
    model_config = model.model_config

    # Create a variable tensor to hold the global step
    global_step_tensor = tf.Variable(0, trainable=False, name='global_step')

    #############################
    # Get training configurations
    #############################
    max_iterations = train_config.max_iterations
    summary_interval = train_config.summary_interval
    checkpoint_interval = train_config.checkpoint_interval
    max_checkpoints = train_config.max_checkpoints_to_keep

    paths_config = model_config.paths_config
    logdir = paths_config.logdir
    if not os.path.exists(logdir):
        os.makedirs(logdir)

    checkpoint_dir = paths_config.checkpoint_dir
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    checkpoint_path = checkpoint_dir + '/' + \
        model_config.checkpoint_name

    global_summaries = set([])

    # The model should return a dictionary of predictions
    prediction_dict = model.build()

    summary_histograms = train_config.summary_histograms
    summary_img_images = train_config.summary_img_images
    summary_bev_images = train_config.summary_bev_images

    ##############################
    # Setup loss
    ##############################
    # This model variant's loss() also returns individual loss/accuracy
    # tensors so they can be evaluated and printed during training.
    loss_dict, total_loss, rpn_score_2d_loss, \
    rpn_acc_score_neg, rpn_acc_score_pos, \
    rpn_class_loss, rpn_reg_loss, rpn_acc_all, \
    rpn_acc_pos, refine_class_loss, refine_reg_loss, \
    avod_acc_all, avod_acc_pos = model.loss(prediction_dict)

    # Optimizer
    training_optimizer = optimizer_builder.build(train_config.optimizer,
                                                 global_summaries,
                                                 global_step_tensor)

    # Create the train op
    with tf.variable_scope('train_op'):
        train_op = slim.learning.create_train_op(
            total_loss,
            training_optimizer,
            clip_gradient_norm=1.0,
            global_step=global_step_tensor)

    # Save checkpoints regularly.
    saver = tf.train.Saver(max_to_keep=max_checkpoints, pad_step_number=True)

    # Add the result of the train_op to the summary
    tf.summary.scalar("training_loss", train_op)

    # Add maximum memory usage summary op
    # This op can only be run on device with gpu
    # so it's skipped on travis
    is_travis = 'TRAVIS' in os.environ
    if not is_travis:
        tf.summary.scalar('max_bytes', tf.contrib.memory_stats.MaxBytesInUse())

    summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))
    summary_merged = summary_utils.summaries_to_keep(
        summaries,
        global_summaries,
        histograms=summary_histograms,
        input_imgs=summary_img_images,
        input_bevs=summary_bev_images)

    allow_gpu_mem_growth = train_config.allow_gpu_mem_growth
    if allow_gpu_mem_growth:
        # GPU memory config
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = allow_gpu_mem_growth
        sess = tf.Session(config=config)
    else:
        sess = tf.Session()

    # Create unique folder name using datetime for summary writer
    datetime_str = str(datetime.datetime.now())
    logdir = logdir + '/train'
    train_writer = tf.summary.FileWriter(logdir + '/' + datetime_str,
                                         sess.graph)

    # Create init op
    init = tf.global_variables_initializer()

    # Offset subtracted from the global step below.  It is non-zero only
    # when resuming from a pretrained checkpoint; initializing it here
    # fixes a NameError when overwrite_checkpoints is set or when no
    # checkpoint exists to restore.
    step_base = 0

    # Continue from last saved checkpoint
    if not train_config.overwrite_checkpoints:
        trainer_utils.load_checkpoints(checkpoint_dir, saver)

        all_variables = tf.get_collection_ref(tf.GraphKeys.GLOBAL_VARIABLES)
        sess.run(tf.variables_initializer(all_variables))
        # Skip optimizer slots and moving averages when partially restoring.
        var_list = [
            var for var in all_variables if "beta" not in var.name
            and 'Adam' not in var.name and 'Average' not in var.name
        ]

        saver_part = tf.train.Saver(var_list=var_list)

        if train_config.use_pretrained:
            saver_part.restore(sess, train_config.pretrained)
            print('Model loaded from: {}'.format(train_config.pretrained))
            step_base = tf.train.global_step(sess, global_step_tensor)

        elif len(saver.last_checkpoints) > 0:
            checkpoint_to_restore = saver.last_checkpoints[-1]
            saver_part.restore(sess, checkpoint_to_restore)

        else:
            # Initialize the variables
            sess.run(init)
    else:
        # Initialize the variables
        sess.run(init)

    # Read the global step if restored, relative to the pretrained base
    global_step = tf.train.global_step(sess, global_step_tensor) - step_base

    print('Starting from step {} / {}'.format(global_step, max_iterations))

    # Main Training Loop
    last_time = time.time()
    for step in range(global_step, max_iterations + 1):

        # Save checkpoint
        if step % checkpoint_interval == 0:
            global_step = tf.train.global_step(sess,
                                               global_step_tensor) - step_base

            saver.save(sess,
                       save_path=checkpoint_path,
                       global_step=global_step)

            print('Step {} / {}, Checkpoint saved to {}-{:08d}'.format(
                step, max_iterations, checkpoint_path, global_step))

        # Create feed_dict for inferencing
        feed_dict = model.create_feed_dict()

        # Write summaries and train op
        if step % summary_interval == 0:
            current_time = time.time()
            time_elapsed = current_time - last_time
            last_time = current_time

            # Evaluate train op, merged summaries and all metric tensors
            # in a single session run.
            train_op_loss, summary_out, \
            rpn_score_2d_loss_np, \
            rpn_acc_score_neg_np, \
            rpn_acc_score_pos_np, \
            rpn_class_loss_np, rpn_reg_loss_np, \
            rpn_acc_all_np, rpn_acc_pos_np, \
            refine_class_loss_np, refine_reg_loss_np, \
            avod_acc_all_np, avod_acc_pos_np = sess.run(
                [train_op, summary_merged,
                 rpn_score_2d_loss,
                 rpn_acc_score_neg,
                 rpn_acc_score_pos,
                 rpn_class_loss, rpn_reg_loss,
                 rpn_acc_all, rpn_acc_pos,
                 refine_class_loss, refine_reg_loss,
                 avod_acc_all, avod_acc_pos], feed_dict=feed_dict)

            print('Step {}, Total Loss {:0.3f} | Score {:0.3f}, Acc {:0.2f} {:0.2f} | RPN Class {:0.3f}, Reg {:0.3f}, Acc {:0.2f} {:0.2f} | Final Class {:0.3f}, Reg {:0.3f}, Acc {:0.2f} {:0.2f}'.format(\
                   step, train_op_loss, rpn_score_2d_loss_np, rpn_acc_score_neg_np * 100, \
                   rpn_acc_score_pos_np * 100, rpn_class_loss_np, rpn_reg_loss_np, rpn_acc_all_np * 100, \
                   rpn_acc_pos_np * 100, refine_class_loss_np, refine_reg_loss_np, avod_acc_all_np * 100, avod_acc_pos_np * 100))
            train_writer.add_summary(summary_out, step)

        else:
            # Run the train op only
            sess.run(train_op, feed_dict=feed_dict)

    # Close the summary writers
    train_writer.close()
def train(model, train_config):
    """Training function for detection models with a Mixture-of-Experts head.

    Only the 'mix_of_experts' variables are trained: a zero-learning-rate
    optimizer is applied to all remaining (AVOD) variables so their weights
    stay fixed while the MoE weights are optimized.

    Args:
        model: The detection model object.
        train_config: a train_*pb2 protobuf.
            training i.e. loading RPN weights onto AVOD model.
    """
    # Get model configurations
    model_config = model.model_config

    # Create a variable tensor to hold the global step
    global_step_tensor = tf.Variable(0, trainable=False, name='global_step')

    #############################
    # Get training configurations
    #############################
    max_iterations = train_config.max_iterations
    summary_interval = train_config.summary_interval
    checkpoint_interval = train_config.checkpoint_interval
    max_checkpoints = train_config.max_checkpoints_to_keep

    paths_config = model_config.paths_config
    logdir = paths_config.logdir
    if not os.path.exists(logdir):
        os.makedirs(logdir)

    checkpoint_dir = paths_config.checkpoint_dir
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    checkpoint_path = checkpoint_dir + '/' + \
        model_config.checkpoint_name

    global_summaries = set([])

    # The model should return a dictionary of predictions
    prediction_dict = model.build()

    summary_histograms = train_config.summary_histograms
    summary_img_images = train_config.summary_img_images
    summary_bev_images = train_config.summary_bev_images

    ##############################
    # Setup loss
    ##############################
    losses_dict, total_loss = model.loss(prediction_dict)

    ##############################################################################################
    # Select trainable variables of the original AVOD model to set gradient to 0(var0)

    var_moe = list(tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                     scope='mix_of_experts'))
    var0 = list(tf.trainable_variables())
    var_all_but_var_moe = list(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES))

    # Remove the MoE variables so var0 / var_all_but_var_moe hold only the
    # original AVOD model's variables.
    for var in var_moe:
        var0.remove(var)
        var_all_but_var_moe.remove(var)

    ##############################################################################################
    # Create optimizer with 0 gradient for the AVOD model and an optimizer for the MoE
    training_optimizer0 = tf.train.GradientDescentOptimizer(0.0)
    training_optimizer1 = optimizer_builder.build(train_config.optimizer,
                                                  global_summaries,
                                                  global_step_tensor)
    ##############################################################################################

    # Create the train op. train_op1 is for MoE and train_op0 is for AVOD
    with tf.variable_scope('train_op'):
        train_op1 = slim.learning.create_train_op(
            total_loss,
            training_optimizer1,
            variables_to_train=var_moe,
            clip_gradient_norm=1.0,
            global_step=global_step_tensor)
        train_op0 = slim.learning.create_train_op(
            total_loss,
            training_optimizer0,
            variables_to_train=var0,
            clip_gradient_norm=1.0,
            global_step=global_step_tensor)
        train_op = tf.group(train_op1, train_op0)

    ##############################################################################################

    # Save checkpoints regularly.
    saver = tf.train.Saver(max_to_keep=max_checkpoints, pad_step_number=True)

    # Add the result of the train_op to the summary
    tf.summary.scalar("training_loss", train_op1)
    # Add maximum memory usage summary op
    # This op can only be run on device with gpu
    # so it's skipped on travis
    is_travis = 'TRAVIS' in os.environ
    if not is_travis:
        tf.summary.scalar('max_bytes', tf.contrib.memory_stats.MaxBytesInUse())

    summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))
    summary_merged = summary_utils.summaries_to_keep(
        summaries,
        global_summaries,
        histograms=summary_histograms,
        input_imgs=summary_img_images,
        input_bevs=summary_bev_images)

    allow_gpu_mem_growth = train_config.allow_gpu_mem_growth
    if allow_gpu_mem_growth:
        # GPU memory config
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = allow_gpu_mem_growth
        sess = tf.Session(config=config)
    else:
        sess = tf.Session()

    # Create unique folder name using datetime for summary writer
    datetime_str = str(datetime.datetime.now())
    logdir = logdir + '/train'
    train_writer = tf.summary.FileWriter(logdir + '/' + datetime_str,
                                         sess.graph)

    # Create init op
    init = tf.global_variables_initializer()

    # Continue from last saved checkpoint
    if not train_config.overwrite_checkpoints:
        trainer_utils.load_checkpoints(checkpoint_dir, saver)
        if len(saver.last_checkpoints) > 0:
            checkpoint_to_restore = saver.last_checkpoints[-1]
            saver.restore(sess, checkpoint_to_restore)
        else:
            # No MoE checkpoint yet: initialize everything, then restore
            # the non-MoE variables from the original AVOD checkpoint.
            checkpoint_path_start = train_config.moe_config.initial_avod_checkpoint_path
            variables_to_restore = dict()
            for var in var_all_but_var_moe:
                variables_to_restore[var.op.name] = slim.get_unique_variable(
                    var.op.name)

            init_assign_op, init_feed_dict = slim.assign_from_checkpoint(
                checkpoint_path_start, variables_to_restore)
            sess.run(init)
            sess.run(init_assign_op, init_feed_dict)
            ##############################################################################################
    else:
        # Initialize the variables
        sess.run(init)

    # Read the global step if restored
    global_step = tf.train.global_step(sess, global_step_tensor)
    print('Starting from step {} / {}'.format(global_step, max_iterations))

    # Main Training Loop
    last_time = time.time()
    for step in range(global_step, max_iterations + 1):

        # Save checkpoint
        if step % checkpoint_interval == 0:
            global_step = tf.train.global_step(sess, global_step_tensor)

            saver.save(sess,
                       save_path=checkpoint_path,
                       global_step=global_step)

            print('Step {} / {}, Checkpoint saved to {}-{:08d}'.format(
                step, max_iterations, checkpoint_path, global_step))

        # Create feed_dict for inferencing
        feed_dict = model.create_feed_dict()

        # Write summaries and train op
        if step % summary_interval == 0:
            current_time = time.time()
            time_elapsed = current_time - last_time
            last_time = current_time

            train_op_loss, summary_out = sess.run([train_op1, summary_merged],
                                                  feed_dict=feed_dict)

            print('Step {}, Total Loss {:0.3f}, Time Elapsed {:0.3f} s'.format(
                step, train_op_loss, time_elapsed))
            train_writer.add_summary(summary_out, step)

        else:
            # Run the train op only
            sess.run(train_op1, feed_dict=feed_dict)

    # Close the summary writers
    train_writer.close()
Example no. 6
0
    def test_vgg_layers_build(self):
        """Build the BEV VGG classifier and run one training step.

        Asserts that a single optimization step on one batch produces a
        loss below 1.
        """
        # Minimal optimizer config: plain gradient descent with a constant
        # learning rate of 0.1.
        train_config_text_proto = """
        optimizer {
          gradient_descent {
           learning_rate {
             constant_learning_rate {
               learning_rate: 0.1
              }
            }
          }
        }
        """
        train_config = train_pb2.TrainConfig()
        text_format.Merge(train_config_text_proto, train_config)
        global_summaries = set([])
        batch_size = 1

        with tf.Graph().as_default():
            with tf.name_scope('input'):
                # BEV image placeholder
                image_placeholder = tf.placeholder(tf.float32,
                                                   (None, 700, 800, 6))
                image_summary = tf.expand_dims(image_placeholder, axis=0)
                tf.summary.image("image", image_summary, max_outputs=5)

            # Check invalid BEV shape
            # NOTE(review): (300, 300) differs from the placeholder's
            # 700x800 shape -- presumably exercising preprocess_input's
            # handling of a mismatched shape; confirm intent.
            bev_shape = (300, 300)
            processed_image = self.bev_vgg_cls.preprocess_input(
                image_placeholder, bev_shape)

            predictions, end_points = self.bev_vgg_cls.build(processed_image,
                                                             num_classes=1,
                                                             is_training=True)

            feed_dict, label_pl = fill_feed_dict(self.dataset,
                                                 image_placeholder, batch_size)

            ###########################
            # Loss Function
            ###########################
            cross_entropy = tf.nn.weighted_cross_entropy_with_logits(
                label_pl, predictions, 1.0)
            loss = tf.reduce_mean(cross_entropy)

            ###########################
            # Optimizer
            ###########################
            training_optimizer = optimizer_builder.build(
                train_config.optimizer, global_summaries)

            ###########################
            # Train-op
            ###########################
            train_op = slim.learning.create_train_op(loss, training_optimizer)

            sess = tf.Session()
            init = tf.global_variables_initializer()
            sess.run(init)

            # Rebinds 'loss' from the graph tensor to the evaluated scalar.
            loss = sess.run(train_op, feed_dict=feed_dict)

            self.assertLess(loss, 1)
            print('Loss ', loss)