Example #1
def model_fn(features, labels, mode, params):

    # ***********************************************************************************************
    # *                                        Backbone Net                                         *
    # ***********************************************************************************************
    net_config = params["net_config"]
    IS_TRAINING = (mode == tf.estimator.ModeKeys.TRAIN)

    origin_image_batch = features["image"]
    image_batch = origin_image_batch - tf.convert_to_tensor(
        net_config.PIXEL_MEANS, dtype=tf.float32)
    image_window = features["image_window"]
    # is_training=True here keeps batch norm in training mode, which is important!
    _, share_net = get_network_byname(net_name='resnet_v1_50',
                                      inputs=image_batch,
                                      num_classes=None,
                                      is_training=True,
                                      global_pool=True,
                                      output_stride=None,
                                      spatial_squeeze=True)
    # ***********************************************************************************************
    # *                                             FPN                                             *
    # ***********************************************************************************************
    feature_pyramid = build_fpn.build_feature_pyramid(share_net, net_config)
    # ***********************************************************************************************
    # *                                             RPN                                             *
    # ***********************************************************************************************
    gtboxes_and_label_batch = labels.get("gt_box_labels")
    rpn = build_rpn.RPN(feature_pyramid=feature_pyramid,
                        image_window=image_window,
                        config=net_config)

    # rpn_proposals_scores has shape (2000,)
    rpn_proposals_boxes, rpn_proposals_scores = rpn.rpn_proposals(IS_TRAINING)
    rpn_location_loss, rpn_classification_loss = rpn.rpn_losses(
        labels["minibatch_indices"], labels["minibatch_encode_gtboxes"],
        labels["minibatch_objects_one_hot"])

    rpn_total_loss = rpn_classification_loss + rpn_location_loss
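    # The RPN total loss combines a box-regression (location) term on the sampled
    # anchor minibatch with an objectness classification term.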

    # ***********************************************************************************************
    # *                                        Fast RCNN Head                                       *
    # ***********************************************************************************************

    fpn_fast_rcnn_head = build_head.FPNHead(
        feature_pyramid=feature_pyramid,
        rpn_proposals_boxes=rpn_proposals_boxes,
        origin_image=origin_image_batch,
        gtboxes_and_label=gtboxes_and_label_batch,
        config=net_config,
        is_training=False,
        image_window=image_window)

    detections = fpn_fast_rcnn_head.head_detection()
    if net_config.DEBUG:
        print_tensors(rpn_proposals_scores[0, :50], "scores")
        print_tensors(rpn_proposals_boxes[0, :50, :], "bbox")
        rpn_proposals_vision = draw_boxes_with_scores(
            origin_image_batch[0, :, :, :], rpn_proposals_boxes[0, :50, :],
            rpn_proposals_scores[0, :50])
        head_vision = draw_boxes_with_categories_and_scores(
            origin_image_batch[0, :, :, :], detections[0, :, :4],
            detections[0, :, 4], detections[0, :, 5], net_config.LABEL_TO_NAME)
        tf.summary.image("rpn_proposals_vision", rpn_proposals_vision)
        tf.summary.image("head_vision", head_vision)

    head_location_loss, head_classification_loss = fpn_fast_rcnn_head.head_loss()
    head_total_loss = head_location_loss + head_classification_loss

    # train
    with tf.name_scope("regularization_losses"):
        regularization_list = [
            tf.nn.l2_loss(w.read_value()) * net_config.WEIGHT_DECAY /
            tf.cast(tf.size(w.read_value()), tf.float32)
            for w in tf.trainable_variables()
            if 'gamma' not in w.name and 'beta' not in w.name
        ]
        regularization_loss = tf.add_n(regularization_list)

    total_loss = regularization_loss + head_total_loss + rpn_total_loss
    # Guard against a NaN loss (e.g. from a degenerate batch) so a single bad
    # step does not poison the run.
    total_loss = tf.cond(tf.is_nan(total_loss), lambda: tf.constant(0.0),
                         lambda: total_loss)
    print_tensors(head_total_loss, "head_loss")
    print_tensors(rpn_total_loss, "rpn_loss")
    global_step = tf.train.get_or_create_global_step()
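    # Warm-start: copy every variable under the backbone scope in the pretrained
    # checkpoint into the identically named scope of this graph.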
    tf.train.init_from_checkpoint(
        net_config.CHECKPOINT_DIR,
        {net_config.BACKBONE_NET + "/": net_config.BACKBONE_NET + "/"})
    with tf.name_scope("optimizer"):
        lr = tf.train.piecewise_constant(
            global_step,
            boundaries=[np.int64(net_config.BOUNDARY[0])],
            values=[net_config.LEARNING_RATE, net_config.LEARNING_RATE / 10])
        optimizer = tf.train.MomentumOptimizer(lr,
                                               momentum=net_config.MOMENTUM)
        optimizer = tf.contrib.estimator.TowerOptimizer(optimizer)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
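        # Group the UPDATE_OPS (batch-norm moving mean/variance updates) as a
        # dependency of the gradient step so BN statistics stay in sync.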
        with tf.control_dependencies([tf.group(*update_ops)]):
            grads = optimizer.compute_gradients(total_loss)
            # clip gradients
            grads = tf.contrib.training.clip_gradient_norms(
                grads, net_config.CLIP_GRADIENT_NORM)
            train_op = optimizer.apply_gradients(grads, global_step)

    # ***********************************************************************************************
    # *                                          Summary                                            *
    # ***********************************************************************************************
    # rpn loss and image
    tf.summary.scalar('rpn_location_loss',
                      rpn_location_loss,
                      family="rpn_loss")
    tf.summary.scalar('rpn_classification_loss',
                      rpn_classification_loss,
                      family="rpn_loss")
    tf.summary.scalar('rpn_total_loss', rpn_total_loss, family="rpn_loss")

    tf.summary.scalar('head_location_loss',
                      head_location_loss,
                      family="head_loss")
    tf.summary.scalar('head_classification_loss',
                      head_classification_loss,
                      family="head_loss")
    tf.summary.scalar('head_total_loss', head_total_loss, family="head_loss")
    tf.summary.scalar("regularization_loss", regularization_loss)
    tf.summary.scalar('total_loss', total_loss)
    tf.summary.scalar('learning_rate', lr)

    meta_hook = MetadataHook(save_steps=net_config.SAVE_EVERY_N_STEP *
                             net_config.EPOCH // 2,
                             output_dir=net_config.MODLE_DIR)
    summary_hook = tf.train.SummarySaverHook(
        save_steps=net_config.SAVE_EVERY_N_STEP,
        output_dir=net_config.MODLE_DIR,
        summary_op=tf.summary.merge_all())
    hooks = [summary_hook]
    if net_config.COMPUTE_TIME:
        hooks.append(meta_hook)
    if mode == tf.estimator.ModeKeys.TRAIN:
        return tf.estimator.EstimatorSpec(mode,
                                          loss=total_loss,
                                          train_op=train_op,
                                          training_hooks=hooks)

    # ***********************************************************************************************
    # *                                            EVAL                                             *
    # ***********************************************************************************************
    metric_ap_dict = batch_slice(
        [features["gt_box_labels"][:, :, :4],
         features["gt_box_labels"][:, :, 4],
         detections[:, :, :4], detections[:, :, 4], detections[:, :, 5]],
        lambda x, y, z, u, v: compute_metric_ap(x, y, z, u, v, net_config),
        net_config.PER_GPU_IMAGE)

    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(mode,
                                          loss=total_loss,
                                          eval_metric_ops=metric_ap_dict)
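
# The model_fn above wraps its optimizer in tf.contrib.estimator.TowerOptimizer,
# which must be paired with tf.contrib.estimator.replicate_model_fn for
# multi-GPU replication. A minimal wiring sketch, assuming a train_input_fn and
# a step budget that are not part of this snippet:
#
#     estimator = tf.estimator.Estimator(
#         model_fn=tf.contrib.estimator.replicate_model_fn(model_fn),
#         model_dir=net_config.MODLE_DIR,
#         params={"net_config": net_config})
#     estimator.train(input_fn=train_input_fn, max_steps=max_steps)
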
Example #2
def train():

    with tf.Graph().as_default(), tf.device('/cpu:0'):

        num_gpu = len(cfgs.GPU_GROUP.strip().split(','))
        global_step = slim.get_or_create_global_step()
        lr = warmup_lr(cfgs.LR, global_step, cfgs.WARM_SETP, num_gpu)
        tf.summary.scalar('lr', lr)

        with tf.name_scope('get_batch'):
            if cfgs.IMAGE_PYRAMID:
                shortside_len_list = tf.constant(cfgs.IMG_SHORT_SIDE_LEN)
                shortside_len = tf.random_shuffle(shortside_len_list)[0]
            else:
                shortside_len = cfgs.IMG_SHORT_SIDE_LEN
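            # With IMAGE_PYRAMID enabled, a short-side length is drawn from
            # IMG_SHORT_SIDE_LEN every step (multi-scale training); otherwise a
            # fixed scalar length is used.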

            img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch, img_h_batch, img_w_batch = \
                next_batch(dataset_name=cfgs.DATASET_NAME,
                           batch_size=cfgs.BATCH_SIZE * num_gpu,
                           shortside_len=shortside_len,
                           is_training=True)

        optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)
        retinanet = build_whole_network_refine_retinanet.DetectionNetwork(
            base_network_name=cfgs.NET_NAME, is_training=True)

        # data processing
        inputs_list = []
        for i in range(num_gpu):
            img = tf.expand_dims(img_batch[i], axis=0)

            if cfgs.NET_NAME in [
                    'resnet152_v1d', 'resnet101_v1d', 'resnet50_v1d'
            ]:
                img = img / tf.constant([cfgs.PIXEL_STD])

            gtboxes_and_label_r = tf.py_func(backward_convert,
                                             inp=[gtboxes_and_label_batch[i]],
                                             Tout=tf.float32)
            gtboxes_and_label_r = tf.reshape(gtboxes_and_label_r, [-1, 6])

            gtboxes_and_label_h = get_horizen_minAreaRectangle(
                gtboxes_and_label_batch[i])
            gtboxes_and_label_h = tf.reshape(gtboxes_and_label_h, [-1, 5])

            num_objects = num_objects_batch[i]
            num_objects = tf.cast(tf.reshape(num_objects, [
                -1,
            ]), tf.float32)

            img_h = img_h_batch[i]
            img_w = img_w_batch[i]

            inputs_list.append([
                img, gtboxes_and_label_h, gtboxes_and_label_r, num_objects,
                img_h, img_w
            ])

        tower_grads = []
        biases_regularizer = tf.no_regularizer
        weights_regularizer = tf.contrib.layers.l2_regularizer(
            cfgs.WEIGHT_DECAY)

        total_loss_dict = {
            'cls_loss': tf.constant(0., tf.float32),
            'reg_loss': tf.constant(0., tf.float32),
            'refine_cls_loss': tf.constant(0., tf.float32),
            'refine_reg_loss': tf.constant(0., tf.float32),
            'total_losses': tf.constant(0., tf.float32),
        }
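        # Running sums of the per-tower losses; each tower adds loss / num_gpu
        # below, so these scalars end up as the average over all GPUs.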

        with tf.variable_scope(tf.get_variable_scope()):
            for i in range(num_gpu):
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('tower_%d' % i):
                        with slim.arg_scope(
                            [slim.model_variable, slim.variable],
                                device='/device:CPU:0'):
                            with slim.arg_scope(
                                [
                                    slim.conv2d, slim.conv2d_in_plane,
                                    slim.conv2d_transpose,
                                    slim.separable_conv2d, slim.fully_connected
                                ],
                                    weights_regularizer=weights_regularizer,
                                    biases_regularizer=biases_regularizer,
                                    biases_initializer=tf.constant_initializer(
                                        0.0)):

                                gtboxes_and_label_h, gtboxes_and_label_r = tf.py_func(
                                    get_gtboxes_and_label,
                                    inp=[
                                        inputs_list[i][1], inputs_list[i][2],
                                        inputs_list[i][3]
                                    ],
                                    Tout=[tf.float32, tf.float32])
                                gtboxes_and_label_h = tf.reshape(
                                    gtboxes_and_label_h, [-1, 5])
                                gtboxes_and_label_r = tf.reshape(
                                    gtboxes_and_label_r, [-1, 6])

                                img = inputs_list[i][0]
                                img_shape = inputs_list[i][-2:]
                                img = tf.image.crop_to_bounding_box(
                                    image=img,
                                    offset_height=0,
                                    offset_width=0,
                                    target_height=tf.cast(
                                        img_shape[0], tf.int32),
                                    target_width=tf.cast(
                                        img_shape[1], tf.int32))

                                outputs = retinanet.build_whole_detection_network(
                                    input_img_batch=img,
                                    gtboxes_batch_h=gtboxes_and_label_h,
                                    gtboxes_batch_r=gtboxes_and_label_r,
                                    gpu_id=i)
                                gtboxes_in_img_h = draw_boxes_with_categories(
                                    img_batch=img,
                                    boxes=gtboxes_and_label_h[:, :-1],
                                    labels=gtboxes_and_label_h[:, -1],
                                    method=0)
                                gtboxes_in_img_r = draw_boxes_with_categories(
                                    img_batch=img,
                                    boxes=gtboxes_and_label_r[:, :-1],
                                    labels=gtboxes_and_label_r[:, -1],
                                    method=1)
                                tf.summary.image(
                                    'Compare/gtboxes_h_gpu:%d' % i,
                                    gtboxes_in_img_h)
                                tf.summary.image(
                                    'Compare/gtboxes_r_gpu:%d' % i,
                                    gtboxes_in_img_r)

                                if cfgs.ADD_BOX_IN_TENSORBOARD:
                                    detections_in_img = draw_boxes_with_categories_and_scores(
                                        img_batch=img,
                                        boxes=outputs[0],
                                        scores=outputs[1],
                                        labels=outputs[2],
                                        method=1)
                                    tf.summary.image(
                                        'Compare/final_detection_gpu:%d' % i,
                                        detections_in_img)

                                loss_dict = outputs[-1]

                                total_losses = 0.0
                                for k in loss_dict.keys():
                                    total_losses += loss_dict[k]
                                    total_loss_dict[
                                        k] += loss_dict[k] / num_gpu

                                total_losses = total_losses / num_gpu
                                total_loss_dict['total_losses'] += total_losses

                                if i == num_gpu - 1:
                                    regularization_losses = tf.get_collection(
                                        tf.GraphKeys.REGULARIZATION_LOSSES)
                                    # weight_decay_loss = tf.add_n(slim.losses.get_regularization_losses())
                                    total_losses = total_losses + tf.add_n(
                                        regularization_losses)

                        tf.get_variable_scope().reuse_variables()
                        grads = optimizer.compute_gradients(total_losses)
                        if cfgs.GRADIENT_CLIPPING_BY_NORM is not None:
                            grads = slim.learning.clip_gradient_norms(
                                grads, cfgs.GRADIENT_CLIPPING_BY_NORM)
                        tower_grads.append(grads)

        for k in total_loss_dict.keys():
            tf.summary.scalar('{}/{}'.format(k.split('_')[0], k),
                              total_loss_dict[k])

        if len(tower_grads) > 1:
            grads = sum_gradients(tower_grads)
        else:
            grads = tower_grads[0]
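
        # sum_gradients is this repo's multi-GPU helper. Because every tower's
        # loss was already divided by num_gpu above, summing the per-variable
        # gradients across towers yields the multi-GPU average. A minimal sketch
        # of such a helper (an assumption, not the repo's exact code):
        #
        #     def sum_gradients(tower_grads):
        #         summed = []
        #         for grads_and_vars in zip(*tower_grads):
        #             grads = [g for g, _ in grads_and_vars if g is not None]
        #             var = grads_and_vars[0][1]
        #             summed.append((tf.add_n(grads), var))
        #         return summed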

        final_gvs = []
        with tf.variable_scope('Gradient_Mult'):
            for grad, var in grads:
                scale = 1.
                if '/biases:' in var.name:
                    scale *= cfgs.MUTILPY_BIAS_GRADIENT
                if 'conv_new' in var.name:
                    scale *= 3.
                if not np.allclose(scale, 1.0):
                    grad = tf.multiply(grad, scale)

                final_gvs.append((grad, var))

        # apply_gradient_op = optimizer.apply_gradients(grads, global_step=global_step)
        apply_gradient_op = optimizer.apply_gradients(final_gvs,
                                                      global_step=global_step)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.9999, global_step)
        variables_averages_op = variable_averages.apply(
            tf.trainable_variables())

        train_op = tf.group(apply_gradient_op, variables_averages_op)
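        # Each training step both applies the gradients and refreshes the
        # exponential moving averages of all trainables; evaluation code can
        # later swap in the shadow (averaged) weights.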
        # train_op = optimizer.apply_gradients(final_gvs, global_step=global_step)
        summary_op = tf.summary.merge_all()

        restorer, restore_ckpt = retinanet.get_restorer()
        saver = tf.train.Saver(max_to_keep=5)

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())

        tfconfig = tf.ConfigProto(allow_soft_placement=True,
                                  log_device_placement=False)
        tfconfig.gpu_options.allow_growth = True
        with tf.Session(config=tfconfig) as sess:
            sess.run(init_op)

            # sess.run(tf.initialize_all_variables())
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord, sess=sess)

            summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION)
            tools.mkdir(summary_path)
            summary_writer = tf.summary.FileWriter(summary_path,
                                                   graph=sess.graph)

            if restorer is not None:
                restorer.restore(sess, restore_ckpt)
                print('restored model')

            for step in range(cfgs.MAX_ITERATION // num_gpu):
                training_time = time.strftime('%Y-%m-%d %H:%M:%S',
                                              time.localtime(time.time()))

                if step % cfgs.SHOW_TRAIN_INFO_INTE != 0 and step % cfgs.SMRY_ITER != 0:
                    _, global_stepnp = sess.run([train_op, global_step])

                else:
                    if step % cfgs.SHOW_TRAIN_INFO_INTE == 0 and step % cfgs.SMRY_ITER != 0:
                        start = time.time()

                        _, global_stepnp, total_loss_dict_ = \
                                sess.run([train_op, global_step, total_loss_dict])

                        end = time.time()

                        print('***' * 20)
                        print("""%s: global_step:%d  current_step:%d""" %
                              (training_time,
                               (global_stepnp - 1) * num_gpu, step * num_gpu))
                        print("""per_cost_time:%.3fs""" %
                              ((end - start) / num_gpu))
                        loss_str = ''
                        for k in total_loss_dict_.keys():
                            loss_str += '%s:%.3f\n' % (k, total_loss_dict_[k])
                        print(loss_str)

                    else:
                        if step % cfgs.SMRY_ITER == 0:
                            _, global_stepnp, summary_str = sess.run(
                                [train_op, global_step, summary_op])
                            summary_writer.add_summary(
                                summary_str, (global_stepnp - 1) * num_gpu)
                            summary_writer.flush()

                if (step > 0 and step % (cfgs.SAVE_WEIGHTS_INTE // num_gpu)
                        == 0) or (step >= cfgs.MAX_ITERATION // num_gpu - 1):

                    save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION)
                    if not os.path.exists(save_dir):
                        os.mkdir(save_dir)

                    save_ckpt = os.path.join(
                        save_dir, '{}_'.format(cfgs.DATASET_NAME) + str(
                            (global_stepnp - 1) * num_gpu) + 'model.ckpt')
                    saver.save(sess, save_ckpt)
                    print('weights have been saved')

            coord.request_stop()
            coord.join(threads)
Example #3
def train():

    with tf.Graph().as_default(), tf.device('/cpu:0'):

        global_step = slim.get_or_create_global_step()
        lr = tf.train.piecewise_constant(global_step,
                                         boundaries=[np.int64(cfgs.DECAY_STEP[0]), np.int64(cfgs.DECAY_STEP[1]),
                                                     np.int64(cfgs.DECAY_STEP[2])],
                                         values=[cfgs.LR, cfgs.LR / 10., cfgs.LR / 100., cfgs.LR / 1000.])
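        # Step decay: three boundaries and four values, so the LR drops by 10x
        # at each DECAY_STEP (LR, LR/10, LR/100, LR/1000).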

        optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)
        faster_rcnn = build_whole_network.DetectionNetwork(base_network_name=cfgs.NET_NAME,
                                                           is_training=True)

        with tf.name_scope('get_batch'):
            num_gpu = len(cfgs.GPU_GROUP.strip().split(','))
            img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch, img_h_batch, img_w_batch = \
                next_batch(dataset_name=cfgs.DATASET_NAME,  # 'pascal', 'coco'
                           batch_size=cfgs.BATCH_SIZE * num_gpu,
                           shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
                           is_training=True)
            # gtboxes_and_label = tf.reshape(gtboxes_and_label_batch, [-1, 5])
            # if cfgs.NET_NAME in ['resnet101_v1d', 'resnet50_v1d']:
            #     img_batch = img_batch / tf.constant([cfgs.PIXEL_STD])

        # data processing
        inputs_list = []
        for i in range(num_gpu):
            # img_name = img_name_batch[i]
            img = tf.expand_dims(img_batch[i], axis=0)

            gtboxes_and_label = tf.cast(tf.reshape(gtboxes_and_label_batch[i], [-1, 5]), tf.float32)
            num_objects = num_objects_batch[i]
            num_objects = tf.cast(tf.reshape(num_objects, [-1, ]), tf.float32)

            img_h = img_h_batch[i]
            img_w = img_w_batch[i]
            # img_h = tf.cast(tf.reshape(img_h, [-1, ]), tf.float32)
            # img_w = tf.cast(tf.reshape(img_w, [-1, ]), tf.float32)

            inputs_list.append([img, gtboxes_and_label, num_objects, img_h, img_w])

        # put_op_list = []
        # get_op_list = []
        # for i in range(num_gpu):
        #     with tf.device("/GPU:%s" % i):
        #         area = tf.contrib.staging.StagingArea(
        #             dtypes=[tf.float32, tf.float32, tf.float32])
        #         put_op_list.append(area.put(inputs_list[i]))
        #         get_op_list.append(area.get())

        tower_grads = []
        biases_regularizer = tf.no_regularizer
        weights_regularizer = tf.contrib.layers.l2_regularizer(cfgs.WEIGHT_DECAY)

        total_loss_dict = {
            'rpn_cls_loss': tf.constant(0., tf.float32),
            'rpn_loc_loss': tf.constant(0., tf.float32),
            'fastrcnn_cls_loss': tf.constant(0., tf.float32),
            'fastrcnn_loc_loss': tf.constant(0., tf.float32),
            'total_losses': tf.constant(0., tf.float32),

        }

        with tf.variable_scope(tf.get_variable_scope()):
            for i in range(num_gpu):
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('tower_%d' % i):
                        with slim.arg_scope(
                                [slim.model_variable, slim.variable],
                                device='/device:CPU:0'):
                            with slim.arg_scope([slim.conv2d, slim.conv2d_in_plane,
                                                 slim.conv2d_transpose, slim.separable_conv2d, slim.fully_connected],
                                                weights_regularizer=weights_regularizer,
                                                biases_regularizer=biases_regularizer,
                                                biases_initializer=tf.constant_initializer(0.0)):

                                gtboxes_and_label = tf.py_func(get_gtboxes_and_label,
                                                               inp=[inputs_list[i][1], inputs_list[i][2]],
                                                               Tout=tf.float32)
                                gtboxes_and_label = tf.reshape(gtboxes_and_label, [-1, 5])

                                img = inputs_list[i][0]
                                img_shape = inputs_list[i][-2:]
                                img = tf.image.crop_to_bounding_box(image=img,
                                                                    offset_height=0,
                                                                    offset_width=0,
                                                                    target_height=tf.cast(img_shape[0], tf.int32),
                                                                    target_width=tf.cast(img_shape[1], tf.int32))

                                outputs = faster_rcnn.build_whole_detection_network(input_img_batch=img,
                                                                                    gtboxes_batch=gtboxes_and_label)
                                gtboxes_in_img = show_box_in_tensor.draw_boxes_with_categories(img_batch=img,
                                                                                               boxes=gtboxes_and_label[
                                                                                                     :, :-1],
                                                                                               labels=gtboxes_and_label[
                                                                                                      :, -1])
                                tf.summary.image('Compare/gtboxes_gpu:%d' % i, gtboxes_in_img)

                                if cfgs.ADD_BOX_IN_TENSORBOARD:
                                    detections_in_img = show_box_in_tensor.draw_boxes_with_categories_and_scores(
                                        img_batch=img,
                                        boxes=outputs[0],
                                        scores=outputs[1],
                                        labels=outputs[2])
                                    tf.summary.image('Compare/final_detection_gpu:%d' % i, detections_in_img)

                                loss_dict = outputs[-1]

                                total_losses = 0.0
                                for k in loss_dict.keys():
                                    total_losses += loss_dict[k]
                                    total_loss_dict[k] += loss_dict[k] / num_gpu

                                total_losses = total_losses / num_gpu
                                total_loss_dict['total_losses'] += total_losses

                                if i == num_gpu - 1:
                                    regularization_losses = tf.get_collection(
                                        tf.GraphKeys.REGULARIZATION_LOSSES)
                                    # weight_decay_loss = tf.add_n(slim.losses.get_regularization_losses())
                                    total_losses = total_losses + tf.add_n(regularization_losses)

                        tf.get_variable_scope().reuse_variables()
                        grads = optimizer.compute_gradients(total_losses)
                        tower_grads.append(grads)

        for k in total_loss_dict.keys():
            tf.summary.scalar('{}/{}'.format(k.split('_')[0], k), total_loss_dict[k])

        if len(tower_grads) > 1:
            grads = sum_gradients(tower_grads)
        else:
            grads = tower_grads[0]

        # final_gvs = []
        # with tf.variable_scope('Gradient_Mult'):
        #     for grad, var in grads:
        #         scale = 1.
        #         # if '/biases:' in var.name:
        #         #    scale *= 2.
        #         if 'conv_new' in var.name:
        #             scale *= 3.
        #         if not np.allclose(scale, 1.0):
        #             grad = tf.multiply(grad, scale)
        #         final_gvs.append((grad, var))

        apply_gradient_op = optimizer.apply_gradients(grads, global_step=global_step)

        variable_averages = tf.train.ExponentialMovingAverage(0.9999, global_step)
        variables_averages_op = variable_averages.apply(tf.trainable_variables())

        train_op = tf.group(apply_gradient_op, variables_averages_op)
        # train_op = optimizer.apply_gradients(final_gvs, global_step=global_step)
        summary_op = tf.summary.merge_all()

        restorer, restore_ckpt = faster_rcnn.get_restorer()
        saver = tf.train.Saver(max_to_keep=10)

        init_op = tf.group(
            tf.global_variables_initializer(),
            tf.local_variables_initializer()
        )

        tfconfig = tf.ConfigProto(
            allow_soft_placement=True, log_device_placement=False)
        tfconfig.gpu_options.allow_growth = True
        with tf.Session(config=tfconfig) as sess:
            sess.run(init_op)

            # sess.run(tf.initialize_all_variables())
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord, sess=sess)

            summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION)
            tools.mkdir(summary_path)
            summary_writer = tf.summary.FileWriter(summary_path, graph=sess.graph)

            if restorer is not None:
                restorer.restore(sess, restore_ckpt)
                print('restored model')

            for step in range(cfgs.MAX_ITERATION // num_gpu):
                training_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))


                if step % cfgs.SHOW_TRAIN_INFO_INTE != 0 and step % cfgs.SMRY_ITER != 0:
                    _, global_stepnp = sess.run([train_op, global_step])

                else:
                    if step % cfgs.SHOW_TRAIN_INFO_INTE == 0 and step % cfgs.SMRY_ITER != 0:
                        start = time.time()

                        _, global_stepnp, total_loss_dict_ = \
                            sess.run([train_op, global_step, total_loss_dict])

                        end = time.time()

                        print('***'*20)
                        print("""%s: global_step:%d  current_step:%d"""
                              % (training_time, (global_stepnp-1)*num_gpu, step*num_gpu))
                        print("""per_cost_time:%.3fs"""
                              % ((end - start) / num_gpu))
                        loss_str = ''
                        for k in total_loss_dict_.keys():
                            loss_str += '%s:%.3f\n' % (k, total_loss_dict_[k])
                        print(loss_str)

                    else:
                        if step % cfgs.SMRY_ITER == 0:
                            _, global_stepnp, summary_str = sess.run([train_op, global_step, summary_op])
                            summary_writer.add_summary(summary_str, (global_stepnp-1)*num_gpu)
                            summary_writer.flush()

                if (step > 0 and step % (cfgs.SAVE_WEIGHTS_INTE // num_gpu) == 0) or (step >= cfgs.MAX_ITERATION // num_gpu - 1):

                    save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION)
                    if not os.path.exists(save_dir):
                        os.mkdir(save_dir)

                    save_ckpt = os.path.join(save_dir, 'coco_' + str((global_stepnp-1)*num_gpu) + 'model.ckpt')
                    saver.save(sess, save_ckpt)
                    print('weights have been saved')

            coord.request_stop()
            coord.join(threads)
Example #4
def train():

    faster_rcnn = build_whole_network.DetectionNetwork(
        base_network_name=cfgs.NET_NAME, is_training=True)

    with tf.name_scope('get_batch'):
        img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch = \
            next_batch(dataset_name=cfgs.DATASET_NAME,  # 'pascal', 'coco'
                       batch_size=cfgs.BATCH_SIZE,
                       shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
                       is_training=True)
        gtboxes_and_label = tf.reshape(gtboxes_and_label_batch, [-1, 5])
        if cfgs.NET_NAME in ['resnet101_v1d', 'resnet50_v1d']:
            img_batch = img_batch / tf.constant([cfgs.PIXEL_STD])

    biases_regularizer = tf.no_regularizer
    weights_regularizer = tf.contrib.layers.l2_regularizer(cfgs.WEIGHT_DECAY)

    # list as many types of layers as possible, even if they are not used now
    with slim.arg_scope([
            slim.conv2d, slim.conv2d_in_plane, slim.conv2d_transpose,
            slim.separable_conv2d, slim.fully_connected
    ],
                        weights_regularizer=weights_regularizer,
                        biases_regularizer=biases_regularizer,
                        biases_initializer=tf.constant_initializer(0.0)):
        final_bbox, final_scores, final_category, loss_dict = faster_rcnn.build_whole_detection_network(
            input_img_batch=img_batch, gtboxes_batch=gtboxes_and_label)

    # ----------------------------------------------------------------------------------------------------build loss
    weight_decay_loss = tf.add_n(slim.losses.get_regularization_losses())
    rpn_location_loss = loss_dict['rpn_loc_loss']
    rpn_cls_loss = loss_dict['rpn_cls_loss']
    rpn_total_loss = rpn_location_loss + rpn_cls_loss

    fastrcnn_cls_loss = loss_dict['fastrcnn_cls_loss']
    fastrcnn_loc_loss = loss_dict['fastrcnn_loc_loss']
    fastrcnn_total_loss = fastrcnn_cls_loss + fastrcnn_loc_loss

    if cfgs.USE_ATTENTION:
        mask_total_loss = loss_dict['mask_loss']
        total_loss = rpn_total_loss + fastrcnn_total_loss + weight_decay_loss + mask_total_loss
    else:
        total_loss = rpn_total_loss + fastrcnn_total_loss + weight_decay_loss
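    # Total loss = RPN (cls + loc) + Fast R-CNN head (cls + loc) + weight decay,
    # plus the mask loss when the attention branch is enabled.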

    # ---------------------------------------------------------------------------------------------------add summary

    tf.summary.scalar('RPN_LOSS/cls_loss', rpn_cls_loss)
    tf.summary.scalar('RPN_LOSS/location_loss', rpn_location_loss)
    tf.summary.scalar('RPN_LOSS/rpn_total_loss', rpn_total_loss)

    tf.summary.scalar('FAST_LOSS/fastrcnn_cls_loss', fastrcnn_cls_loss)
    tf.summary.scalar('FAST_LOSS/fastrcnn_location_loss', fastrcnn_loc_loss)
    tf.summary.scalar('FAST_LOSS/fastrcnn_total_loss', fastrcnn_total_loss)

    tf.summary.scalar('LOSS/total_loss', total_loss)
    tf.summary.scalar('LOSS/regular_weights', weight_decay_loss)
    if cfgs.USE_ATTENTION:
        tf.summary.scalar('LOSS/mask_loss', mask_total_loss)

    gtboxes_in_img = show_box_in_tensor.draw_boxes_with_categories(
        img_batch=img_batch,
        boxes=gtboxes_and_label[:, :-1],
        labels=gtboxes_and_label[:, -1])
    if cfgs.ADD_BOX_IN_TENSORBOARD:
        detections_in_img = show_box_in_tensor.draw_boxes_with_categories_and_scores(
            img_batch=img_batch,
            boxes=final_bbox,
            labels=final_category,
            scores=final_scores)
        tf.summary.image('Compare/final_detection', detections_in_img)
    tf.summary.image('Compare/gtboxes', gtboxes_in_img)

    # ___________________________________________________________________________________________________add summary

    global_step = slim.get_or_create_global_step()
    lr = tf.train.piecewise_constant(
        global_step,
        boundaries=[
            np.int64(cfgs.DECAY_STEP[0]),
            np.int64(cfgs.DECAY_STEP[1]),
            np.int64(cfgs.DECAY_STEP[2])
        ],
        values=[cfgs.LR, cfgs.LR / 10., cfgs.LR / 100., cfgs.LR / 1000.])
    tf.summary.scalar('lr', lr)
    optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)
    # optimizer = tf.train.AdamOptimizer(lr)

    # ---------------------------------------------------------------------------------------------compute gradients
    gradients = faster_rcnn.get_gradients(optimizer, total_loss)

    # enlarge_gradients for bias
    if cfgs.MUTILPY_BIAS_GRADIENT:
        gradients = faster_rcnn.enlarge_gradients_for_bias(gradients)

    if cfgs.GRADIENT_CLIPPING_BY_NORM:
        with tf.name_scope('clip_gradients'):
            gradients = slim.learning.clip_gradient_norms(
                gradients, cfgs.GRADIENT_CLIPPING_BY_NORM)

    # train_op
    train_op = optimizer.apply_gradients(grads_and_vars=gradients,
                                         global_step=global_step)
    summary_op = tf.summary.merge_all()
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = faster_rcnn.get_restorer()
    saver = tf.train.Saver(max_to_keep=10)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restored model')
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess, coord)

        summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION)
        tools.mkdir(summary_path)
        summary_writer = tf.summary.FileWriter(summary_path, graph=sess.graph)

        for step in range(cfgs.MAX_ITERATION):
            training_time = time.strftime('%Y-%m-%d %H:%M:%S',
                                          time.localtime(time.time()))

            if step % cfgs.SHOW_TRAIN_INFO_INTE != 0 and step % cfgs.SMRY_ITER != 0:
                _, global_stepnp = sess.run([train_op, global_step])

            else:
                if step % cfgs.SHOW_TRAIN_INFO_INTE == 0 and step % cfgs.SMRY_ITER != 0:
                    start = time.time()

                    if cfgs.USE_ATTENTION:
                        _, global_stepnp, img_name, rpnLocLoss, rpnClsLoss, rpnTotalLoss, \
                        fastrcnnLocLoss, fastrcnnClsLoss, fastrcnnTotalLoss, maskLoss, totalLoss = \
                            sess.run(
                                [train_op, global_step, img_name_batch, rpn_location_loss, rpn_cls_loss, rpn_total_loss,
                                 fastrcnn_loc_loss, fastrcnn_cls_loss, fastrcnn_total_loss, mask_total_loss, total_loss])
                    else:

                        _, global_stepnp, img_name, rpnLocLoss, rpnClsLoss, rpnTotalLoss, \
                        fastrcnnLocLoss, fastrcnnClsLoss, fastrcnnTotalLoss, totalLoss = \
                            sess.run(
                                [train_op, global_step, img_name_batch, rpn_location_loss, rpn_cls_loss, rpn_total_loss,
                                 fastrcnn_loc_loss, fastrcnn_cls_loss, fastrcnn_total_loss, total_loss])

                    end = time.time()
                    if cfgs.USE_ATTENTION:
                        print(""" %s: step%d    image_name:%s |\t
                                  rpn_loc_loss:%.3f |\t rpn_cla_loss:%.3f |\t rpn_total_loss:%.3f |
                                  fast_rcnn_loc_loss:%.3f |\t fast_rcnn_cla_loss:%.3f |\t fast_rcnn_total_loss:%.3f |
                                  mask_loss:%.3f |\t total_loss:%.3f |\t per_cost_time:%.3fs"""
                              %
                              (training_time, global_stepnp, str(
                                  img_name[0]), rpnLocLoss, rpnClsLoss,
                               rpnTotalLoss, fastrcnnLocLoss, fastrcnnClsLoss,
                               fastrcnnTotalLoss, maskLoss, totalLoss,
                               (end - start)))
                    else:

                        print(""" %s: step%d    image_name:%s |\t
                                  rpn_loc_loss:%.3f |\t rpn_cla_loss:%.3f |\t rpn_total_loss:%.3f |
                                  fast_rcnn_loc_loss:%.3f |\t fast_rcnn_cla_loss:%.3f |\t fast_rcnn_total_loss:%.3f |
                                  total_loss:%.3f |\t per_cost_time:%.3fs""" %
                              (training_time, global_stepnp, str(
                                  img_name[0]), rpnLocLoss, rpnClsLoss,
                               rpnTotalLoss, fastrcnnLocLoss, fastrcnnClsLoss,
                               fastrcnnTotalLoss, totalLoss, (end - start)))
                else:
                    if step % cfgs.SMRY_ITER == 0:
                        _, global_stepnp, summary_str = sess.run(
                            [train_op, global_step, summary_op])
                        summary_writer.add_summary(summary_str, global_stepnp)
                        summary_writer.flush()

            if (step > 0 and step % cfgs.SAVE_WEIGHTS_INTE
                    == 0) or (step == cfgs.MAX_ITERATION - 1):

                save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION)
                if not os.path.exists(save_dir):
                    os.mkdir(save_dir)

                save_ckpt = os.path.join(
                    save_dir, 'voc_' + str(global_stepnp) + 'model.ckpt')
                saver.save(sess, save_ckpt)
                print('weights have been saved')

        coord.request_stop()
        coord.join(threads)
Example #5
def train():
    retinanet = build_whole_network.DetectionNetwork(
        base_network_name=cfgs.NET_NAME, is_training=True)

    with tf.name_scope('get_batch'):
        img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch = \
            next_batch(dataset_name=cfgs.DATASET_NAME,  # 'pascal', 'coco'
                       batch_size=cfgs.BATCH_SIZE,
                       shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
                       is_training=True)
        gtboxes_and_label = tf.reshape(gtboxes_and_label_batch, [-1, 5])
        if cfgs.NET_NAME in ['resnet152_v1d', 'resnet101_v1d', 'resnet50_v1d']:
            img_batch = img_batch / tf.constant([cfgs.PIXEL_STD])

    final_bbox, final_scores, final_category, loss_dict = retinanet.build_whole_detection_network(
        input_img_batch=img_batch, gtboxes_batch=gtboxes_and_label)

    # ----------------------------------------------------------------------------------------------------build loss
    weight_decay_loss = tf.add_n(slim.losses.get_regularization_losses())
    cls_loss = loss_dict['cls_loss']
    reg_loss = loss_dict['reg_loss']
    total_loss = cls_loss + reg_loss + weight_decay_loss
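    # Single-stage RetinaNet loss: a classification term (typically focal loss)
    # plus box regression, with slim's collected weight-decay term on top.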

    # ---------------------------------------------------------------------------------------------------add summary
    tf.summary.scalar('RETINANET_LOSS/cls_loss', cls_loss)
    tf.summary.scalar('RETINANET_LOSS/reg_loss', reg_loss)

    tf.summary.scalar('LOSS/total_loss', total_loss)
    tf.summary.scalar('LOSS/regular_weights', weight_decay_loss)

    gtboxes_in_img = show_box_in_tensor.draw_boxes_with_categories(
        img_batch=img_batch,
        boxes=gtboxes_and_label[:, :-1],
        labels=gtboxes_and_label[:, -1])
    if cfgs.ADD_BOX_IN_TENSORBOARD:
        detections_in_img = show_box_in_tensor.draw_boxes_with_categories_and_scores(
            img_batch=img_batch,
            boxes=final_bbox,
            labels=final_category,
            scores=final_scores)
        tf.summary.image('Compare/final_detection', detections_in_img)
    tf.summary.image('Compare/gtboxes', gtboxes_in_img)

    global_step = slim.get_or_create_global_step()
    lr = tf.train.piecewise_constant(
        global_step,
        boundaries=[
            np.int64(cfgs.DECAY_STEP[0]),
            np.int64(cfgs.DECAY_STEP[1])
        ],
        values=[cfgs.LR, cfgs.LR / 10., cfgs.LR / 100.])
    tf.summary.scalar('lr', lr)
    optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)

    # ---------------------------------------------------------------------------------------------compute gradients
    gradients = retinanet.get_gradients(optimizer, total_loss)

    # enlarge_gradients for bias
    if cfgs.MUTILPY_BIAS_GRADIENT:
        gradients = retinanet.enlarge_gradients_for_bias(gradients)

    if cfgs.GRADIENT_CLIPPING_BY_NORM:
        with tf.name_scope('clip_gradients'):
            gradients = slim.learning.clip_gradient_norms(
                gradients, cfgs.GRADIENT_CLIPPING_BY_NORM)

    # train_op
    train_op = optimizer.apply_gradients(grads_and_vars=gradients,
                                         global_step=global_step)
    summary_op = tf.summary.merge_all()
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = retinanet.get_restorer()
    saver = tf.train.Saver(max_to_keep=30)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restored model')
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess, coord)

        summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION)
        if not os.path.exists(summary_path):
            os.makedirs(summary_path)
        summary_writer = tf.summary.FileWriter(summary_path, graph=sess.graph)

        for step in range(cfgs.MAX_ITERATION):
            training_time = time.strftime('%Y-%m-%d %H:%M:%S',
                                          time.localtime(time.time()))

            if step % cfgs.SHOW_TRAIN_INFO_INTE != 0 and step % cfgs.SMRY_ITER != 0:
                _, global_stepnp = sess.run([train_op, global_step])

            else:
                if step % cfgs.SHOW_TRAIN_INFO_INTE == 0 and step % cfgs.SMRY_ITER != 0:
                    start = time.time()

                    _, global_stepnp, img_name, reg_loss_, cls_loss_, total_loss_ = \
                        sess.run(
                            [train_op, global_step, img_name_batch, reg_loss, cls_loss, total_loss])

                    end = time.time()

                    print(""" {}: step{}    image_name:{} |\t
                              reg_loss:{} |\t cls_loss:{} |\t total_loss:{} |per_cost_time:{}s""" \
                          .format(training_time, global_stepnp, str(img_name[0]), reg_loss_, cls_loss_, total_loss_,
                                  (end - start)))
                else:
                    if step % cfgs.SMRY_ITER == 0:
                        _, global_stepnp, summary_str = sess.run(
                            [train_op, global_step, summary_op])
                        summary_writer.add_summary(summary_str, global_stepnp)
                        summary_writer.flush()

            if (step > 0 and step % cfgs.SAVE_WEIGHTS_INTE
                    == 0) or (step == cfgs.MAX_ITERATION - 1):

                save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION)
                if not os.path.exists(save_dir):
                    os.makedirs(save_dir)

                save_ckpt = os.path.join(
                    save_dir, '{}_'.format(cfgs.DATASET_NAME) +
                    str(global_stepnp) + 'model.ckpt')
                saver.save(sess, save_ckpt)
                print('weights have been saved')

        coord.request_stop()
        coord.join(threads)
Example #6
def train():

    # dataset prepare
    dataset_train = pascal_voc('trainval', '2007')
    dataset_val = pascal_voc('test', '2007')

    # network prepare
    faster_rcnn = build_faster_rcnn.DetectionNetwork(base_network_name=cfgs.NET_NAME,
                                                     is_training=True)

    with tf.name_scope('get_batch'):

        img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])
        gtbox_plac = tf.placeholder(dtype=tf.int32, shape=[None, 5])
        # img_batch=tf.random_normal([1,600,600,3],name='image_input')
        # gtboxes_and_label=tf.constant([50,60,100,200,4],dtype=tf.float32)

        img_batch, gtboxes_and_label = preprocess_img(img_plac, gtbox_plac)
        gtboxes_and_label = tf.reshape(gtboxes_and_label, [-1, 5])
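
        # Unlike the queue-based examples above, this variant feeds one image and
        # its boxes per step through the placeholders via feed_dict.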

    biases_regularizer = tf.no_regularizer
    weights_regularizer = tf.contrib.layers.l2_regularizer(cfgs.WEIGHT_DECAY)

    # list as many types of layers as possible, even if they are not used now
    with slim.arg_scope([slim.conv2d, slim.conv2d_in_plane, \
                         slim.conv2d_transpose, slim.separable_conv2d, slim.fully_connected],
                        weights_regularizer=weights_regularizer,
                        biases_regularizer=biases_regularizer,
                        biases_initializer=tf.constant_initializer(0.0)):
        final_bbox, final_scores, final_category, loss_dict = faster_rcnn.build_whole_detection_network(
            input_img_batch=img_batch,
            gtboxes_batch=gtboxes_and_label)

    # ----------------------------------------------------------------------------------------------------build loss
    # weight_decay_loss = tf.add_n(slim.losses.get_regularization_losses())
    rpn_location_loss = loss_dict['rpn_loc_loss']
    rpn_cls_loss = loss_dict['rpn_cls_loss']
    rpn_total_loss = rpn_location_loss + rpn_cls_loss
    rcnn_cls_loss = loss_dict['rcnn_cls_loss']
    rcnn_loc_loss = loss_dict['rcnn_loc_loss']
    rcnn_total_loss = rcnn_cls_loss + rcnn_loc_loss
    total_loss = rpn_total_loss + rcnn_total_loss
    # ____________________________________________________________________________________________________build loss

    reg_loss1 = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    reg_loss2 = tf.losses.get_regularization_loss()
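    # reg_loss1 is the raw list of per-variable regularization tensors, while
    # reg_loss2 is their sum via tf.losses.get_regularization_loss(); both are
    # fetched below only for debugging. Note that total_loss above does not
    # include a weight-decay term (it is commented out).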

    # ---------------------------------------------------------------------------------------------------add summary
    tf.summary.scalar('RPN_LOSS/cls_loss', rpn_cls_loss)
    tf.summary.scalar('RPN_LOSS/location_loss', rpn_location_loss)
    tf.summary.scalar('RPN_LOSS/rpn_total_loss', rpn_total_loss)

    tf.summary.scalar('FAST_LOSS/fastrcnn_cls_loss', rcnn_cls_loss)
    tf.summary.scalar('FAST_LOSS/fastrcnn_location_loss', rcnn_loc_loss)
    tf.summary.scalar('FAST_LOSS/fastrcnn_total_loss', rcnn_total_loss)
    tf.summary.scalar('LOSS/total_loss', total_loss)
    tf.summary.scalar('LOSS/total_loss_val', total_loss)
    # tf.summary.scalar('LOSS/regular_weights', weight_decay_loss)

    gtboxes_in_img = show_box_in_tensor.draw_boxes_with_categories(img_batch=img_batch,
                                                                   boxes=gtboxes_and_label[:, :-1],
                                                                   labels=gtboxes_and_label[:, -1])
    if cfgs.ADD_BOX_IN_TENSORBOARD:
        detections_in_img = show_box_in_tensor.draw_boxes_with_categories_and_scores(img_batch=img_batch,
                                                                                     boxes=final_bbox,
                                                                                     labels=final_category,
                                                                                     scores=final_scores)
        tf.summary.image('Compare/final_detection', detections_in_img)
    tf.summary.image('Compare/gtboxes', gtboxes_in_img)

    # ---------------------------------------------------------------------------------------------------add summary


    global_step = slim.get_or_create_global_step()
    lr = tf.train.piecewise_constant(global_step,
                                     boundaries=[np.int64(cfgs.DECAY_STEP[0])],
                                     values=[cfgs.LR, cfgs.LR / 10.])
    tf.summary.scalar('lr', lr)
    optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)

    # ---------------------------------------------------------------------------------------------compute gradients
    gradients = faster_rcnn.get_gradients(optimizer, total_loss)

    # enlarge_gradients for bias
    if cfgs.MUTILPY_BIAS_GRADIENT:
        gradients = faster_rcnn.enlarge_gradients_for_bias(gradients)

    if cfgs.GRADIENT_CLIPPING_BY_NORM:
        with tf.name_scope('clip_gradients_YJR'):
            gradients = slim.learning.clip_gradient_norms(gradients,
                                                          cfgs.GRADIENT_CLIPPING_BY_NORM)
    # _____________________________________________________________________________________________compute gradients



    # train_op
    train_op = optimizer.apply_gradients(grads_and_vars=gradients,
                                         global_step=global_step)
    summary_op = tf.summary.merge_all()

    tf.summary.scalar('RPN_LOSS/cls_loss_val', rpn_cls_loss)
    tf.summary.scalar('RPN_LOSS/location_loss_val', rpn_location_loss)
    tf.summary.scalar('RPN_LOSS/rpn_total_loss_val', rpn_total_loss)

    tf.summary.scalar('FAST_LOSS/rcnn_cls_loss_val', rcnn_cls_loss)
    tf.summary.scalar('FAST_LOSS/rcnn_location_loss_val', rcnn_loc_loss)
    tf.summary.scalar('FAST_LOSS/rcnn_total_loss_val', rcnn_total_loss)
    summary_op_val = tf.summary.merge([tf.get_collection(tf.GraphKeys.SUMMARIES, 'RPN_LOSS/cls_loss_val'),
                                       tf.get_collection(tf.GraphKeys.SUMMARIES, 'RPN_LOSS/location_loss_val'),
                                       tf.get_collection(tf.GraphKeys.SUMMARIES, 'RPN_LOSS/rpn_total_loss_val'),
                                       tf.get_collection(tf.GraphKeys.SUMMARIES, 'FAST_LOSS/rcnn_cls_loss_val'),
                                       tf.get_collection(tf.GraphKeys.SUMMARIES, 'FAST_LOSS/rcnn_location_loss_val'),
                                       tf.get_collection(tf.GraphKeys.SUMMARIES, 'FAST_LOSS/rcnn_total_loss_val')])
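    # tf.get_collection(tf.GraphKeys.SUMMARIES, scope) filters summary ops by a
    # name regex, so summary_op_val evaluates only the *_val loss scalars;
    # running it on a validation image logs the val losses without triggering
    # the image summaries.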
    init_op = tf.group(
        tf.global_variables_initializer(),
        tf.local_variables_initializer())

    restorer, restore_ckpt = faster_rcnn.get_restorer()
    saver = tf.train.Saver(max_to_keep=30)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restored model')

        summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION)
        tools.mkdir(summary_path)
        summary_writer = tf.summary.FileWriter(summary_path, graph=sess.graph)
        global_stepnp = 0
        for step in range(cfgs.MAX_ITERATION):

            img_id, img, gt_info = dataset_train.next_img(step=step)
            training_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
            if step % cfgs.SHOW_TRAIN_INFO_INTE != 0 and step % cfgs.SMRY_ITER != 0:
                _, global_stepnp = sess.run([train_op, global_step],
                                            feed_dict={img_plac: img, gtbox_plac: gt_info})
            elif step % cfgs.SHOW_TRAIN_INFO_INTE == 0 and step % cfgs.SMRY_ITER != 0:
                start = time.time()
                _, global_stepnp, rpnLocLoss, rpnClsLoss, Cls_loss, Bbox_loss, totalLoss, reg_Loss1, reg_Loss2 = \
                    sess.run([train_op, global_step, rpn_location_loss, rpn_cls_loss, rcnn_cls_loss,
                              rcnn_loc_loss, total_loss, reg_loss1, reg_loss2],
                             feed_dict={img_plac: img, gtbox_plac: gt_info})
                end = time.time()
                print("""{}: step{}  image_name:{}  rpn_loc_loss:{}  rpn_cla_loss:{}  rcnn_cls_loss:{} rcnn_bbox_loss:{}  total_loss:{}  per_cost_time:{}s \n \n"""
                      .format(training_time, global_stepnp, str(img_id), rpnLocLoss, rpnClsLoss, Cls_loss,
                              Bbox_loss, totalLoss, (end - start)))
                print('regularizer 1 loss is ', sum(reg_Loss1))
                print('regularizer 2 loss is ', reg_Loss2)
            elif step % cfgs.SMRY_ITER == 0:
                _, global_stepnp, summary_str = sess.run([train_op, global_step, summary_op],
                                                         feed_dict={img_plac: img, gtbox_plac: gt_info})
                summary_writer.add_summary(summary_str, global_stepnp)
                summary_writer.flush()

            if step % cfgs.VALIDATE_INTE == 0:
                img_id_val, img_val, gt_info_val = dataset_val.next_img(step=step)
                summary_val = sess.run(summary_op_val,
                                       feed_dict={img_plac: img_val, gtbox_plac: gt_info_val})
                summary_writer.add_summary(summary_val, global_stepnp)
                summary_writer.flush()
            if (step > 0 and step % cfgs.SAVE_WEIGHTS_INTE == 0) or (step == cfgs.MAX_ITERATION - 1):

                save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION)
                if not os.path.exists(save_dir):
                    os.makedirs(save_dir)

                save_ckpt = os.path.join(save_dir, 'voc_' + str(global_stepnp) + 'model.ckpt')
                saver.save(sess, save_ckpt)
                print('weights have been saved')
Ejemplo n.º 7
0
def train():

    # Step 1:
    # clw note: pass the network name (e.g. resnet_v1), the is_training flag, and the
    #           number of anchor boxes per location to build the base network.
    faster_rcnn = build_whole_network.DetectionNetwork(
        base_network_name=cfgs.NET_NAME, is_training=True)
    # clw note: tf.name_scope is mostly used together with tf.Variable() to keep
    #           parameter naming manageable.
    with tf.name_scope('get_batch'):
        # clw note: read and assemble this batch from the file/memory queues.
        #           Currently only batch_size=1 is supported, i.e. one image is
        #           read per step. The variables are: image name, image tensor,
        #           ground-truth boxes with their labels, and the number of objects
        #           in the image, each shaped as [batch size, per-image info].
        img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch = \
            next_batch(dataset_name=cfgs.DATASET_NAME,  # 'pascal', 'coco'
                       batch_size=cfgs.BATCH_SIZE,
                       shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
                       is_training=True)
        gtboxes_and_label = tf.reshape(gtboxes_and_label_batch, [-1, 5])
        # clw note: the number of boxes m is unknown, but each box has 4 coordinates
        #           plus 1 label, i.e. 5 values; -1 lets TF infer m automatically.
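        # e.g. a (1, m, 5) batch holding m ground-truth boxes flattens to (m, 5)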

    biases_regularizer = tf.no_regularizer
    weights_regularizer = tf.contrib.layers.l2_regularizer(cfgs.WEIGHT_DECAY)

    # Step 2:
    # clw note: build the Faster R-CNN network itself!

    # First look at the declaration of the arg_scope function used below:
    # @tf_contextlib.contextmanager
    # def arg_scope(list_ops_or_scope, **kwargs): it applies the defaults in **kwargs
    # to every op listed in list_ops_or_scope.
    # arg_scope itself is wrapped by the decorator @tf_contextlib.contextmanager:
    # after the @ comes a callable that runs auxiliary work around the decorated function.
    # A small demo:
    #########################################
    # import time
    # def my_time(func):
    #     print(time.ctime())
    #     return func()
    #
    # @my_time  # @my_time is equivalent to xxx = my_time(xxx), so it runs at definition time; mind Python's execution order
    # def xxx():
    #     print('Hello world!')
    #
    # Output:
    # Wed Jul 26 23:01:21 2017
    # Hello world!
    ##########################################
    # In this example xxx does the main job of printing Hello world, and the decorator
    # @my_time adds the auxiliary step of printing the time: my_time receives xxx as an
    # argument, prints the current time, then executes xxx.
    # Details: https://www.cnblogs.com/zzy-tf/p/9356883.html

    # Another demo:
    ##########################################
    # with slim.arg_scope(
    #                 [slim.conv2d, slim.max_pool2d, slim.avg_pool2d], stride=1, padding='VALID'):
    #             net = slim.conv2d(inputs, 32, [3, 3], stride=2, scope='Conv2d_1a_3x3')
    #             net = slim.conv2d(net, 32, [3, 3], scope='Conv2d_2a_3x3')
    #             net = slim.conv2d(net, 64, [3, 3], padding='SAME', scope='Conv2d_2b_3x3')
    # Inside such a scope, slim.conv2d() etc. can be called with the defaults already set.
    # In the code below, unless overridden, slim.conv2d, slim.max_pool2d and slim.avg_pool2d
    # all default to stride 1 and 'VALID' padding. Each call can still declare its own
    # values; the shared defaults simply avoid repeating the same arguments everywhere,
    # which saves time when building deeper networks.
    with slim.arg_scope([
            slim.conv2d, slim.conv2d_in_plane, slim.conv2d_transpose,
            slim.separable_conv2d, slim.fully_connected
    ],
                        weights_regularizer=weights_regularizer,
                        biases_regularizer=biases_regularizer,
                        biases_initializer=tf.constant_initializer(
                            0.0)):  # list as many types of layers as possible,
        # even if they are not used now

        # build_whole_detection_network builds the full architecture: the backbone,
        # the RPN, the pooling layer, and the downstream head.
        # Returns: the final predicted boxes, classes and scores, plus the losses of
        # the whole network and of the RPN, all collected into one dictionary.
        final_bbox, final_scores, final_category, loss_dict = faster_rcnn.build_whole_detection_network(
            input_img_batch=img_batch, gtboxes_batch=gtboxes_and_label)

    # ----------------------------------------------------------------------------------------------------build loss
    # weight_decay_loss = tf.add_n(slim.losses.get_regularization_losses())
    # weight_decay_loss = tf.add_n(tf.losses.get_regularization_losses())
    rpn_location_loss = loss_dict['rpn_loc_loss']
    rpn_cls_loss = loss_dict['rpn_cls_loss']
    rpn_total_loss = rpn_location_loss + rpn_cls_loss

    fastrcnn_cls_loss = loss_dict['fastrcnn_cls_loss']
    fastrcnn_loc_loss = loss_dict['fastrcnn_loc_loss']
    fastrcnn_total_loss = fastrcnn_cls_loss + fastrcnn_loc_loss

    # clw note: per the loss formula in the paper, the RPN (classification, regression)
    #           losses and the Fast R-CNN (classification, regression) losses are simply
    #           summed to form the total training loss.
    total_loss = rpn_total_loss + fastrcnn_total_loss
    # ____________________________________________________________________________________________________build loss

    # ---------------------------------------------------------------------------------------------------add summary
    tf.summary.scalar('RPN_LOSS/cls_loss', rpn_cls_loss)
    tf.summary.scalar('RPN_LOSS/location_loss', rpn_location_loss)
    tf.summary.scalar('RPN_LOSS/rpn_total_loss', rpn_total_loss)

    tf.summary.scalar('FAST_LOSS/fastrcnn_cls_loss', fastrcnn_cls_loss)
    tf.summary.scalar('FAST_LOSS/fastrcnn_location_loss', fastrcnn_loc_loss)
    tf.summary.scalar('FAST_LOSS/fastrcnn_total_loss', fastrcnn_total_loss)

    tf.summary.scalar('LOSS/total_loss', total_loss)
    # tf.summary.scalar('LOSS/regular_weights', weight_decay_loss)

    gtboxes_in_img = show_box_in_tensor.draw_boxes_with_categories(
        img_batch=img_batch,
        boxes=gtboxes_and_label[:, :-1],
        labels=gtboxes_and_label[:, -1])
    if cfgs.ADD_BOX_IN_TENSORBOARD:
        detections_in_img = show_box_in_tensor.draw_boxes_with_categories_and_scores(
            img_batch=img_batch,
            boxes=final_bbox,
            labels=final_category,
            scores=final_scores)
        tf.summary.image('Compare/final_detection', detections_in_img)
    tf.summary.image('Compare/gtboxes', gtboxes_in_img)

    # ___________________________________________________________________________________________________add summary

    global_step = slim.get_or_create_global_step()
    lr = tf.train.piecewise_constant(
        global_step,
        boundaries=[
            np.int64(cfgs.DECAY_STEP[0]),
            np.int64(cfgs.DECAY_STEP[1])
        ],
        values=[cfgs.LR, cfgs.LR / 10., cfgs.LR / 100.])
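    # Three-stage schedule: LR, then LR / 10 after DECAY_STEP[0], then LR / 100
    # after DECAY_STEP[1].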
    tf.summary.scalar('lr', lr)
    optimizer = tf.train.MomentumOptimizer(
        lr, momentum=cfgs.MOMENTUM)  # clw note: choice of optimizer; others are worth
    # trying, e.g. tf.train.AdamOptimizer(1e-4).minimize(total_loss)

    # ---------------------------------------------------------------------------------------------compute gradients

    # clw note: why minimize() is not called on the optimizer above:
    # minimize() both computes the gradients and applies them to the variables.
    # To process the gradients in a custom way instead, follow three steps:
    # 1. compute the gradients with compute_gradients(); get_gradients() below is
    #    in fact optimizer.compute_gradients(loss);
    # 2. process the gradients however you like;
    # 3. apply the processed gradients with apply_gradients().
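    # In other words, a sketch of the equivalence (my_gradient_processing is a
    # hypothetical hook, not part of this repo):
    #
    #   grads_and_vars = optimizer.compute_gradients(total_loss)
    #   grads_and_vars = my_gradient_processing(grads_and_vars)
    #   optimizer.apply_gradients(grads_and_vars, global_step=global_step)
    #
    # optimizer.minimize(total_loss) performs the first and last steps with no
    # custom processing in between.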

    gradients = faster_rcnn.get_gradients(optimizer, total_loss)

    # enlarge_gradients for bias
    if cfgs.MUTILPY_BIAS_GRADIENT:
        gradients = faster_rcnn.enlarge_gradients_for_bias(gradients)

    if cfgs.GRADIENT_CLIPPING_BY_NORM:  # clw note: clip_by_norm caps the maximum norm of each gradient, a common safeguard against exploding gradients
        with tf.name_scope('clip_gradients_YJR'):
            gradients = slim.learning.clip_gradient_norms(
                gradients, cfgs.GRADIENT_CLIPPING_BY_NORM)
    # _____________________________________________________________________________________________compute gradients

    # train_op
    train_op = optimizer.apply_gradients(grads_and_vars=gradients,
                                         global_step=global_step)
    summary_op = tf.summary.merge_all()
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = faster_rcnn.get_restorer()
    saver = tf.train.Saver(max_to_keep=30)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess, coord)

        summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION)
        # tools.mkdir(summary_path)
        if not os.path.exists(summary_path):
            os.makedirs(summary_path)
        summary_writer = tf.summary.FileWriter(summary_path, graph=sess.graph)

        for step in range(cfgs.MAX_ITERATION):
            training_time = time.strftime('%Y-%m-%d %H:%M:%S',
                                          time.localtime(time.time()))

            if step % cfgs.SHOW_TRAIN_INFO_INTE != 0 and step % cfgs.SMRY_ITER != 0:
                _, global_stepnp = sess.run([train_op, global_step])

            elif step % cfgs.SHOW_TRAIN_INFO_INTE == 0 and step % cfgs.SMRY_ITER != 0:
                start = time.time()

                _, global_stepnp, img_name, rpnLocLoss, rpnClsLoss, rpnTotalLoss, \
                fastrcnnLocLoss, fastrcnnClsLoss, fastrcnnTotalLoss, totalLoss = \
                    sess.run(
                        [train_op, global_step, img_name_batch, rpn_location_loss, rpn_cls_loss, rpn_total_loss,
                         fastrcnn_loc_loss, fastrcnn_cls_loss, fastrcnn_total_loss, total_loss])

                end = time.time()
                print(""" {}: step{}    image_name:{} |\t
                          rpn_loc_loss:{} |\t rpn_cla_loss:{} |\t rpn_total_loss:{} |
                          fast_rcnn_loc_loss:{} |\t fast_rcnn_cla_loss:{} |\t fast_rcnn_total_loss:{} |
                          total_loss:{} |\t per_cost_time:{}s"""
                      .format(training_time, global_stepnp, str(img_name[0]), rpnLocLoss, rpnClsLoss,
                              rpnTotalLoss, fastrcnnLocLoss, fastrcnnClsLoss, fastrcnnTotalLoss, totalLoss,
                              (end - start)))
            elif step % cfgs.SMRY_ITER == 0:
                _, global_stepnp, summary_str = sess.run(
                    [train_op, global_step, summary_op])
                summary_writer.add_summary(summary_str, global_stepnp)
                summary_writer.flush()

            if (step > 0 and step % cfgs.SAVE_WEIGHTS_INTE == 0) or (step == cfgs.MAX_ITERATION - 1):

                save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION)
                if not os.path.exists(save_dir):
                    os.makedirs(save_dir)

                save_ckpt = os.path.join(
                    save_dir, 'voc_' + str(global_stepnp) + 'model.ckpt')
                saver.save(sess, save_ckpt)
                print('weights have been saved')

        coord.request_stop()
        coord.join(threads)
Ejemplo n.º 8
0
def model_fn(features, labels, mode, params, config):

    # ***********************************************************************************************
    # *                                         share net                                           *
    # ***********************************************************************************************
    net_config = params["net_config"]
    if mode == tf.estimator.ModeKeys.TRAIN:
        IS_TRAINING = True
    else:
        IS_TRAINING = False

    origin_image_batch = features["image"]
    image_window = features["image_window"]
    image_batch = origin_image_batch - net_config.PIXEL_MEANS
    # is_training here controls whether batch norm runs in training mode, so it is important!
    _, share_net = get_network_byname(inputs=image_batch,
                                      config=net_config,
                                      is_training=False,
                                      reuse=tf.AUTO_REUSE)
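    # reuse=tf.AUTO_REUSE lets this backbone share its weights with the second
    # call below that embeds the reference (support) images.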
    # ***********************************************************************************************
    # *                                            fpn                                              *
    # ***********************************************************************************************
    feature_pyramid = build_fpn.build_feature_pyramid(share_net, net_config)
    # ***********************************************************************************************
    # *                                            rpn                                              *
    # ***********************************************************************************************
    gtboxes_and_label_batch = labels.get("gt_box_labels")
    rpn = build_rpn.RPN(feature_pyramid=feature_pyramid,
                        image_window=image_window,
                        config=net_config)

    # rpn_proposals_scores==(2000,)
    rpn_proposals_boxes, rpn_proposals_scores = rpn.rpn_proposals(IS_TRAINING)
    rpn_location_loss, rpn_classification_loss = rpn.rpn_losses(
        labels["minibatch_indices"], labels["minibatch_encode_gtboxes"],
        labels["minibatch_objects_one_hot"])

    rpn_total_loss = rpn_classification_loss + rpn_location_loss

    # ***********************************************************************************************
    # *                                        Reference image                                      *
    # ***********************************************************************************************
    reference_image = load_reference_image()
    reference_image = tf.cast(reference_image, tf.float32)
    reference_image = reference_image - net_config.PIXEL_MEANS
    _, reference_share_net = get_network_byname(inputs=reference_image,
                                                config=net_config,
                                                is_training=False,
                                                reuse=tf.AUTO_REUSE)
    reference_feature_pyramid = build_fpn.build_feature_pyramid(
        reference_share_net, net_config)
    # average the features of support images
    # reference_feature_pyramid[key](C*S, H, W, 256)---->(C, 7, 7, 256)
    with tf.variable_scope('reference_feature_origision'):
        for key, value in reference_feature_pyramid.items():
            reference_feature_pyramid[key] = tf.image.resize_bilinear(
                reference_feature_pyramid[key],
                (net_config.ROI_SIZE, net_config.ROI_SIZE))

            reference_feature_pyramid[key] = tf.reduce_mean(tf.reshape(
                reference_feature_pyramid[key],
                (net_config.NUM_CLASS - 1, net_config.NUM_SUPPROTS,
                 net_config.ROI_SIZE, net_config.ROI_SIZE, 256)),
                                                            axis=1)
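            # Shape walkthrough: (C*S, ROI, ROI, 256) is regrouped into
            # (C, S, ROI, ROI, 256) and averaged over the S support shots,
            # leaving one (ROI, ROI, 256) prototype per foreground class.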
        # average the features of fpn features
        average_fpn_feature = []
        for key, value in reference_feature_pyramid.items():
            average_fpn_feature.append(value)
        reference_fpn_features = tf.reduce_mean(tf.stack(average_fpn_feature,
                                                         axis=0),
                                                axis=0)
        # compute the negative features
        with tf.variable_scope("reference_negative"):
            with slim.arg_scope(
                [slim.conv2d],
                    padding="SAME",
                    weights_initializer=tf.glorot_uniform_initializer(),
                    weights_regularizer=slim.l2_regularizer(
                        net_config.WEIGHT_DECAY)):
                # the shape of positive features is (1, H, W, C*channels)
                positive_features = tf.reshape(
                    tf.transpose(reference_fpn_features, (1, 2, 0, 3)),
                    (1, net_config.ROI_SIZE, net_config.ROI_SIZE,
                     (net_config.NUM_CLASS - 1) * 256))
                # (1, H, W, channels)
                negative_feature = slim.conv2d(positive_features,
                                               num_outputs=256,
                                               kernel_size=[3, 3],
                                               stride=1)
                total_reference_feature = tf.concat(
                    [negative_feature, reference_fpn_features], axis=0)
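                # Concatenating the learned negative (background) feature with the
                # class prototypes yields a (NUM_CLASS, ROI, ROI, 256) reference set.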

    # ***********************************************************************************************
    # *                                         Fast RCNN                                           *
    # ***********************************************************************************************

    fast_rcnn = build_fast_rcnn.FastRCNN(
        feature_pyramid=feature_pyramid,
        rpn_proposals_boxes=rpn_proposals_boxes,
        origin_image=origin_image_batch,
        gtboxes_and_label=gtboxes_and_label_batch,
        reference_feature=total_reference_feature,
        config=net_config,
        is_training=False,
        image_window=image_window)

    detections = fast_rcnn.fast_rcnn_detection()
    if net_config.DEBUG:
        rpn_proposals_vision = draw_boxes_with_scores(
            origin_image_batch[0, :, :, :], rpn_proposals_boxes[0, :50, :],
            rpn_proposals_scores[0, :50])
        fast_rcnn_vision = draw_boxes_with_categories_and_scores(
            origin_image_batch[0, :, :, :], detections[0, :, :4],
            detections[0, :, 4], detections[0, :, 5])
        tf.summary.image("rpn_proposals_vision", rpn_proposals_vision)
        tf.summary.image("fast_rcnn_vision", fast_rcnn_vision)

    fast_rcnn_location_loss, fast_rcnn_classification_loss = fast_rcnn.fast_rcnn_loss()
    fast_rcnn_total_loss = 5.0 * fast_rcnn_classification_loss + fast_rcnn_location_loss

    # train
    with tf.variable_scope("regularization_losses"):
        regularization_list = [
            tf.nn.l2_loss(w.read_value()) * net_config.WEIGHT_DECAY /
            tf.cast(tf.size(w.read_value()), tf.float32)
            for w in tf.trainable_variables()
            if 'gamma' not in w.name and 'beta' not in w.name
        ]
        regularization_losses = tf.add_n(regularization_list)

    total_loss = regularization_losses + fast_rcnn_total_loss + rpn_total_loss
    global_step = slim.get_or_create_global_step()
    tf.train.init_from_checkpoint(
        net_config.CHECKPOINT_DIR,
        {net_config.NET_NAME + "/": net_config.NET_NAME + "/"})
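    # Warm-start only the backbone: variables under the NET_NAME/ scope are
    # initialized from the pretrained checkpoint; everything else trains from scratch.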
    with tf.variable_scope("optimizer"):
        lr = tf.train.piecewise_constant(global_step,
                                         boundaries=[
                                             np.int64(net_config.BOUNDARY[0]),
                                             np.int64(net_config.BOUNDARY[1])
                                         ],
                                         values=[
                                             net_config.LEARNING_RATE,
                                             net_config.LEARNING_RATE / 10,
                                             net_config.LEARNING_RATE / 100
                                         ])
        optimizer = tf.train.MomentumOptimizer(lr,
                                               momentum=net_config.MOMENTUM)
        optimizer = tf.contrib.estimator.TowerOptimizer(optimizer)
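        # UPDATE_OPS holds the batch-norm moving-average updates; grouping them
        # ahead of the gradient step keeps BN statistics in sync with training.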
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies([tf.group(*update_ops)]):
            grads = optimizer.compute_gradients(total_loss)
            for i, (g, v) in enumerate(grads):
                if g is not None:
                    grads[i] = (tf.clip_by_norm(g, 5.0), v)  # clip gradients
            train_op = optimizer.apply_gradients(grads, global_step)

    # ***********************************************************************************************
    # *                                          Summary                                            *
    # ***********************************************************************************************
    # rpn loss and image
    tf.summary.scalar('rpn/rpn_location_loss', rpn_location_loss)
    tf.summary.scalar('rpn/rpn_classification_loss', rpn_classification_loss)
    tf.summary.scalar('rpn/rpn_total_loss', rpn_total_loss)

    tf.summary.scalar('fast_rcnn/fast_rcnn_location_loss',
                      fast_rcnn_location_loss)
    tf.summary.scalar('fast_rcnn/fast_rcnn_classification_loss',
                      fast_rcnn_classification_loss)
    tf.summary.scalar('fast_rcnn/fast_rcnn_total_loss', fast_rcnn_total_loss)
    tf.summary.scalar('learning_rate', lr)
    tf.summary.scalar('total_loss', total_loss)

    summary_hook = tf.train.SummarySaverHook(
        save_steps=net_config.SAVE_EVERY_N_STEP,
        output_dir=net_config.MODLE_DIR,
        summary_op=tf.summary.merge_all())

    if mode == tf.estimator.ModeKeys.TRAIN:
        return tf.estimator.EstimatorSpec(mode,
                                          loss=total_loss,
                                          train_op=train_op,
                                          training_hooks=[summary_hook])

    if mode == tf.estimator.ModeKeys.EVAL:
        predicts = {
            "predict_bbox": detections[:, :, :4],
            "predict_class_id": detections[:, :, 5],
            "predict_scores": detections[:, :, 4]
        }
        return tf.estimator.EstimatorSpec(mode,
                                          loss=total_loss,
                                          predictions=predicts)

    if mode == tf.estimator.ModeKeys.PREDICT:
        predicts = {
            "predict_bbox": detections[:, :, :4],
            "predict_class_id": detections[:, :, 5],
            "predict_scores": detections[:, :, 4]
        }

        return tf.estimator.EstimatorSpec(mode, predictions=predicts)
Ejemplo n.º 9
0
def train():

    faster_rcnn = build_whole_network.DetectionNetwork(
        base_network_name=cfgs.NET_NAME, is_training=True)

    with tf.name_scope('get_batch'):
        img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch = \
            next_batch(dataset_name=cfgs.DATASET_NAME,  # 'pascal', 'coco'
                       batch_size=cfgs.BATCH_SIZE,
                       shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
                       is_training=True)
        gtboxes_and_label = tf.reshape(gtboxes_and_label_batch, [-1, 5])

    biases_regularizer = tf.no_regularizer
    weights_regularizer = tf.contrib.layers.l2_regularizer(cfgs.WEIGHT_DECAY)

    # list as many types of layers as possible, even if they are not used now
    with slim.arg_scope([
            slim.conv2d, slim.conv2d_in_plane, slim.conv2d_transpose,
            slim.separable_conv2d, slim.fully_connected
    ],
                        weights_regularizer=weights_regularizer,
                        biases_regularizer=biases_regularizer,
                        biases_initializer=tf.constant_initializer(0.0)):
        result_dict, losses_dict = faster_rcnn.build_whole_detection_network(
            input_img_batch=img_batch, gtboxes_batch=gtboxes_and_label)
    # ----------------------------------------------------------------------------------------------------build loss
    weight_decay_loss = tf.add_n(slim.losses.get_regularization_losses())
    # weight_decay_loss = tf.add_n(tf.losses.get_regularization_losses())

    bbox_loss_m1 = losses_dict['bbox_loss_m1']
    cls_loss_m1 = losses_dict['cls_loss_m1']
    total_loss_m1 = bbox_loss_m1 + cls_loss_m1

    bbox_loss_m2 = losses_dict['bbox_loss_m2']
    cls_loss_m2 = losses_dict['cls_loss_m2']
    total_loss_m2 = bbox_loss_m2 + cls_loss_m2

    bbox_loss_m3 = losses_dict['bbox_loss_m3']
    cls_loss_m3 = losses_dict['cls_loss_m3']
    total_loss_m3 = bbox_loss_m3 + cls_loss_m3

    total_loss = total_loss_m1 + total_loss_m2 + total_loss_m3 + weight_decay_loss
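    # The three SSH detection modules (M1/M2/M3) are trained jointly: the total
    # loss is the sum of the per-module losses plus the weight-decay term.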

    # ---------------------------------------------------------------------------------------------------add summary
    tf.summary.scalar('SSH_M1_LOSS/cls_loss_m1', cls_loss_m1)
    tf.summary.scalar('SSH_M1_LOSS/bbox_loss_m1', bbox_loss_m1)
    tf.summary.scalar('SSH_M1_LOSS/total_loss_m1', total_loss_m1)

    tf.summary.scalar('SSH_M2_LOSS/cls_loss_m2', cls_loss_m2)
    tf.summary.scalar('SSH_M2_LOSS/bbox_loss_m2', bbox_loss_m2)
    tf.summary.scalar('SSH_M2_LOSS/total_loss_m2', total_loss_m2)

    tf.summary.scalar('SSH_M3_LOSS/cls_loss_m3', cls_loss_m3)
    tf.summary.scalar('SSH_M3_LOSS/bbox_loss_m3', bbox_loss_m3)
    tf.summary.scalar('SSH_M3_LOSS/total_loss_m3', total_loss_m3)

    tf.summary.scalar('LOSS/total_loss', total_loss)
    tf.summary.scalar('LOSS/regular_weights', weight_decay_loss)

    gtboxes_in_img = show_box_in_tensor.draw_boxes_with_categories(
        img_batch=img_batch,
        boxes=gtboxes_and_label[:, :-1],
        labels=gtboxes_and_label[:, -1])
    if cfgs.ADD_BOX_IN_TENSORBOARD:

        detections_in_img_m1 = \
            show_box_in_tensor.draw_boxes_with_categories_and_scores(img_batch=img_batch,
                                                                     boxes=result_dict['final_bbox_m1'],
                                                                     labels=result_dict['final_category_m1'],
                                                                     scores=result_dict['final_scores_m1'])
        tf.summary.image('Compare/final_detection_m1', detections_in_img_m1)

        detections_in_img_m2 = \
            show_box_in_tensor.draw_boxes_with_categories_and_scores(img_batch=img_batch,
                                                                     boxes=result_dict['final_bbox_m2'],
                                                                     labels=result_dict['final_category_m2'],
                                                                     scores=result_dict['final_scores_m2'])
        tf.summary.image('Compare/final_detection_m2', detections_in_img_m2)

        detections_in_img_m3 = \
            show_box_in_tensor.draw_boxes_with_categories_and_scores(img_batch=img_batch,
                                                                     boxes=result_dict['final_bbox_m3'],
                                                                     labels=result_dict['final_category_m3'],
                                                                     scores=result_dict['final_scores_m3'])
        tf.summary.image('Compare/final_detection_m3', detections_in_img_m3)

    tf.summary.image('Compare/gtboxes', gtboxes_in_img)

    global_step = slim.get_or_create_global_step()
    lr = tf.train.piecewise_constant(
        global_step,
        boundaries=[
            np.int64(cfgs.DECAY_STEP[0]),
            np.int64(cfgs.DECAY_STEP[1])
        ],
        values=[cfgs.LR, cfgs.LR / 10., cfgs.LR / 100.])
    tf.summary.scalar('lr', lr)
    optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)

    # ---------------------------------------------------------------------------------------------compute gradients
    gradients = faster_rcnn.get_gradients(optimizer, total_loss)

    # enlarge_gradients for bias
    if cfgs.MUTILPY_BIAS_GRADIENT:
        gradients = faster_rcnn.enlarge_gradients_for_bias(gradients)

    if cfgs.GRADIENT_CLIPPING_BY_NORM:
        with tf.name_scope('clip_gradients'):
            gradients = slim.learning.clip_gradient_norms(
                gradients, cfgs.GRADIENT_CLIPPING_BY_NORM)

    # train_op
    train_op = optimizer.apply_gradients(grads_and_vars=gradients,
                                         global_step=global_step)
    summary_op = tf.summary.merge_all()
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = faster_rcnn.get_restorer()
    saver = tf.train.Saver(max_to_keep=30)

    config = tf.ConfigProto()
    # config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess, coord)

        summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION)

        if not os.path.exists(summary_path):
            os.makedirs(summary_path)
        summary_writer = tf.summary.FileWriter(summary_path, graph=sess.graph)

        for step in range(cfgs.MAX_ITERATION):

            training_time = time.strftime('%Y-%m-%d %H:%M:%S',
                                          time.localtime(time.time()))

            # start = time.time()
            # _, global_stepnp, img_name, totalLoss, summary_str = \
            #     sess.run(
            #         [train_op, global_step, img_name_batch, total_loss, summary_op])
            #
            # end = time.time()
            #
            # print(""" {}: step{}    image_name:{} |\t total_loss:{} |\t per_cost_time:{}s""" \
            #       .format(training_time, global_stepnp, str(img_name[0]), totalLoss,
            #               (end - start)))
            # summary_writer.add_summary(summary_str, global_stepnp)
            # summary_writer.flush()

            if step % cfgs.SHOW_TRAIN_INFO_INTE != 0 and step % cfgs.SMRY_ITER != 0:
                _, global_stepnp = sess.run([train_op, global_step])

            elif step % cfgs.SHOW_TRAIN_INFO_INTE == 0 and step % cfgs.SMRY_ITER != 0:
                start = time.time()

                _, global_stepnp, img_name, totalLoss = \
                    sess.run(
                        [train_op, global_step, img_name_batch, total_loss])

                end = time.time()
                print(""" {}: step{}    image_name:{} |\t total_loss:{} |\t per_cost_time:{}s"""
                      .format(training_time, global_stepnp, str(img_name[0]), totalLoss,
                              (end - start)))
            elif step % cfgs.SMRY_ITER == 0:
                _, global_stepnp, summary_str = sess.run(
                    [train_op, global_step, summary_op])
                summary_writer.add_summary(summary_str, global_stepnp)
                summary_writer.flush()

            if (step > 0 and step % cfgs.SAVE_WEIGHTS_INTE == 0) or (step == cfgs.MAX_ITERATION - 1):

                save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION)
                if not os.path.exists(save_dir):
                    os.makedirs(save_dir)

                save_ckpt = os.path.join(
                    save_dir, 'voc_' + str(global_stepnp) + 'model.ckpt')
                saver.save(sess, save_ckpt)
                print('weights have been saved')

        coord.request_stop()
        coord.join(threads)