def __init__(self, config=DefaultModelConfig()):
        self.extractor = VGGFeatureExtractor(config.weights_file)
        self.rpn = RPNModel(config)
        self.rcnn = RCNNModel(config)

        self.weights_pre_trained = config.weights_file

        # input image batch: [N, height, width, channels]
        self.data = tf.placeholder(tf.float32, shape=[None, None, None, 3])
        # per-image metadata, one row per image: [height, width, scale]
        self.im_info = tf.placeholder(tf.float32, shape=[None, 3])
        # ground-truth boxes: [x1, y1, x2, y2, class_id]
        self.gt_boxes = tf.placeholder(tf.float32, shape=[None, 5])
        # dropout keep probability (0.5 during training, 1.0 at inference)
        self.keep_prob = tf.placeholder(tf.float32)
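
# Illustrative feed construction (not from the original source). Shapes follow
# the placeholders above; im_info is assumed to hold [height, width, scale],
# matching how blob['im_info'][0][-1] is read as the scale factor later in
# this file.
import numpy as np

def make_example_feed(model):
    return {
        model.data: np.zeros((1, 600, 800, 3), np.float32),      # one resized image
        model.im_info: np.array([[600, 800, 1.6]], np.float32),  # [h, w, scale]
        model.gt_boxes: np.array([[48, 240, 195, 371, 12]], np.float32),  # x1,y1,x2,y2,cls
        model.keep_prob: 0.5,                                    # training-time dropout
    }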
def generate_anchors_tensor(height, width, cfg=DefaultModelConfig()):

    # The base anchors per feature-map cell are fixed ahead of time, so they
    # can be generated in NumPy and converted to a tensor afterwards.
    anchors = generate_anchors(cfg.anchor_base_size, cfg.anchor_ratios,
                               cfg.anchor_scales)
    anchors_t = tf.convert_to_tensor(anchors, tf.float32)

    num_anchors_t = tf.convert_to_tensor(anchors.shape[0], tf.int32)
    # feature_maps_shape = tf.shape(feature_maps)
    all_anchors_t = generate_shifted_anchors_tensor(height, width, anchors_t,
                                                    cfg.feature_stride)

    return all_anchors_t, num_anchors_t
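
# For reference, a NumPy sketch of the computation generate_shifted_anchors_tensor
# presumably performs on tensors (an assumption based on the standard
# Faster R-CNN anchor grid; this is not the repository's implementation):
import numpy as np

def shifted_anchors_numpy_sketch(height, width, anchors, feature_stride):
    # Replicate the A base anchors at every feature-map cell by adding the
    # cell's (x, y) offset in image coordinates to all four box coordinates.
    shift_x = np.arange(width) * feature_stride
    shift_y = np.arange(height) * feature_stride
    sx, sy = np.meshgrid(shift_x, shift_y)
    shifts = np.stack([sx.ravel(), sy.ravel(), sx.ravel(), sy.ravel()], axis=1)
    # (1, A, 4) + (H*W, 1, 4) -> (H*W, A, 4) -> (H*W*A, 4)
    return (anchors[np.newaxis, :, :] + shifts[:, np.newaxis, :]).reshape(-1, 4)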
def _compute_targets(ex_rois, gt_rois, labels, cfg=DefaultModelConfig()):
    """Compute bounding-box regression targets for an image."""

    assert ex_rois.shape[0] == gt_rois.shape[0]
    assert ex_rois.shape[1] == 4
    assert gt_rois.shape[1] == 4

    targets = bbox_transform(ex_rois, gt_rois)
    if cfg.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        # Optionally normalize targets by a precomputed mean and stdev
        targets = ((targets - np.array(cfg.BBOX_NORMALIZE_MEANS))
                   / np.array(cfg.BBOX_NORMALIZE_STDS))
    return np.hstack(
            (labels[:, np.newaxis], targets)).astype(np.float32, copy=False)
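
# For context, the standard R-CNN regression parameterization that a
# bbox_transform implementation typically computes; a NumPy sketch under the
# assumption that boxes are (x1, y1, x2, y2), not the repository's own code:
import numpy as np

def bbox_transform_sketch(ex_rois, gt_rois):
    # Box widths, heights, and centers (inclusive pixel convention, hence +1).
    ex_w = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
    ex_h = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
    ex_cx = ex_rois[:, 0] + 0.5 * ex_w
    ex_cy = ex_rois[:, 1] + 0.5 * ex_h
    gt_w = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
    gt_h = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
    gt_cx = gt_rois[:, 0] + 0.5 * gt_w
    gt_cy = gt_rois[:, 1] + 0.5 * gt_h
    # Targets: center offsets scaled by box size, log-space size ratios.
    tx = (gt_cx - ex_cx) / ex_w
    ty = (gt_cy - ex_cy) / ex_h
    tw = np.log(gt_w / ex_w)
    th = np.log(gt_h / ex_h)
    return np.stack([tx, ty, tw, th], axis=1)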
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes,
                 cfg=DefaultModelConfig()):
    """Generate a random sample of RoIs comprising foreground and background
    examples.
    """
    # overlaps: (rois x gt_boxes)
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.FG_THRESH)[0]
    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs
    fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.BG_THRESH_HI) &
                       (max_overlaps >= cfg.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = int(min(bg_rois_per_this_image, bg_inds.size))
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[fg_rois_per_this_image:] = 0
    rois = all_rois[keep_inds]

    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels, cfg)

    bbox_targets, bbox_inside_weights = _get_bbox_regression_labels(
        bbox_target_data, num_classes, cfg.BBOX_INSIDE_WEIGHTS)

    return labels, rois, bbox_targets, bbox_inside_weights
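
# Illustrative smoke test (not from the original source). The thresholds named
# in the comments are the py-faster-rcnn reference defaults (FG_THRESH = 0.5,
# background window [BG_THRESH_LO, BG_THRESH_HI) = [0.1, 0.5)); the actual
# values come from cfg. Column 0 of all_rois is the image index, matching the
# [:, 1:5] slicing above.
def _sample_rois_smoke_test():
    all_rois = np.array([[0, 10, 10, 50, 50],    # IoU ~0.81 with the GT -> fg
                         [0, 30, 30, 66, 66]],   # IoU ~0.15 -> bg window
                        dtype=np.float64)
    gt_boxes = np.array([[12, 12, 48, 48, 1]], dtype=np.float64)  # one GT, class 1
    return _sample_rois(all_rois, gt_boxes,
                        fg_rois_per_image=1, rois_per_image=2, num_classes=21)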
def train_net(class_name):
    # define input data
    pascal_data_cfg = PascalVOC2007Input()
    if class_name != 'all':
        if class_name not in pascal_data_cfg.classes:
            print('unknown class: {}, nothing to train'.format(class_name))
            return
        print('training only on class: {}'.format(class_name))
        pascal_data_cfg.name += '_' + class_name
        pascal_data_cfg.train_set = class_name + '_' + pascal_data_cfg.train_set

    # define model
    vgg_faster_rcnn_cfg = DefaultModelConfig(pascal_data_cfg)
    # define train method
    default_train_cfg = DefaultTrainConfig()

    proc = Processor()
    proc.train_loop(pascal_data_cfg, vgg_faster_rcnn_cfg, default_train_cfg)
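
# Typical entry point (illustrative; 'car' is one of the PASCAL VOC classes,
# and 'all' keeps the full class list):
if __name__ == '__main__':
    train_net('car')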
    def filter_roidb(self, cfg=DefaultModelConfig()):
        """Remove roidb entries that have no usable RoIs."""
        def is_valid(entry):
            # Valid images have:
            #   (1) At least one foreground RoI OR
            #   (2) At least one background RoI
            overlaps = entry['max_overlaps']
            # find boxes with sufficient overlap
            fg_inds = np.where(overlaps >= cfg.FG_THRESH)[0]
            # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
            bg_inds = np.where((overlaps < cfg.BG_THRESH_HI)
                               & (overlaps >= cfg.BG_THRESH_LO))[0]
            # image is only valid if such boxes exist
            valid = len(fg_inds) > 0 or len(bg_inds) > 0
            return valid

        num = len(self._roidb)
        filtered_roidb = [entry for entry in self._roidb if is_valid(entry)]
        num_after = len(filtered_roidb)
        print('Filtered {} roidb entries: {} -> {}'.format(
            num - num_after, num, num_after))

        self._roidb = filtered_roidb
        return filtered_roidb, num_after
# -*- coding: utf-8 -*-
# -----------------------------------------------------------------------------
#
#
# -----------------------------------------------------------------------------
from config.config_input import Pascal2007Test
from config.config_model import DefaultModelConfig
from config.config_train import DefaultTestConfig

from trainval.processor import Processor


# define input data
test_data_cfg = Pascal2007Test()

test_model_cfg = DefaultModelConfig(test_data_cfg)
test_cfg = DefaultTestConfig()

proc = Processor()
proc.test_loop(test_data_cfg, test_model_cfg, test_cfg)

print('test done')
    def __init__(self, config=DefaultModelConfig()):
        super(CNNModel, self).__init__()
        self.cfg = config
    def train_loop(self,
                   data=ImageInput(),
                   model_cfg=DefaultModelConfig(),
                   train_cfg=DefaultTrainConfig()):
        # prepare data
        reader = DataReader(data)
        roidb = reader.prepare_roidb()
        _, epe = roidb.filter_roidb(model_cfg)

        # build model
        model = VGGNetFasterRCNNModel(model_cfg)
        predict = model.inference(model.TrainEnd2End)
        # result = model.inference(model.Evaluation)

        # setup trainer
        trainer = Trainer(train_cfg)
        if epe != trainer.cfg.examples_per_epoch:
            print('set examples/epoch to {}'.format(epe))
            trainer.cfg.examples_per_epoch = epe
        if len(data.train_set.split('_')) > 1:
            name = data.train_set.split('_')[0]
            print('set output name to {}'.format(name))
            trainer.cfg.name = name

        loss, train_op = self.train_once(trainer)

        writer = tf.summary.FileWriter(trainer.cfg.train_dir)
        saver = tf.train.Saver()
        restorer = tf.train.Saver(write_version=tf.train.SaverDef.V1) \
            if trainer.cfg.restore_type == 'ckpt_v1' else saver

        max_iters = trainer.cfg.max_steps

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            # model.load_weights_pre_trained(sess)
            start_step = model.set_train_start_point(sess, restorer,
                                                     trainer.cfg.ckpt_file)

            writer.add_graph(sess.graph)
            merged = tf.summary.merge_all()

            for i in range(start_step, max_iters):
                start = time.time()

                blob = roidb.forward()
                feed = {
                    model.data: blob['data'],
                    model.gt_boxes: blob['gt_boxes'],
                    model.im_info: blob['im_info'],
                    model.keep_prob: 0.5
                }

                train_loss, train_pred, _, summary = sess.run(
                    [loss, predict, train_op, merged], feed_dict=feed)

                end = time.time()
                duration = end - start

                format_str = '%s: step %d, loss = %.2f (%.3f sec/batch)'
                print(format_str % (datetime.now(), i, train_loss, duration))

                if i % 10 == 0:
                    writer.add_summary(summary, i)

                if i % (trainer.cfg.save_frequency // 10) == 0 and i > 0:
                    file_name = 'train_iter_' + str(i) + '.ckpt'
                    save_path = trainer.cfg.ckpt_path_base + file_name
                    saver.save(sess, save_path)
                    print('save checkpoint to : {}'.format(save_path))
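
        # Resuming note (illustrative, not from the original source):
        # set_train_start_point above is the repository's own resume hook; the
        # underlying TF1 call it presumably wraps looks like
        #   ckpt = tf.train.latest_checkpoint(checkpoint_dir)
        #   if ckpt:
        #       restorer.restore(sess, ckpt)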
    def demo_loop(self,
                  data=ImageInput(),
                  model_cfg=DefaultModelConfig(),
                  weights_file_path=None):

        reader = DataReader(data)
        roidb = reader.prepare_roidb()
        _, epe = roidb.filter_roidb(model_cfg)

        # build model
        model = VGGNetFasterRCNNModel(model_cfg)
        pred_op = model.inference(model.Evaluation)

        saver = tf.train.Saver(write_version=tf.train.SaverDef.V1)
        # saver = tf.train.Saver()

        max_iters = epe
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            # model.set_train_start_point(sess, saver, weights_file_path)
            saver.restore(sess, weights_file_path)
            # saver.restore(sess, tf.train.latest_checkpoint(weights_file_path))

            for i in range(max_iters):
                start = time.time()

                blob = roidb.forward()

                scale = blob['im_info'][0][-1]
                image = cv2.imread(blob['image'])
                image = cv2.resize(image, None, fx=scale, fy=scale,
                                   interpolation=cv2.INTER_LINEAR)

                feed = {
                    model.data: blob['data'],
                    model.gt_boxes: blob['gt_boxes'],
                    model.im_info: blob['im_info'],
                    model.keep_prob: 1.0
                }

                cls_prob, bbox_pred, rois = sess.run(pred_op, feed_dict=feed)

                # dets is unused in the demo loop; drawing below works directly
                # from the raw predictions
                dets = get_detect_results(data.classes, cls_prob,
                                          np.array(rois)[0][:, 1:5], bbox_pred,
                                          blob['im_info'])

                end = time.time()
                duration = end - start

                format_str = '%s: step %d (%.3f sec/batch)'
                print(format_str % (datetime.now(), i, duration))

                image = draw_bboxes_classes_probs(image, cls_prob,
                                                  np.array(rois)[0][:, 1:5],
                                                  bbox_pred, blob['im_info'],
                                                  blob['gt_boxes'],
                                                  data.classes)
                cv2.imshow('result', image)
                cv2.waitKey()
    def test_loop(self,
                  data=ImageInput(),
                  model_cfg=DefaultModelConfig(),
                  test_cfg=DefaultTestConfig()):
        reader = DataReader(data)
        roidb = reader.prepare_roidb()
        _, epe = roidb.filter_roidb(model_cfg)

        # build model
        model = VGGNetFasterRCNNModel(model_cfg)
        pred_op = model.inference(model.Evaluation)

        saver = tf.train.Saver(write_version=tf.train.SaverDef.V1) \
            if test_cfg.restore_type == 'ckpt_v1' else tf.train.Saver()

        # epe = 20
        max_iters = epe

        all_boxes = [[[] for _ in range(epe)]
                     for _ in range(model_cfg.num_classes)]
        all_gt_boxes = [[] for _ in range(epe)]

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            # model.set_train_start_point(sess, saver, weights_file_path)
            saver.restore(sess, test_cfg.ckpt_file)
            # saver.restore(sess, tf.train.latest_checkpoint(weights_file_path))

            for i in range(max_iters):
                start = time.time()

                blob = roidb.forward()

                scale = blob['im_info'][0][-1]
                image = cv2.imread(blob['image'])
                image = cv2.resize(image, None, fx=scale, fy=scale,
                                   interpolation=cv2.INTER_LINEAR)

                feed = {
                    model.data: blob['data'],
                    model.gt_boxes: blob['gt_boxes'],
                    model.im_info: blob['im_info'],
                    model.keep_prob: 1.0
                }

                cls_prob, bbox_pred, rois = sess.run(pred_op, feed_dict=feed)

                dets = get_detect_results(data.classes, cls_prob,
                                          np.array(rois)[0][:, 1:5], bbox_pred,
                                          blob['im_info'])

                for i_cls in range(1, len(dets)):
                    all_boxes[i_cls][i] = dets[i_cls]
                all_gt_boxes[i] = blob['gt_boxes']

                end = time.time()
                duration = end - start

                format_str = '%s: step %d (%.3f sec/batch)'
                print(format_str % (datetime.now(), i, duration))

                if test_cfg.visual_while_test:
                    image = draw_bboxes_classes_probs(
                        image, cls_prob,
                        np.array(rois)[0][:, 1:5], bbox_pred, blob['im_info'],
                        blob['gt_boxes'], data.classes)

                    # image = image / blob['im_info'][0][-1]
                    # scale = 1.0 / blob['im_info'][0][-1]
                    # image = cv2.resize(image, None, None, scale, scale, interpolation=cv2.INTER_LINEAR)
                    # image += data.pixel_means
                    cv2.imshow('result', image)
                    cv2.waitKey()

        det_file = os.path.join(data.cache_path, data.name + '_detections.pkl')
        with open(det_file, 'wb') as f:
            pickle.dump(all_boxes, f, protocol=-1)
        print('wrote detection results to {}'.format(det_file))

        detects_evaluation(all_boxes, all_gt_boxes, data.classes)
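
        # Offline re-use sketch (illustrative): the pickled detections above
        # can be reloaded later without re-running the network, e.g.
        #   with open(det_file, 'rb') as f:
        #       all_boxes = pickle.load(f)
        # Note that all_gt_boxes is not saved here, so a reloaded session would
        # need to re-collect the ground truth before calling detects_evaluation.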
import tensorflow as tf
import numpy as np
from config.config_input import PascalVOC2007Input
from config.config_model import DefaultModelConfig
from config.config_train import DefaultTrainConfig

from data_process.data_reader import DataReader
from model.VGGnet_faster_rcnn import VGGNetFasterRCNNModel
from trainval.trainer import Trainer

# define input data
pascal_data_cfg = PascalVOC2007Input()
# define model
vgg_faster_rcnn_cfg = DefaultModelConfig(pascal_data_cfg)
# define train method
default_train_cfg = DefaultTrainConfig()

# prepare data
reader = DataReader(pascal_data_cfg)
roidb = reader.prepare_roidb()
roidb.filter_roidb(vgg_faster_rcnn_cfg)

# build model
model = VGGNetFasterRCNNModel(vgg_faster_rcnn_cfg)
predict = model.inference(model.TrainEnd2End)
# result = model.inference(model.Evaluation)

# setup trainer
trainer = Trainer(default_train_cfg)
def get_proposal_target_cfg(cfg=DefaultModelConfig()):
    # pack the config values the proposal-target layer needs
    # (currently just the number of classes)
    params = [cfg.num_classes]
    return params
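
# Hypothetical wiring sketch (names are illustrative, not the repository's
# API). TF1 Faster R-CNN ports typically run the NumPy proposal-target code
# inside the graph via tf.py_func, which is where a plain-Python config list
# like the one returned above gets consumed:
#
#   rois, labels, targets, inside_weights = tf.py_func(
#       proposal_target_layer_py,                  # hypothetical NumPy impl
#       [rpn_rois, gt_boxes, get_proposal_target_cfg(cfg)[0]],
#       [tf.float32, tf.float32, tf.float32, tf.float32])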