def __init__(self, config=DefaultModelConfig()):
    self.extractor = VGGFeatureExtractor(config.weights_file)
    self.rpn = RPNModel(config)
    self.rcnn = RCNNModel(config)
    self.weights_pre_trained = config.weights_file
    # input placeholders: image batch (N, H, W, 3), image meta
    # (height, width, scale), ground-truth boxes (x1, y1, x2, y2, class),
    # and the dropout keep probability
    self.data = tf.placeholder(tf.float32, shape=[None, None, None, 3])
    self.im_info = tf.placeholder(tf.float32, shape=[None, 3])
    self.gt_boxes = tf.placeholder(tf.float32, shape=[None, 5])
    self.keep_prob = tf.placeholder(tf.float32)
def generate_anchors_tensor(height, width, cfg=DefaultModelConfig()):
    # The set of anchors per feature-map cell is predefined, so it can be
    # generated in NumPy and converted to tensors afterwards.
    anchors = generate_anchors(cfg.anchor_base_size, cfg.anchor_ratios,
                               cfg.anchor_scales)
    anchors_t = tf.convert_to_tensor(anchors, tf.float32)
    num_anchors_t = tf.convert_to_tensor(anchors.shape[0], tf.int32)
    all_anchors_t = generate_shifted_anchors_tensor(height, width, anchors_t,
                                                    cfg.feature_stride)
    return all_anchors_t, num_anchors_t
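# For reference, a minimal sketch of what `generate_shifted_anchors_tensor`
# is assumed to do: tile the A base anchors over every feature-map cell by
# adding one (x, y, x, y) shift per cell, in image coordinates. The repo's
# own implementation may differ; the `_sketch` suffix marks this as
# illustrative only.
def generate_shifted_anchors_tensor_sketch(height, width, anchors_t, stride):
    shift_x = tf.range(width) * stride   # x offset of each feature column
    shift_y = tf.range(height) * stride  # y offset of each feature row
    shift_x, shift_y = tf.meshgrid(shift_x, shift_y)
    shifts = tf.stack([tf.reshape(shift_x, [-1]), tf.reshape(shift_y, [-1]),
                       tf.reshape(shift_x, [-1]), tf.reshape(shift_y, [-1])],
                      axis=1)
    shifts = tf.cast(shifts, tf.float32)
    # broadcast (K, 1, 4) shifts against (1, A, 4) anchors -> (K * A, 4)
    all_anchors = tf.expand_dims(shifts, 1) + tf.expand_dims(anchors_t, 0)
    return tf.reshape(all_anchors, [-1, 4])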
def _compute_targets(ex_rois, gt_rois, labels, cfg=DefaultModelConfig()):
    """Compute bounding-box regression targets for an image."""
    assert ex_rois.shape[0] == gt_rois.shape[0]
    assert ex_rois.shape[1] == 4
    assert gt_rois.shape[1] == 4

    targets = bbox_transform(ex_rois, gt_rois)
    if cfg.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        # Optionally normalize targets by a precomputed mean and stdev
        targets = ((targets - np.array(cfg.BBOX_NORMALIZE_MEANS))
                   / np.array(cfg.BBOX_NORMALIZE_STDS))
    return np.hstack(
        (labels[:, np.newaxis], targets)).astype(np.float32, copy=False)
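# For reference, a minimal NumPy sketch of the standard Faster R-CNN box
# parameterization that `bbox_transform` is assumed to implement
# ((dx, dy, dw, dh) regression targets); the repo's own version may differ
# in details such as the +1.0 width/height convention.
def bbox_transform_sketch(ex_rois, gt_rois):
    ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
    ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
    ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
    ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights

    gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
    gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
    gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
    gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights

    # center offsets are normalized by the source box size; scale offsets
    # are log-space ratios, which keeps targets well-conditioned
    targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths
    targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights
    targets_dw = np.log(gt_widths / ex_widths)
    targets_dh = np.log(gt_heights / ex_heights)
    return np.vstack(
        (targets_dx, targets_dy, targets_dw, targets_dh)).transpose()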
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image,
                 num_classes, cfg=DefaultModelConfig()):
    """Generate a random sample of RoIs comprising foreground and background
    examples.
    """
    # overlaps: (rois x gt_boxes)
    # np.float64 (formerly np.float, an alias removed in NumPy 1.24)
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.FG_THRESH)[0]
    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs
    fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image,
                             replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.BG_THRESH_HI) &
                       (max_overlaps >= cfg.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = int(min(bg_rois_per_this_image, bg_inds.size))
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image,
                             replace=False)

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from the various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[fg_rois_per_this_image:] = 0
    rois = all_rois[keep_inds]

    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels, cfg)
    bbox_targets, bbox_inside_weights = _get_bbox_regression_labels(
        bbox_target_data, num_classes, cfg.BBOX_INSIDE_WEIGHTS)

    return labels, rois, bbox_targets, bbox_inside_weights
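# A minimal sketch of what `_get_bbox_regression_labels` is assumed to do
# here: expand the compact N x 5 (class, tx, ty, tw, th) target array into
# the N x 4K layout the RCNN head regresses against, with inside-weights
# marking the 4 active entries of each foreground RoI. The repo's own helper
# may differ in detail.
def _get_bbox_regression_labels_sketch(bbox_target_data, num_classes,
                                       inside_weights):
    clss = bbox_target_data[:, 0]
    bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
    bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
    inds = np.where(clss > 0)[0]  # foreground RoIs only
    for ind in inds:
        cls = int(clss[ind])
        start = 4 * cls
        end = start + 4
        bbox_targets[ind, start:end] = bbox_target_data[ind, 1:]
        bbox_inside_weights[ind, start:end] = inside_weights
    return bbox_targets, bbox_inside_weights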
def train_net(class_name):
    # define input data
    pascal_data_cfg = PascalVOC2007Input()
    if class_name != 'all':
        if class_name not in pascal_data_cfg.classes:
            return
        print('only train specific class: {}'.format(class_name))
        pascal_data_cfg.name += '_' + class_name
        pascal_data_cfg.train_set = class_name + '_' + pascal_data_cfg.train_set
    # define model
    vgg_faster_rcnn_cfg = DefaultModelConfig(pascal_data_cfg)
    # define train method
    default_train_cfg = DefaultTrainConfig()
    proc = Processor()
    proc.train_loop(pascal_data_cfg, vgg_faster_rcnn_cfg, default_train_cfg)
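# Example invocation (a sketch; not necessarily how the repo's own entry
# point calls it). Passing 'all' trains on the full class list, while a
# single PASCAL VOC class name such as 'car' restricts training to it.
if __name__ == '__main__':
    train_net('all')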
def filter_roidb(self, cfg=DefaultModelConfig()):
    """Remove roidb entries that have no usable RoIs."""
    def is_valid(entry):
        # Valid images have:
        # (1) At least one foreground RoI OR
        # (2) At least one background RoI
        overlaps = entry['max_overlaps']
        # find boxes with sufficient overlap
        fg_inds = np.where(overlaps >= cfg.FG_THRESH)[0]
        # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
        bg_inds = np.where((overlaps < cfg.BG_THRESH_HI) &
                           (overlaps >= cfg.BG_THRESH_LO))[0]
        # image is only valid if such boxes exist
        return len(fg_inds) > 0 or len(bg_inds) > 0

    num = len(self._roidb)
    filtered_roidb = [entry for entry in self._roidb if is_valid(entry)]
    num_after = len(filtered_roidb)
    print('Filtered {} roidb entries: {} -> {}'.format(
        num - num_after, num, num_after))
    self._roidb = filtered_roidb
    return filtered_roidb, num_after
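# Illustrative entry for is_valid above (the field name comes from the code;
# the thresholds assume the usual Faster R-CNN defaults, FG_THRESH = 0.5 and
# [BG_THRESH_LO, BG_THRESH_HI) = [0.1, 0.5), which this repo's config may
# override): RoI 0 counts as foreground and RoI 1 as background, so the
# entry is kept; an entry whose overlaps all fall below BG_THRESH_LO would
# be dropped.
example_entry = {'max_overlaps': np.array([0.8, 0.3, 0.05])}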
# -*- coding: utf-8 -*-
# -----------------------------------------------------------------------------
# Test entry point: evaluate a trained Faster R-CNN model on the
# PASCAL VOC 2007 test set.
# -----------------------------------------------------------------------------
from config.config_input import Pascal2007Test
from config.config_model import DefaultModelConfig
from config.config_train import DefaultTestConfig
from trainval.processor import Processor

# define input data
test_data_cfg = Pascal2007Test()
test_model_cfg = DefaultModelConfig(test_data_cfg)
test_cfg = DefaultTestConfig()

proc = Processor()
proc.test_loop(test_data_cfg, test_model_cfg, test_cfg)
print('test done')
def __init__(self, config=DefaultModelConfig()):
    super(CNNModel, self).__init__()
    self.cfg = config
def train_loop(self, data=ImageInput(), model_cfg=DefaultModelConfig(),
               train_cfg=DefaultTrainConfig()):
    # prepare data
    reader = DataReader(data)
    roidb = reader.prepare_roidb()
    _, epe = roidb.filter_roidb(model_cfg)

    # build model (the config parameter is named model_cfg so it is not
    # shadowed by the model object, matching demo_loop/test_loop)
    model = VGGNetFasterRCNNModel(model_cfg)
    predict = model.inference(model.TrainEnd2End)

    # setup trainer
    trainer = Trainer(train_cfg)
    if epe != trainer.cfg.examples_per_epoch:
        print('set examples/epoch to {}'.format(epe))
        trainer.cfg.examples_per_epoch = epe
    if len(data.train_set.split('_')) > 1:
        name = data.train_set.split('_')[0]
        print('set output name to {}'.format(name))
        trainer.cfg.name = name
    loss, train_op = self.train_once(trainer)

    writer = tf.summary.FileWriter(trainer.cfg.train_dir)
    saver = tf.train.Saver()
    restorer = tf.train.Saver(write_version=tf.train.SaverDef.V1) \
        if trainer.cfg.restore_type == 'ckpt_v1' else saver
    max_iters = trainer.cfg.max_steps

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        start_step = model.set_train_start_point(sess, restorer,
                                                 trainer.cfg.ckpt_file)
        writer.add_graph(sess.graph)
        merged = tf.summary.merge_all()
        for i in range(start_step, max_iters):
            start = time.time()
            blob = roidb.forward()
            feed = {
                model.data: blob['data'],
                model.gt_boxes: blob['gt_boxes'],
                model.im_info: blob['im_info'],
                model.keep_prob: 0.5
            }
            train_loss, train_pred, _, summary = sess.run(
                [loss, predict, train_op, merged], feed_dict=feed)
            duration = time.time() - start
            format_str = '%s: step %d, loss = %.2f (%.3f sec/batch)'
            print(format_str % (datetime.now(), i, train_loss, duration))
            if i % 10 == 0:
                writer.add_summary(summary, i)
            if i % (trainer.cfg.save_frequency / 10) == 0 and i > 0:
                file_name = 'train_iter_' + str(i) + '.ckpt'
                save_path = trainer.cfg.ckpt_path_base + file_name
                saver.save(sess, save_path)
                print('save checkpoint to: {}'.format(save_path))
def demo_loop(self, data=ImageInput(), model_cfg=DefaultModelConfig(),
              weights_file_path=None):
    reader = DataReader(data)
    roidb = reader.prepare_roidb()
    _, epe = roidb.filter_roidb(model_cfg)

    # build model
    model = VGGNetFasterRCNNModel(model_cfg)
    pred_op = model.inference(model.Evaluation)
    saver = tf.train.Saver(write_version=tf.train.SaverDef.V1)

    max_iters = epe
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver.restore(sess, weights_file_path)
        for i in range(max_iters):
            start = time.time()
            blob = roidb.forward()
            # rescale the raw image to match the network input scale
            scale = blob['im_info'][0][-1]
            image = cv2.imread(blob['image'])
            image = cv2.resize(image, None, None, scale, scale,
                               interpolation=cv2.INTER_LINEAR)
            feed = {
                model.data: blob['data'],
                model.gt_boxes: blob['gt_boxes'],
                model.im_info: blob['im_info'],
                model.keep_prob: 1.0
            }
            cls_prob, bbox_pred, rois = sess.run(pred_op, feed_dict=feed)
            # dets is currently unused in the demo loop; kept for parity
            # with test_loop
            dets = get_detect_results(data.classes, cls_prob,
                                      np.array(rois)[0][:, 1:5], bbox_pred,
                                      blob['im_info'])
            duration = time.time() - start
            print('%s: step %d, (%.3f sec/batch)'
                  % (datetime.now(), i, duration))
            image = draw_bboxes_classes_probs(image, cls_prob,
                                              np.array(rois)[0][:, 1:5],
                                              bbox_pred, blob['im_info'],
                                              blob['gt_boxes'], data.classes)
            cv2.imshow('result', image)
            cv2.waitKey()
def test_loop(self, data=ImageInput(), model_cfg=DefaultModelConfig(),
              test_cfg=DefaultTestConfig()):
    reader = DataReader(data)
    roidb = reader.prepare_roidb()
    _, epe = roidb.filter_roidb(model_cfg)

    # build model
    model = VGGNetFasterRCNNModel(model_cfg)
    pred_op = model.inference(model.Evaluation)
    saver = tf.train.Saver(write_version=tf.train.SaverDef.V1) \
        if test_cfg.restore_type == 'ckpt_v1' else tf.train.Saver()

    max_iters = epe
    # all_boxes[class_index][image_index] collects per-class detections
    all_boxes = [[[] for _ in range(epe)]
                 for _ in range(model_cfg.num_classes)]
    all_gt_boxes = [[] for _ in range(epe)]
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver.restore(sess, test_cfg.ckpt_file)
        for i in range(max_iters):
            start = time.time()
            blob = roidb.forward()
            scale = blob['im_info'][0][-1]
            image = cv2.imread(blob['image'])
            image = cv2.resize(image, None, None, scale, scale,
                               interpolation=cv2.INTER_LINEAR)
            feed = {
                model.data: blob['data'],
                model.gt_boxes: blob['gt_boxes'],
                model.im_info: blob['im_info'],
                model.keep_prob: 1.0
            }
            cls_prob, bbox_pred, rois = sess.run(pred_op, feed_dict=feed)
            dets = get_detect_results(data.classes, cls_prob,
                                      np.array(rois)[0][:, 1:5], bbox_pred,
                                      blob['im_info'])
            # class index 0 is background, so it is skipped
            for i_cls in range(1, len(dets)):
                all_boxes[i_cls][i] = dets[i_cls]
            all_gt_boxes[i] = blob['gt_boxes']
            duration = time.time() - start
            print('%s: step %d, (%.3f sec/batch)'
                  % (datetime.now(), i, duration))
            if test_cfg.visual_while_test:
                image = draw_bboxes_classes_probs(
                    image, cls_prob, np.array(rois)[0][:, 1:5], bbox_pred,
                    blob['im_info'], blob['gt_boxes'], data.classes)
                cv2.imshow('result', image)
                cv2.waitKey()

    det_file = os.path.join(data.cache_path, data.name + '_detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, protocol=-1)
    print('wrote detection results to {}'.format(det_file))
    detects_evaluation(all_boxes, all_gt_boxes, data.classes)
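# A small sketch for reading the dumped detections back later (path layout
# taken from the dump above; `data_cfg` stands in for the same input config
# passed to test_loop, and `pickle` is the Python 3 counterpart of the
# cPickle import used in this file):
import pickle

def load_detections(data_cfg):
    det_file = os.path.join(data_cfg.cache_path,
                            data_cfg.name + '_detections.pkl')
    with open(det_file, 'rb') as f:
        # all_boxes[class_index][image_index] -> detection array
        return pickle.load(f)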
# -----------------------------------------------------------------------------
# Train entry point: set up the data, model, and trainer configs for
# end-to-end Faster R-CNN training on PASCAL VOC 2007.
# -----------------------------------------------------------------------------
import tensorflow as tf
import numpy as np

from config.config_input import PascalVOC2007Input
from config.config_model import DefaultModelConfig
from config.config_train import DefaultTrainConfig
from data_process.data_reader import DataReader
from model.VGGnet_faster_rcnn import VGGNetFasterRCNNModel
from trainval.trainer import Trainer

# define input data
pascal_data_cfg = PascalVOC2007Input()
# define model
vgg_faster_rcnn_cfg = DefaultModelConfig(pascal_data_cfg)
# define train method
default_train_cfg = DefaultTrainConfig()

# prepare data
reader = DataReader(pascal_data_cfg)
roidb = reader.prepare_roidb()
roidb.filter_roidb(vgg_faster_rcnn_cfg)

# build model
model = VGGNetFasterRCNNModel(vgg_faster_rcnn_cfg)
predict = model.inference(model.TrainEnd2End)

# setup trainer
trainer = Trainer(default_train_cfg)
def get_proposal_target_cfg(cfg=DefaultModelConfig()):
    # Pack the config values the proposal-target layer needs into a plain
    # Python list (despite the name, not a tf tensor).
    return [cfg.num_classes]
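# Hypothetical usage sketch (names here are illustrative, not part of the
# repo): the list form is convenient for handing config values to code that
# cannot take a config object directly, e.g. a tf.py_func-wrapped NumPy layer.
proposal_cfg_values = get_proposal_target_cfg(DefaultModelConfig())
num_classes = proposal_cfg_values[0]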