def preprocess(self):  # 300519 defined explicitly
    X, ratio = self.format_img()  # 140619
    if K.image_dim_ordering() == 'tf':
        X = np.transpose(X, (0, 2, 3, 1))

    # get the feature maps and output from the RPN
    [Y1, Y2, F] = self._model_rpn.predict(X)  # 300519
    R = roi_helpers.rpn_to_roi(Y1, Y2, self._config, K.image_dim_ordering(), overlap_thresh=0.7)

    # convert from (x1,y1,x2,y2) to (x,y,w,h)
    R[:, 2] -= R[:, 0]
    R[:, 3] -= R[:, 1]

    bboxes = {}
    probs = {}
    bbox_threshold = 0.8
    class_mapping = self._config['class_mapping']  # 300519

    for jk in range(R.shape[0] // self._config['num_rois'] + 1):
        ROIs = np.expand_dims(R[self._config['num_rois'] * jk:self._config['num_rois'] * (jk + 1), :], axis=0)
        if ROIs.shape[1] == 0:
            break
        if jk == R.shape[0] // self._config['num_rois']:
            # pad R so the last batch still has num_rois entries
            curr_shape = ROIs.shape
            target_shape = (curr_shape[0], self._config['num_rois'], curr_shape[2])
            ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
            ROIs_padded[:, :curr_shape[1], :] = ROIs
            ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
            ROIs = ROIs_padded

        [P_cls, P_regr] = self._model_classifier.predict([F, ROIs])  # 300519

        for ii in range(P_cls.shape[1]):
            # skip low-confidence detections and the background class (last index)
            if np.max(P_cls[0, ii, :]) < bbox_threshold or np.argmax(P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
                continue

            cls_name = class_mapping[np.argmax(P_cls[0, ii, :])]
            if cls_name not in bboxes:
                bboxes[cls_name] = []
                probs[cls_name] = []

            (x, y, w, h) = ROIs[0, ii, :]
            cls_num = np.argmax(P_cls[0, ii, :])
            try:
                (tx, ty, tw, th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
                tx /= self._config['classifier_regr_std'][0]
                ty /= self._config['classifier_regr_std'][1]
                tw /= self._config['classifier_regr_std'][2]
                th /= self._config['classifier_regr_std'][3]
                x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th)
            except Exception:
                # if the regression correction fails, fall back to the raw ROI
                pass

            bboxes[cls_name].append([self._config['rpn_stride'] * x,
                                     self._config['rpn_stride'] * y,
                                     self._config['rpn_stride'] * (x + w),
                                     self._config['rpn_stride'] * (y + h)])
            probs[cls_name].append(np.max(P_cls[0, ii, :]))

    return [bboxes, probs, ratio]  # 14619 added ratio
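# A minimal sketch of how the [bboxes, probs, ratio] output of preprocess() would
# typically be consumed, assuming the keras-frcnn-style helper
# roi_helpers.non_max_suppression_fast(boxes, probs, overlap_thresh=...);
# get_real_coordinates is a hypothetical helper that undoes the resize ratio
# applied by format_img().
def get_real_coordinates(ratio, x1, y1, x2, y2):
    # map coordinates from the resized image back to the original image
    return (int(round(x1 / ratio)), int(round(y1 / ratio)),
            int(round(x2 / ratio)), int(round(y2 / ratio)))

def decode_detections(bboxes, probs, ratio, overlap_thresh=0.5):
    detections = []
    for cls_name in bboxes:
        # per-class non-max suppression over the accumulated boxes
        new_boxes, new_probs = roi_helpers.non_max_suppression_fast(
            np.array(bboxes[cls_name]), np.array(probs[cls_name]),
            overlap_thresh=overlap_thresh)
        for jk in range(new_boxes.shape[0]):
            (x1, y1, x2, y2) = new_boxes[jk, :]
            detections.append((cls_name, new_probs[jk],
                               get_real_coordinates(ratio, x1, y1, x2, y2)))
    return detections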
def getLosses(clss, regr, img_data, C, module):
    R = roi_helpers.rpn_to_roi(clss, regr, C, K.image_dim_ordering(), module,
                               use_regr=True, overlap_thresh=0.5, max_boxes=300)
    # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format;
    # class_mapping is assumed to be defined at module level
    X2, Y1, Y2, IouS = roi_helpers.calc_iou(R, img_data, C, class_mapping, module)
    if X2 is None:
        return -1

    # the last channel of Y1 flags the background class
    neg_samples = np.where(Y1[0, :, -1] == 1)
    pos_samples = np.where(Y1[0, :, -1] == 0)
    if len(pos_samples) > 0:
        pos_samples = pos_samples[0]
    else:
        pos_samples = []
    return len(pos_samples)
def ROI(Y1, Y2, C, K, module):
    # R, prob = roi_helpers.rpn_to_roi(Y1, Y2, C, K, module, overlap_thresh=0.5)
    box, prob = roi_helpers.rpn_to_roi(Y1, Y2, C, K, module,
                                       overlap_thresh=0.3, max_boxes=1000)
    # tag every proposal with the numeric id of the module that produced it
    modules = np.zeros(prob.shape)
    modules[:] = int(module[-1])
    # convert from (x1,y1,x2,y2) to (x,y,w,h)
    # R[:, 2] -= R[:, 0]
    # R[:, 3] -= R[:, 1]
    # return R, prob
    # print(R)
    return box, prob, modules
def getLosses(clss, regr, img_data, C, module):
    R = roi_helpers.rpn_to_roi(clss, regr, C, K.image_dim_ordering(), module,
                               use_regr=True, overlap_thresh=0.5, max_boxes=300)
    # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
    X2, Y1, Y2, IouS = roi_helpers.calc_iou(R, img_data, C, class_mapping, module)
    print(module)
    print(R)
    # print(X2, Y1, Y2, IouS)

    # reload and resize the image the same way the generator does,
    # padding it into a square canvas of side C.im_size
    x_img = cv2.imread(img_data['filepath'])
    (width, height) = (img_data['width'], img_data['height'])
    (rows, cols, _) = x_img.shape
    (resized_width, resized_height) = data_generators.get_new_img_size(width, height, C.im_size)
    x_img = cv2.resize(x_img, (resized_width, resized_height), interpolation=cv2.INTER_CUBIC)
    final_image = np.zeros((C.im_size, C.im_size, 3))
    final_image[:resized_height, :resized_width, :] = x_img
    x_img = final_image

    if X2 is None:
        return -1, -1

    neg_samples = np.where(Y1[0, :, -1] == 1)
    pos_samples = np.where(Y1[0, :, -1] == 0)
    # cv2.rectangle(img, (x1_gt, y1_gt), (x2_gt, y2_gt), (0, 255, 0), 1)
    if len(pos_samples) > 0:
        pos_samples = pos_samples[0]
    else:
        pos_samples = []
    return len(pos_samples), pos_samples
if __name__ == '__main__':
    C = config.Config()
    arg = '2frame0.jpg'
    test_image = prepare(arg)
    # (train_images, train_labels), (test_images, test_labels) = datasets.cifar10.load_data()
    # train_images, test_images = train_images / 255.0, test_images / 255.0

    input_shape_img = (None, None, 3)
    img_input = Input(shape=input_shape_img)
    anchor_box_scales = [128, 256, 512]
    anchor_box_ratios = [[1, 1], [1, 2], [2, 1]]
    num_anchors = len(anchor_box_scales) * len(anchor_box_ratios)

    features = VGG(10)(img_input)
    rpn = RPN(num_anchors)(features)
    model = Model(inputs=img_input, outputs=rpn)
    model.compile(optimizer='sgd', loss='mse')

    test = test_image.reshape(1, 160, 160, 3)
    values = model.predict(test)
    R = roi_helpers.rpn_to_roi(values[0], values[1], C, K.image_data_format(),
                               use_regr=True, overlap_thresh=0.7, max_boxes=300)
    view_region(test_image[0], R)
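# view_region is not defined in this snippet; a minimal sketch of what it might
# look like, assuming R holds (x1, y1, x2, y2) proposals at input-image scale
# (multiply by the RPN stride first if R is in feature-map coordinates).
def view_region(img, R, max_regions=50):
    vis = img.astype(np.uint8).copy()
    for x1, y1, x2, y2 in R[:max_regions].astype(int):
        cv2.rectangle(vis, (x1, y1), (x2, y2), (0, 255, 0), 1)
    cv2.imshow('proposals', vis)
    cv2.waitKey(0)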
def train_rpn():
    # read the configuration
    cfg = config.Config()

    # load the images and the VOC-format annotations as Img_Data objects
    all_images, classes_count, class_mapping = get_data(cfg.label_file)
    cfg.class_mapping = class_mapping
    # for bbox_num, bbox in enumerate(all_images[0].bboxes):
    #     print(bbox_num, bbox)

    # save the configuration so it can be reloaded at test time
    with open(cfg.config_save_file, 'wb') as config_f:
        pickle.dump(cfg, config_f)
    print('2. Config has been written to {}; load it at test time to ensure correct results'.format(
        cfg.config_save_file))

    print('3. Per-class instance counts:')
    pprint.pprint(classes_count)
    print('4. Number of classes (over train + test sets, including background) = {}'.format(len(classes_count)))

    random.shuffle(all_images)
    print('5. Shuffled the samples')
    train_imgs = [img_data for img_data in all_images if img_data.imageset == 'trainval']
    val_imgs = [img_data for img_data in all_images if img_data.imageset == 'test']
    print('6. Split into training and validation sets: {} training images, {} test images'.format(
        len(train_imgs), len(val_imgs)))

    # shuffle the training data
    random.shuffle(train_imgs)

    # class mapping
    # generate per-anchor ground truth for training the RPN
    data_gen_train = data_generators.get_anchor_gt(train_imgs, classes_count, cfg, nn.get_img_out_length,
                                                   K.image_dim_ordering(), mode='train')
    # data_gen_val = data_generators.get_anchor_gt(val_imgs, classes_count, cfg, nn.get_img_output_length,
    #                                              K.image_dim_ordering(), mode='val')

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
    else:
        input_shape_img = (None, None, 3)
    img_input = Input(shape=input_shape_img)

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(cfg.anchor_box_scales) * len(cfg.anchor_box_ratios)
    rpn = nn.rpn(shared_layers, num_anchors, cfg.num_regions)
    model_rpn = Model(img_input, rpn[:2])

    try:
        print('7. Loading weights from {}'.format(cfg.base_net_weights))
        model_rpn.load_weights(cfg.model_path, by_name=True)
    except Exception as e:
        print(e)
        print('Could not load pretrained model weights')

    optimizer = Adam(lr=1e-5)
    model_rpn.compile(optimizer=optimizer,
                      loss=[losses_fn.rpn_loss_cls(num_anchors), losses_fn.rpn_loss_regr(num_anchors)])

    epoch_length = 500
    num_epochs = int(cfg.num_epochs)
    iter_num = 0
    losses = np.zeros((epoch_length, 2))
    rpn_accuracy_rpn_monitor = []
    rpn_accuracy_for_epoch = []
    start_time = time.time()
    best_loss = np.Inf

    print('8. Starting training')
    for epoch_num in range(num_epochs):
        progbar = generic_utils.Progbar(epoch_length)
        print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))
        while True:
            try:
                if len(rpn_accuracy_rpn_monitor) == epoch_length and cfg.verbose:
                    mean_overlapping_bboxes = float(sum(rpn_accuracy_rpn_monitor)) / len(rpn_accuracy_rpn_monitor)
                    rpn_accuracy_rpn_monitor = []
                    print('Mean number of overlapping bounding boxes from RPN = {} for {} previous iterations'.format(
                        mean_overlapping_bboxes, epoch_length))
                    if mean_overlapping_bboxes == 0:
                        print('RPN is not producing bounding boxes that overlap the ground truth. '
                              'Check RPN settings or keep training.')

                # X is the input data, Y the labels; Y[0].shape is (1, H, W, 18)
                X, Y, img_data = next(data_gen_train)
                loss_rpn = model_rpn.train_on_batch(X, Y)

                p_rpn = model_rpn.predict_on_batch(X)
                result = roi_helpers.rpn_to_roi(p_rpn[0], p_rpn[1], cfg, K.image_dim_ordering(),
                                                use_regr=True, overlap_thresh=0.7, max_boxes=10)
                # visual_rpn(img_data, result)
                print('-------result------')
                # print(result[250])
                # p_rpn[0].shape == (1, 38, 48, 9); p_rpn[1].shape == (1, 38, 48, 36)

                losses[iter_num, 0] = loss_rpn[1]
                losses[iter_num, 1] = loss_rpn[2]
                iter_num += 1
                progbar.update(iter_num, [('rpn_cls_loss', np.mean(losses[:iter_num, 0])),
                                          ('rpn_regr_loss', np.mean(losses[:iter_num, 1]))])

                if iter_num == epoch_length:
                    loss_rpn_cls = np.mean(losses[:, 0])
                    loss_rpn_regr = np.mean(losses[:, 1])
                    print(loss_rpn_cls, loss_rpn_regr)
                    # mean_overlapping_bboxes = float(sum(rpn_accuracy_for_epoch)) // len(rpn_accuracy_for_epoch)
                    # print(mean_overlapping_bboxes)
                    # rpn_accuracy_for_epoch = []

                    if cfg.verbose:
                        # print('Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'.format(mean_overlapping_bboxes))
                        print('Loss RPN classifier: {}'.format(loss_rpn_cls))
                        print('Loss RPN regression: {}'.format(loss_rpn_regr))
                        print('Elapsed time: {}'.format(time.time() - start_time))

                    curr_loss = loss_rpn_cls + loss_rpn_regr
                    iter_num = 0
                    start_time = time.time()

                    if curr_loss < best_loss:
                        if cfg.verbose:
                            print('Total loss decreased from {} to {}, saving weights'.format(best_loss, curr_loss))
                        best_loss = curr_loss
                        model_rpn.save_weights(cfg.model_path)
                    break
            except Exception as e:
                print('Error: {}'.format(e))
                # save the model before continuing
                model_rpn.save_weights(cfg.model_path)
                continue

    print('Training complete, exiting.')
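# A minimal usage sketch (assumed, not from the source): reload the pickled
# Config written by train_rpn() above so that test-time code sees the same
# class_mapping and anchor settings as training.
cfg = config.Config()
with open(cfg.config_save_file, 'rb') as f_in:
    cfg = pickle.load(f_in)
print(cfg.class_mapping)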
filepath = os.path.join(img_path, img_name)
img = cv2.imread(filepath)
X, ratio = format_img(img, C)

if K.image_dim_ordering() == 'tf':
    X = np.transpose(X, (0, 2, 3, 1))

# get the feature maps and output from the RPN
[Y1, Y2, F] = model_rpn.predict(X)

R = roi_helpers.rpn_to_roi(Y1, Y2, C, K.image_dim_ordering(), overlap_thresh=0.7)

# convert from (x1,y1,x2,y2) to (x,y,w,h)
R[:, 2] -= R[:, 0]
R[:, 3] -= R[:, 1]

# apply the spatial pyramid pooling to the proposed regions
bboxes = {}
probs = {}

for jk in range(R.shape[0] // C.num_rois + 1):
    ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois * (jk + 1), :], axis=0)
    if ROIs.shape[1] == 0:
        break
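# format_img is referenced but not defined in these snippets; a minimal sketch
# of the keras-frcnn-style preprocessing it is assumed to perform: resize so the
# shorter side equals C.im_size, subtract per-channel means, rescale, and add a
# batch axis in channels-first order (hence the transpose to 'tf' above).
# C.img_channel_mean and C.img_scaling_factor are assumed config attributes.
def format_img(img, C):
    img_min_side = float(C.im_size)
    (height, width, _) = img.shape
    if width <= height:
        ratio = img_min_side / width
        new_width, new_height = int(img_min_side), int(ratio * height)
    else:
        ratio = img_min_side / height
        new_width, new_height = int(ratio * width), int(img_min_side)
    img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_CUBIC)
    img = img[:, :, (2, 1, 0)].astype(np.float32)  # BGR -> RGB
    img[:, :, 0] -= C.img_channel_mean[0]
    img[:, :, 1] -= C.img_channel_mean[1]
    img[:, :, 2] -= C.img_channel_mean[2]
    img /= C.img_scaling_factor
    img = np.transpose(img, (2, 0, 1))  # channels first
    img = np.expand_dims(img, axis=0)   # batch axis
    return img, ratio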
print('Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'
      .format(mean_overlapping_bboxes, epoch_length))
if mean_overlapping_bboxes == 0:
    print('RPN is not producing bounding boxes that overlap the ground truth boxes. '
          'Check RPN settings or keep training.')

X, Y, img_data = next(data_gen_train)

loss_rpn = model_rpn.train_on_batch(X, Y)

P_rpn = model_rpn.predict_on_batch(X)

R = roi_helpers.rpn_to_roi(P_rpn[0], P_rpn[1], C, K.image_dim_ordering(),
                           use_regr=True, overlap_thresh=0.7, max_boxes=300)
# note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
X2, Y1, Y2, IouS = roi_helpers.calc_iou(R, img_data, C, class_mapping)

if X2 is None:
    rpn_accuracy_rpn_monitor.append(0)
    rpn_accuracy_for_epoch.append(0)
    continue

# the last channel of Y1 flags the background class
neg_samples = np.where(Y1[0, :, -1] == 1)
pos_samples = np.where(Y1[0, :, -1] == 0)
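# In the reference keras-frcnn loop this is typically followed by unwrapping the
# np.where tuples and recording the positive-sample count; a sketch, assuming
# the same monitor lists as above:
neg_samples = neg_samples[0] if len(neg_samples) > 0 else []
pos_samples = pos_samples[0] if len(pos_samples) > 0 else []
rpn_accuracy_rpn_monitor.append(len(pos_samples))
rpn_accuracy_for_epoch.append(len(pos_samples))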
img_path = options.test_path
for idx, img_name in enumerate(sorted(os.listdir(img_path))):
    filename = img_path + '/' + img_name
    img = io.imread(filename)
    img, ratio = format_img(img)
    img = np.transpose(img, (0, 2, 3, 1))
    # print(img.shape)

    [features, x_class, x_reg] = model_rpn.predict(img)
    # print(x_class.shape[1:3])
    R = roi_helpers.rpn_to_roi(x_class, x_reg, C, dim_ordering='tf', overlap_thresh=0.7)
    # print(R.shape)

    # convert from (x1,y1,x2,y2) to (x,y,w,h)
    R[:, 2] -= R[:, 0]
    R[:, 3] -= R[:, 1]

    # apply the spatial pyramid pooling to the proposed regions
    bboxes = {}
    probs = {}

    for jk in range(R.shape[0] // C.num_rois + 1):
        ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois * (jk + 1), :], axis=0)
        if ROIs.shape[1] == 0:
            break
visualise = True

# for idx, img_name in enumerate(sorted(os.listdir(img_path))):
#     if not img_name.lower().endswith(('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')):
#         continue
#     print(img_name)
st = time.time()
# filepath = os.path.join(img_path, img_name)
filepath = 'test.jpg'
img = cv2.imread(filepath)

X, ratio = format_img(img, C)

if K.image_data_format() == 'channels_last':
    X = np.transpose(X, (0, 2, 3, 1))

# get the feature maps and output from the RPN
[Y1, Y2, F] = model_rpn.predict(X)

R = roi_helpers.rpn_to_roi(Y1, Y2, C, K.image_data_format(), overlap_thresh=0.7)

# convert from (x1,y1,x2,y2) to (x,y,w,h)
R[:, 2] -= R[:, 0]
R[:, 3] -= R[:, 1]

# apply the spatial pyramid pooling to the proposed regions
bboxes = {}
probs = {}

for jk in range(R.shape[0] // C.num_rois + 1):
    ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois * (jk + 1), :], axis=0)
    if ROIs.shape[1] == 0:
        break
    if jk == R.shape[0] // C.num_rois:
        # pad R so the last batch still has num_rois entries
        curr_shape = ROIs.shape
        target_shape = (curr_shape[0], C.num_rois, curr_shape[2])
        ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
if mean_overlapping_bboxes == 0:
    print('RPN is not producing bounding boxes that overlap the ground truth boxes. '
          'Check RPN settings or keep training.')

### Keep this ###
### Training ###
# Generate X (x_img) & label Y ([y_rpn_cls, y_rpn_regr])
X, Y, img_data, debug_img, debug_num_pos = next(data_gen_train)

# Train the RPN model & get the loss values
loss_rpn = model_rpn.train_on_batch(X, Y)

# Get the RPN predictions
P_rpn = model_rpn.predict_on_batch(X)

# Convert the RPN output to ROI boxes
R = roi_helpers.rpn_to_roi(P_rpn[0], P_rpn[1], C, use_regr=True,
                           overlap_thresh=0.7, max_boxes=300)

# calc_iou converts from (x1, y1, x2, y2) to (x, y, w, h)
X2, Y1, Y2, IouS = roi_helpers.calc_iou(R, img_data, C, class_mapping)

# If there are no matching boxes
if X2 is None:
    rpn_accuracy_rpn_monitor.append(0)
    rpn_accuracy_for_epoch.append(0)
    continue

# Positive & negative anchors
neg_samples = np.where(Y1[0, :, -1] == 1)
pos_samples = np.where(Y1[0, :, -1] == 0)

if len(neg_samples) > 0:
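# A sketch of how the reference keras-frcnn loop typically continues from here:
# balance positive and negative ROIs before training the detector head
# (assumes C.num_rois ROIs per forward pass and at least one negative sample).
neg_samples = neg_samples[0] if len(neg_samples) > 0 else []
pos_samples = pos_samples[0] if len(pos_samples) > 0 else []

if C.num_rois > 1:
    # take up to half the batch from positives, fill the rest with negatives
    if len(pos_samples) < C.num_rois // 2:
        selected_pos = list(pos_samples)
    else:
        selected_pos = np.random.choice(pos_samples, C.num_rois // 2, replace=False).tolist()
    try:
        selected_neg = np.random.choice(neg_samples, C.num_rois - len(selected_pos),
                                        replace=False).tolist()
    except ValueError:
        # not enough distinct negatives: sample with replacement
        selected_neg = np.random.choice(neg_samples, C.num_rois - len(selected_pos),
                                        replace=True).tolist()
    sel_samples = selected_pos + selected_neg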