Example #1
    def detect_and_save(self, img):
        boxes = self.detect_on_image(img)
        # format_img is called only for the resize ratio, which maps boxes
        # back to original-image coordinates below
        X, ratio = format_img(img, self.cfg)
        # apply non-max suppression per class to prune overlapping boxes
        for cls_num, box in boxes.items():
            boxes_nms = roi_helpers.non_max_suppression_fast(
                box, overlap_thresh=0.5)
            boxes[cls_num] = boxes_nms
            print(self.class_mapping[cls_num] + ":")
            for b in boxes_nms:
                b[0], b[1], b[2], b[3] = get_real_coordinates(
                    ratio, b[0], b[1], b[2], b[3])
                print('{} prob: {}'.format(b[0:4], b[-1]))
        img = draw_boxes_and_label_on_image_cv2(img, self.class_mapping, boxes)

        result_path = './results_images/result.png'
        print('result saved to', result_path)
        cv2.imwrite(result_path, img)
        cv2.waitKey(0)
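All four examples call a get_real_coordinates helper that maps box coordinates from the resized input back to the original image. It is not shown here; the following is a minimal sketch of the usual keras-frcnn-style definition, assuming ratio is the resize factor returned by format_img (Example #3 uses a variant that also takes width and height):

def get_real_coordinates(ratio, x1, y1, x2, y2):
    # boxes were predicted on the resized image, so divide by the resize
    # ratio to recover original-image pixel coordinates
    real_x1 = int(round(x1 // ratio))
    real_y1 = int(round(y1 // ratio))
    real_x2 = int(round(x2 // ratio))
    real_y2 = int(round(y2 // ratio))
    return real_x1, real_y1, real_x2, real_y2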
Example #2
def predict_single_image(img_path, model_rpn, model_classifier_only, cfg,
                         class_mapping):
    st = time.time()
    img = cv2.imread(img_path)
    if img is None:
        print('reading image failed.')
        exit(0)

    X, ratio = format_img(img, cfg)
    # with the TensorFlow backend, move the channel axis to the end
    if K.image_dim_ordering() == 'tf':
        X = np.transpose(X, (0, 2, 3, 1))
    # get the feature maps and output from the RPN
    [Y1, Y2, F] = model_rpn.predict(X)

    # result contains all proposed boxes in (x1, y1, x2, y2) format
    result = roi_helpers.rpn_to_roi(Y1,
                                    Y2,
                                    cfg,
                                    K.image_dim_ordering(),
                                    overlap_thresh=0.7)

    # convert from (x1,y1,x2,y2) to (x,y,w,h)
    result[:, 2] -= result[:, 0]
    result[:, 3] -= result[:, 1]
    bbox_threshold = 0.8

    # apply the spatial pyramid pooling to the proposed regions
    boxes = dict()
    for jk in range(result.shape[0] // cfg.num_rois + 1):
        rois = np.expand_dims(result[cfg.num_rois * jk:cfg.num_rois *
                                     (jk + 1), :],
                              axis=0)
        if rois.shape[1] == 0:
            break
        if jk == result.shape[0] // cfg.num_rois:
            # pad the final, short batch up to num_rois
            curr_shape = rois.shape
            target_shape = (curr_shape[0], cfg.num_rois, curr_shape[2])
            rois_padded = np.zeros(target_shape).astype(rois.dtype)
            rois_padded[:, :curr_shape[1], :] = rois
            rois_padded[0, curr_shape[1]:, :] = rois[0, 0, :]
            rois = rois_padded

        [p_cls, p_regr] = model_classifier_only.predict([F, rois])

        for ii in range(p_cls.shape[1]):
            if np.max(p_cls[0, ii, :]) < bbox_threshold or np.argmax(
                    p_cls[0, ii, :]) == (p_cls.shape[2] - 1):
                continue

            cls_num = np.argmax(p_cls[0, ii, :])
            if cls_num not in boxes:
                boxes[cls_num] = []
            (x, y, w, h) = rois[0, ii, :]
            try:
                (tx, ty, tw, th) = p_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
                tx /= cfg.classifier_regr_std[0]
                ty /= cfg.classifier_regr_std[1]
                tw /= cfg.classifier_regr_std[2]
                th /= cfg.classifier_regr_std[3]
                x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th)
            except Exception as e:
                print(e)
            boxes[cls_num].append([
                cfg.rpn_stride * x, cfg.rpn_stride * y,
                cfg.rpn_stride * (x + w), cfg.rpn_stride * (y + h),
                np.max(p_cls[0, ii, :])
            ])
    # apply non-max suppression to prune overlapping boxes
    for cls_num, box in boxes.items():
        boxes_nms = roi_helpers.non_max_suppression_fast(box,
                                                         overlap_thresh=0.5)
        boxes[cls_num] = boxes_nms
        print(class_mapping[cls_num] + ":")
        for b in boxes_nms:
            b[0], b[1], b[2], b[3] = get_real_coordinates(
                ratio, b[0], b[1], b[2], b[3])
            print('{} prob: {}'.format(b[0:4], b[-1]))
    img = draw_boxes_and_label_on_image_cv2(img, class_mapping, boxes)
    print('Elapsed time = {}'.format(time.time() - st))
    cv2.imshow('image', img)
    result_path = './results_images/{}.png'.format(
        os.path.basename(img_path).split('.')[0])
    print('result saved to', result_path)
    cv2.imwrite(result_path, img)
    cv2.waitKey(0)
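format_img is also not shown in these examples. The following is a minimal sketch of the usual keras-frcnn preprocessing it performs, assuming cfg exposes im_size, img_channel_mean, and img_scaling_factor (those field names are assumptions, not taken from this code):

import cv2
import numpy as np

def format_img(img, cfg):
    # resize so the shorter side equals cfg.im_size, keeping aspect ratio
    img_min_side = float(cfg.im_size)  # assumed config field
    height, width = img.shape[:2]
    if width <= height:
        ratio = img_min_side / width
        new_width, new_height = int(img_min_side), int(ratio * height)
    else:
        ratio = img_min_side / height
        new_width, new_height = int(ratio * width), int(img_min_side)
    img = cv2.resize(img, (new_width, new_height),
                     interpolation=cv2.INTER_CUBIC)
    # BGR -> RGB, subtract per-channel means, rescale
    img = img[:, :, (2, 1, 0)].astype(np.float32)
    img[:, :, 0] -= cfg.img_channel_mean[0]  # assumed config field
    img[:, :, 1] -= cfg.img_channel_mean[1]
    img[:, :, 2] -= cfg.img_channel_mean[2]
    img /= cfg.img_scaling_factor  # assumed config field
    # channels-first plus a batch dimension; the callers transpose back to
    # channels-last when the TensorFlow backend is in use
    img = np.transpose(img, (2, 0, 1))
    img = np.expand_dims(img, axis=0)
    return img, ratio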
Example #3
def predict_single_image(img_path, model_rpn, model_classifier_only, cfg,
                         class_mapping):
    st = datetime.datetime.now()
    img = cv2.imread(img_path)
    if img is None:
        print('reading image failed.')
        exit(0)
    # grab the original size before formatting; the get_real_coordinates
    # variant below needs it
    height, width = img.shape[:2]

#==============================================================================
#     # add a bilateral filter & histogram equalization (this step now lives in separate preprocessing code)
#     img = cv2.bilateralFilter(img,9,75,75)
#     img = cv2.equalizeHist(img)
#     img = cv2.cvtColor(img,cv2.COLOR_GRAY2BGR)
#==============================================================================
    X, ratio = format_img(img, cfg)
    # with the TensorFlow backend, move the channel axis to the end
    if K.image_dim_ordering() == 'tf':
        X = np.transpose(X, (0, 2, 3, 1))
    [Y1, Y2, F] = model_rpn.predict(X)

    #==============================================================================
    #     # get the feature maps and output from the RPN
    #     model_conv1 = Model(inputs=model_rpn.input,
    #                                      outputs=model_rpn.get_layer('activation_40').output)
    #     feature1 = model_conv1.predict(X)
    #     print(feature1.shape)
    #     #show the feature maps
    #     images_per_row = int(math.sqrt(feature1.shape[-1]))
    #     n_features = feature1.shape[-1]
    #     feature_sizex = feature1.shape[1]
    #     feature_sizey = feature1.shape[2]
    #     n_cols = n_features // images_per_row
    #     display_grid = np.zeros((feature_sizex * n_cols, images_per_row * feature_sizey))
    #     for col in range(n_cols):
    #         for row in range(images_per_row):
    #             channel_image = feature1[0,:, :,col * images_per_row + row].copy()
    #             # Post-process the feature to make it visually palatable
    #             channel_image -= channel_image.mean()
    #             channel_image /= channel_image.std()
    #             channel_image *= 64
    #             channel_image += 128
    #             channel_image = np.clip(channel_image, 0, 255).astype('uint8')
    #             display_grid[col * feature_sizex : (col + 1) * feature_sizex,
    #                          row * feature_sizey : (row + 1) * feature_sizey] = channel_image
    #
    #     # Display the grid
    #     scalex = 1. / feature_sizex
    #     scaley = 1. / feature_sizey
    #     plt.figure(figsize=(scaley * display_grid.shape[1],
    #                         scalex * display_grid.shape[0]))
    #     print(display_grid.shape)
    #     plt.title('feature_map')
    #     plt.grid(False)
    #     plt.imshow(display_grid, aspect='equal', cmap='viridis')
    #     plt.show()
    #==============================================================================

    # result contains all proposed boxes in (x1, y1, x2, y2) format
    result = roi_helpers.rpn_to_roi(Y1,
                                    Y2,
                                    cfg,
                                    K.image_dim_ordering(),
                                    overlap_thresh=0.7)
    # convert from (x1,y1,x2,y2) to (x,y,w,h)
    result[:, 2] -= result[:, 0]
    result[:, 3] -= result[:, 1]
    bbox_threshold = 0.8

    # apply the spatial pyramid pooling to the proposed regions
    boxes = dict()
    for jk in range(result.shape[0] // cfg.num_rois + 1):
        rois = np.expand_dims(result[cfg.num_rois * jk:cfg.num_rois *
                                     (jk + 1), :],
                              axis=0)
        if rois.shape[1] == 0:
            break
        if jk == result.shape[0] // cfg.num_rois:
            # pad the final, short batch up to num_rois
            curr_shape = rois.shape
            target_shape = (curr_shape[0], cfg.num_rois, curr_shape[2])
            rois_padded = np.zeros(target_shape).astype(rois.dtype)
            rois_padded[:, :curr_shape[1], :] = rois
            rois_padded[0, curr_shape[1]:, :] = rois[0, 0, :]
            rois = rois_padded

        [p_cls, p_regr] = model_classifier_only.predict([F, rois])
        # print(p_cls)  # debug
        for ii in range(p_cls.shape[1]):
            if np.max(p_cls[0, ii, :]) < bbox_threshold or np.argmax(
                    p_cls[0, ii, :]) == (p_cls.shape[2] - 1):
                continue

            cls_num = np.argmax(p_cls[0, ii, :])
            if cls_num not in boxes:
                boxes[cls_num] = []
            (x, y, w, h) = rois[0, ii, :]
            try:
                (tx, ty, tw, th) = p_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
                tx /= cfg.classifier_regr_std[0]
                ty /= cfg.classifier_regr_std[1]
                tw /= cfg.classifier_regr_std[2]
                th /= cfg.classifier_regr_std[3]
                x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th)
            except Exception as e:
                print(e)
            boxes[cls_num].append([
                cfg.rpn_stride * x, cfg.rpn_stride * y,
                cfg.rpn_stride * (x + w), cfg.rpn_stride * (y + h),
                np.max(p_cls[0, ii, :])
            ])
    # apply non-max suppression to prune overlapping boxes
    for cls_num, box in boxes.items():
        boxes_nms = roi_helpers.non_max_suppression_fast(box,
                                                         overlap_thresh=0.4)
        boxes[cls_num] = boxes_nms
        print(class_mapping[cls_num] + ":")
        for b in boxes_nms:
            b[0], b[1], b[2], b[3] = get_real_coordinates(
                ratio, b[0], b[1], b[2], b[3], width, height)
            print('{} prob: {}'.format(b[0:4], b[-1]))
    img = draw_boxes_and_label_on_image_cv2(img, class_mapping, boxes)
    print('Elapsed time = {}'.format(datetime.datetime.now() - st))
    result_path = './result_images/{}.jpg'.format(
        os.path.splitext(os.path.basename(img_path))[0])
    print('result saved to', result_path)
    #    cv2.imwrite(result_path, img)
    # draw the ground-truth boxes from the label file for comparison
    with open('mito_simple_label_d+e.txt', 'r') as all_images:
        for image in all_images:
            image = image.strip()
            [filepath, x1, y1, x2, y2, cls_name] = image.split(',')
            if img_path.split('\\')[-1] == filepath.split('\\')[-1]:
                x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
                print('ground truth bbox: [{},{},{},{}]'.format(x1, y1, x2, y2))
                cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255), 8)
                text_label = cls_name
                (ret_val, base_line) = cv2.getTextSize(
                    text_label, cv2.FONT_HERSHEY_COMPLEX, 1, 1)
                text_org = (x1, y1)
                # outline of the label background
                cv2.rectangle(img,
                              (text_org[0] - 1, text_org[1] + base_line - 80),
                              (text_org[0] + ret_val[0] + 120,
                               text_org[1] - ret_val[1] + 40), (0, 0, 255), 1)
                # filled rectangle behind the label text
                cv2.rectangle(img,
                              (text_org[0] - 1, text_org[1] + base_line - 80),
                              (text_org[0] + ret_val[0] + 120,
                               text_org[1] - ret_val[1] + 40), (0, 0, 255), -1)
                cv2.putText(img, text_label, text_org, cv2.FONT_HERSHEY_DUPLEX,
                            1.5, (255, 255, 255), 3)
                cv2.imwrite(result_path, img)
    img = cv2.resize(img, (600, 621), interpolation=cv2.INTER_CUBIC)
    cv2.imshow('result', img)
    cv2.waitKey(5000)
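roi_helpers.non_max_suppression_fast is called with varying overlap_thresh values (0.5 in Examples #1, #2 and #4, 0.4 above). A minimal IoU-based sketch of the standard fast NMS it implements, assuming each box is [x1, y1, x2, y2, prob]:

import numpy as np

def non_max_suppression_fast(boxes, overlap_thresh):
    boxes = np.asarray(boxes, dtype=np.float32)
    if len(boxes) == 0:
        return boxes
    x1, y1, x2, y2, probs = boxes.T
    area = (x2 - x1) * (y2 - y1)
    idxs = np.argsort(probs)  # ascending: most confident box is last
    pick = []
    while len(idxs) > 0:
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)  # keep the most confident remaining box
        # intersection of box i with every other remaining box
        xx1 = np.maximum(x1[i], x1[idxs[:last]])
        yy1 = np.maximum(y1[i], y1[idxs[:last]])
        xx2 = np.minimum(x2[i], x2[idxs[:last]])
        yy2 = np.minimum(y2[i], y2[idxs[:last]])
        inter = np.maximum(0, xx2 - xx1) * np.maximum(0, yy2 - yy1)
        iou = inter / (area[i] + area[idxs[:last]] - inter)
        # drop box i and everything overlapping it too much
        idxs = np.delete(
            idxs, np.concatenate(([last], np.where(iou > overlap_thresh)[0])))
    return boxes[pick]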
Example #4
def predict_single_image(img_path, model_rpn, model_classifier_only, cfg,
                         class_mapping, Ap):
    st = time.time()
    img = cv2.imread(img_path)
    if img is None:
        print('reading image failed.')
        exit(0)

    X, ratio = format_img(img, cfg)
    # with the TensorFlow backend, move the channel axis to the end
    if K.image_dim_ordering() == 'tf':
        X = np.transpose(X, (0, 2, 3, 1))
    # run the RPN to get region proposals
    # get the feature maps and output from the RPN
    # Y1: probability that each anchor contains an object
    # Y2: regression deltas for each anchor
    #  F: the shared convolutional feature map
    [Y1, Y2, F] = model_rpn.predict(X)

    # result contains all proposed boxes in (x1, y1, x2, y2) format
    result = roi_helpers.rpn_to_roi(Y1,
                                    Y2,
                                    cfg,
                                    K.image_dim_ordering(),
                                    overlap_thresh=0.7)

    # convert from (x1,y1,x2,y2) to (x,y,w,h)
    result[:, 2] -= result[:, 0]
    result[:, 3] -= result[:, 1]
    bbox_threshold = 0.50

    # apply the spatial pyramid pooling to the proposed regions
    boxes = dict()
    # process the proposals in batches of num_rois each;
    # (result.shape[0] // cfg.num_rois + 1) passes cover them all
    for jk in range(result.shape[0] // cfg.num_rois + 1):
        # take the next num_rois proposals and add a batch dimension
        # (the final slice may hold fewer than num_rois)
        rois = np.expand_dims(result[cfg.num_rois * jk:cfg.num_rois *
                                     (jk + 1), :],
                              axis=0)
        # no proposals left
        if rois.shape[1] == 0:
            break
        # if the last slice holds fewer than num_rois proposals, pad it
        if jk == result.shape[0] // cfg.num_rois:
            # pad the final batch up to num_rois
            curr_shape = rois.shape
            target_shape = (curr_shape[0], cfg.num_rois, curr_shape[2])
            # allocate a zeroed array of the target shape
            rois_padded = np.zeros(target_shape).astype(rois.dtype)
            # copy the existing proposals to the front
            rois_padded[:, :curr_shape[1], :] = rois
            # fill the remainder with copies of the first box
            rois_padded[0, curr_shape[1]:, :] = rois[0, 0, :]
            rois = rois_padded

        # run classification and bounding-box regression on the proposals
        # p_cls: class probabilities for each proposal
        # p_regr: per-class bounding-box regression deltas
        # F: feature map produced by the RPN's convolutional base
        # rois: the prepared region proposals
        [p_cls, p_regr] = model_classifier_only.predict([F, rois])
        # iterate over every proposal (p_cls.shape[1] is the proposal count)
        for ii in range(p_cls.shape[1]):
            # skip the proposal if its best class score is below the threshold
            # (only sufficiently confident predictions count) or if the best
            # class is the background class
            if np.max(p_cls[0, ii, :]) < bbox_threshold or np.argmax(
                    p_cls[0, ii, :]) == (p_cls.shape[2] - 1):
                continue
            # otherwise take the highest-probability class as this box's label
            cls_num = np.argmax(p_cls[0, ii, :])
            # start a list holding this class's boxes and probabilities
            if cls_num not in boxes:
                boxes[cls_num] = []
            # unpack this proposal's coordinates (ii indexes the proposal)
            (x, y, w, h) = rois[0, ii, :]
            try:
                # pick out the regression deltas for the predicted class
                (tx, ty, tw, th) = p_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
                # undo the normalisation applied during training
                tx /= cfg.classifier_regr_std[0]
                ty /= cfg.classifier_regr_std[1]
                tw /= cfg.classifier_regr_std[2]
                th /= cfg.classifier_regr_std[3]
                # refine the proposal with the regression deltas
                x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th)
            except Exception as e:
                print(e)
            # record the box under its class; predictions live on the feature
            # map, so multiply by cfg.rpn_stride (typically 16) to map them
            # back onto the resized input image
            boxes[cls_num].append([
                cfg.rpn_stride * x, cfg.rpn_stride * y,
                cfg.rpn_stride * (x + w), cfg.rpn_stride * (y + h),
                np.max(p_cls[0, ii, :])
            ])

    print(boxes)  # added by me for debugging
    result_txt_filename = "./predict_labels/" + os.path.basename(
        img_path).split('.')[0] + ".txt"

    with open(result_txt_filename, 'w') as f:
        for cls_num, box in boxes.items():
            # apply non-max suppression to prune overlapping boxes
            boxes_nms = roi_helpers.non_max_suppression_fast(
                box, overlap_thresh=0.5)
            boxes[cls_num] = boxes_nms
            print(class_mapping[cls_num] + ":")
            accall = 0  # running sum of detection probabilities for this class

            for b in boxes_nms:
                b[0], b[1], b[2], b[3] = get_real_coordinates(
                    ratio, b[0], b[1], b[2], b[3])
                print('{} prob: {}'.format(b[0:4], b[-1]))
                accall += b[-1]
                #print("accall:".format(accall))
                f.write('{} {} {} {} {} {}\n'.format(class_mapping[cls_num],
                                                     b[-1], b[0], b[1], b[2],
                                                     b[3]))
            print("accall:{}".format(accall))
            # average probability over the kept boxes, recorded per class in Ap
            avg = accall / len(boxes_nms)
            print("{} acc:{}".format(class_mapping[cls_num], avg))
            Ap[cls_num].append(avg)

    img = draw_boxes_and_label_on_image_cv2(img, class_mapping, boxes)
    print('Elapsed time = {}'.format(time.time() - st))
    # cv2.imshow('image', img)  # display disabled

    result_path = './predict_images/{}.png'.format(
        os.path.basename(img_path).split('.')[0])
    print('result saved to', result_path)
    cv2.imwrite(result_path, img)  # write the annotated image to disk
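roi_helpers.apply_regr refines a proposal (x, y, w, h) with the predicted deltas (tx, ty, tw, th). A minimal sketch of the standard Faster R-CNN box decoding this step performs:

import math

def apply_regr(x, y, w, h, tx, ty, tw, th):
    # shift the box centre by (tx, ty) in units of width/height,
    # and scale the size by exp(tw), exp(th)
    cx, cy = x + w / 2.0, y + h / 2.0
    cx1, cy1 = tx * w + cx, ty * h + cy
    w1, h1 = math.exp(tw) * w, math.exp(th) * h
    # convert back to top-left corner plus size, rounded to integers
    x1 = int(round(cx1 - w1 / 2.0))
    y1 = int(round(cy1 - h1 / 2.0))
    return x1, y1, int(round(w1)), int(round(h1))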