Code Example #1
def encode_quads(y, epsilon=10e-5):
    # note: the epsilon parameter is currently unused
    # axis-aligned boxes enclosing the default quads (for their widths/heights)
    df_bboxes = textboxes_utils.get_bboxes_from_quads(
        np.reshape(y[:, -16:-8], (-1, 4, 2)))
    y_encoded = y.copy()
    # encoded_x = (x - dx) / dw / sqrt(var(x))
    y_encoded[:, [-24, -22, -20, -18]] = (
        (y[:, [-24, -22, -20, -18]] - y[:, [-16, -14, -12, -10]])
        / np.tile(np.expand_dims(df_bboxes[:, 2], axis=-1), (1, 4))
        / np.sqrt(y[:, [-8, -6, -4, -2]]))
    # encoded_y = (y - dy) / dh / sqrt(var(y))
    y_encoded[:, [-23, -21, -19, -17]] = (
        (y[:, [-23, -21, -19, -17]] - y[:, [-15, -13, -11, -9]])
        / np.tile(np.expand_dims(df_bboxes[:, 3], axis=-1), (1, 4))
        / np.sqrt(y[:, [-7, -5, -3, -1]]))
    return y_encoded
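
The excerpt does not include the inverse transform. Below is a minimal decode sketch under the same assumption about the column layout (the last 24 columns hold 8 quad coordinates, 8 default-quad coordinates, and 8 variances); decode_quads is a hypothetical name, not a function from the repository:

def decode_quads(y_encoded):
    # hypothetical inverse of encode_quads: decoded = encoded * sqrt(var) * dw + dx
    df_bboxes = textboxes_utils.get_bboxes_from_quads(
        np.reshape(y_encoded[:, -16:-8], (-1, 4, 2)))
    y_decoded = y_encoded.copy()
    y_decoded[:, [-24, -22, -20, -18]] = (
        y_encoded[:, [-24, -22, -20, -18]]
        * np.sqrt(y_encoded[:, [-8, -6, -4, -2]])
        * np.tile(np.expand_dims(df_bboxes[:, 2], axis=-1), (1, 4))
        + y_encoded[:, [-16, -14, -12, -10]])
    y_decoded[:, [-23, -21, -19, -17]] = (
        y_encoded[:, [-23, -21, -19, -17]]
        * np.sqrt(y_encoded[:, [-7, -5, -3, -1]])
        * np.tile(np.expand_dims(df_bboxes[:, 3], axis=-1), (1, 4))
        + y_encoded[:, [-15, -13, -11, -9]])
    return y_decoded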
Code Example #2
model_config = config["model"]
input_size = model_config["input_size"]

if model_config["name"] == "ssd_vgg16":
    model, label_maps, process_input_fn, image = inference_utils.inference_ssd_vgg16(
        config, args)
elif model_config["name"] == "ssd_mobilenetv1":
    model, label_maps, process_input_fn, image, bboxes, classes = inference_utils.inference_ssd_mobilenetv1(
        config, args)
elif model_config["name"] == "ssd_mobilenetv2":
    model, label_maps, process_input_fn, image, bboxes, classes = inference_utils.inference_ssd_mobilenetv2(
        config, args)
elif model_config["name"] == "tbpp_vgg16":
    model, label_maps, process_input_fn, image, quads, classes = inference_utils.inference_tbpp_vgg16(
        config, args)
    bboxes = textboxes_utils.get_bboxes_from_quads(quads)
elif model_config["name"] == "qssd_vgg16":
    model, label_maps, process_input_fn, image = inference_utils.inference_qssd_vgg16(
        config, args)
else:
    print(f"model with name {model_config['name']} has not been implemented yet")
    exit()

model.load_weights(args.weights)


for idx, input_image in enumerate(list(glob(args.images))):
    image = cv2.imread(input_image)  # read image in bgr format
    image = np.array(image, dtype=np.float32)  # np.float is removed in modern NumPy
    image = np.uint8(image)
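
The loop is cut off here in the excerpt. A hedged sketch of the steps that typically follow in this kind of inference script (resize to the model's input size, BGR-to-RGB conversion, the model-specific process_input_fn, then a batched predict); the shape of model.predict's output is not shown in the source, so everything below is an assumption:

    # assumed continuation of the loop, not from the source excerpt
    image = cv2.resize(image, (input_size, input_size))
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = process_input_fn(image)  # model-specific preprocessing
    y_pred = model.predict(np.expand_dims(image, axis=0))  # add a batch dimension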
Code Example #3
    def __get_data(self, batch):
        X = []
        y = self.input_template.copy()

        for batch_idx, sample_idx in enumerate(batch):
            image_path, label_path = self.samples[sample_idx].split(" ")
            image, quads = textboxes_utils.read_sample(image_path=image_path,
                                                       label_path=label_path)
            quads = textboxes_utils.sort_quads_vertices(quads)

            if self.perform_augmentation:
                image, quads, _ = self.__augment(
                    image=image,
                    quads=quads,
                    classes=None,
                )

            bboxes = textboxes_utils.get_bboxes_from_quads(quads)
            image_height, image_width, _ = image.shape
            height_scale, width_scale = self.input_size / image_height, self.input_size / image_width
            input_img = cv2.resize(np.uint8(image),
                                   (self.input_size, self.input_size))
            input_img = cv2.cvtColor(input_img, cv2.COLOR_BGR2RGB)
            input_img = self.process_input_fn(input_img)

            gt_classes = np.zeros((quads.shape[0], self.num_classes))
            gt_textboxes = np.zeros((quads.shape[0], 12))
            default_boxes = y[batch_idx, :, -8:]

            for i in range(bboxes.shape[0]):
                bbox = bboxes[i]
                quad = quads[i]
                cx = bbox[0] * width_scale / self.input_size
                cy = bbox[1] * height_scale / self.input_size
                width = bbox[2] * width_scale / self.input_size
                height = bbox[3] * height_scale / self.input_size
                q_x1 = quad[0, 0] * width_scale / self.input_size
                q_y1 = quad[0, 1] * height_scale / self.input_size
                q_x2 = quad[1, 0] * width_scale / self.input_size
                q_y2 = quad[1, 1] * height_scale / self.input_size
                q_x3 = quad[2, 0] * width_scale / self.input_size
                q_y3 = quad[2, 1] * height_scale / self.input_size
                q_x4 = quad[3, 0] * width_scale / self.input_size
                q_y4 = quad[3, 1] * height_scale / self.input_size
                gt_textboxes[i, :4] = [cx, cy, width, height]
                gt_textboxes[i, 4:] = [
                    q_x1, q_y1, q_x2, q_y2, q_x3, q_y3, q_x4, q_y4
                ]
                gt_classes[i] = [0, 1]

            matches, neutral_boxes = ssd_utils.match_gt_boxes_to_default_boxes(
                gt_boxes=gt_textboxes[:, :4],
                default_boxes=default_boxes[:, :4],
                match_threshold=self.match_threshold,
                neutral_threshold=self.neutral_threshold)

            # set matched ground truth boxes to default boxes with appropriate class
            y[batch_idx, matches[:, 1], self.num_classes:self.num_classes + 12] = gt_textboxes[matches[:, 0]]
            # set class scores label
            y[batch_idx, matches[:, 1], 0:self.num_classes] = gt_classes[matches[:, 0]]
            # set neutral ground truth boxes to default boxes with appropriate class
            y[batch_idx, neutral_boxes[:, 1], self.num_classes:self.num_classes + 12] = gt_textboxes[neutral_boxes[:, 0]]
            # neutral boxes have a class vector of all zeros
            y[batch_idx, neutral_boxes[:, 1], 0:self.num_classes] = np.zeros((self.num_classes,))
            # encode the textboxes relative to their matched default boxes
            y[batch_idx] = textboxes_utils.encode_textboxes(y[batch_idx])
            X.append(input_img)

        X = np.array(X, dtype=np.float32)  # np.float is removed in modern NumPy

        return X, y
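
A note on the per-box scaling in __get_data: since width_scale = self.input_size / image_width, the expression bbox[0] * width_scale / self.input_size simplifies to bbox[0] / image_width, i.e. the coordinates are simply normalized to [0, 1]. A standalone sanity check with made-up numbers:

import numpy as np

input_size, image_width = 384, 1280
width_scale = input_size / image_width
cx_pixels = 640.0
# the generator's expression agrees with direct normalization
assert np.isclose(cx_pixels * width_scale / input_size, cx_pixels / image_width)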
Code Example #4
def evaluate_qssd_mobilenetv2(config, args):
    print("evaluate_qssd_mobilenetv2")
    input_size = config["model"]["input_size"]
    with open(args.label_maps, "r") as file:
        label_maps = [line.strip("\n") for line in file.readlines()]

    model = QSSD_MOBILENETV2(config,
                             label_maps,
                             is_training=False,
                             num_predictions=args.num_predictions)
    model.load_weights(args.weights)

    images = sorted(list(glob(os.path.join(args.images_dir, "*.jpg"))))
    labels = sorted(list(glob(os.path.join(args.images_dir, "*.json"))))
    class_id = 5

    Xs = np.zeros((len(images), input_size, input_size, 3), dtype=np.float32)
    ys = []

    for i, (image_file, label_file) in enumerate(list(zip(images, labels))):
        print(f"reading sample: {i+1}/{len(images)}")
        image = cv2.imread(image_file)  # read image in bgr format
        input_image = cv2.resize(image, (input_size, input_size))
        width_scale = input_size / image.shape[1]
        height_scale = input_size / image.shape[0]
        input_image = cv2.cvtColor(input_image, cv2.COLOR_BGR2RGB)
        input_image = mobilenet_v2.preprocess_input(input_image)
        Xs[i] = input_image

        with open(label_file, "r") as f:  # avoid shadowing the label_file path
            label = json.load(f)
            objects = label["shapes"]
            objs = []
            for obj in objects:
                if obj["label"] == label_maps[class_id - 1]:
                    polygon = np.array(obj["points"])
                    tp = polygon.copy()
                    tp[:, 0] = polygon[:, 0] * width_scale
                    tp[:, 1] = polygon[:, 1] * height_scale
                    objs.append({"class": obj["label"], "polygon": tp})
            ys.append(objs)

    y_preds = model.predict(Xs)

    recalls = []
    precisions = []

    for confidence_threshold in np.arange(start=0, stop=1, step=0.01):
        y_preds_filtered = []
        for y_pred in y_preds:
            selected_pred = []
            for p in y_pred:
                obj = p[0]
                score = p[1]
                if score >= confidence_threshold and obj == class_id:
                    selected_pred.append(p)
            y_preds_filtered.append(selected_pred)

        TP, TN, FP, FN = 0, 0, 0, 0
        for i in range(len(images)):
            y_true = textboxes_utils.get_bboxes_from_quads(
                np.array([y["polygon"] for y in ys[i]]))
            y_true = bbox_utils.center_to_corner(y_true)
            y_pred = np.array(y_preds_filtered[i])

            if len(y_true) == 0 and len(y_pred) != 0:
                FP += len(y_pred)
                continue

            if len(y_pred) == 0 and len(y_true) != 0:
                FN += len(y_true)
                continue

            y_pred = y_pred[:, 2:6]

            # match each ground-truth box to at most one prediction: a gt box
            # with a prediction above the IoU threshold is a true positive,
            # otherwise a false negative; predictions left unmatched count as
            # false positives (the original pairwise loop inflated FP)
            matched = np.zeros(len(y_pred), dtype=bool)
            for gt_box in y_true:
                found_match = False
                for j, y_pred_box in enumerate(y_pred):
                    iou = bbox_utils.iou(np.expand_dims(gt_box, axis=0),
                                         np.expand_dims(y_pred_box, axis=0))
                    if iou > 0.8 and not matched[j]:
                        matched[j] = True
                        found_match = True
                        break
                if found_match:
                    TP += 1
                else:
                    FN += 1
            FP += int(np.count_nonzero(~matched))

        # guard against division by zero when a threshold yields no boxes
        recall = TP / (TP + FN) if (TP + FN) > 0 else 0
        precision = TP / (TP + FP) if (TP + FP) > 0 else 0
        print(f"-- confidence_threshold: {confidence_threshold}")
        print(f"---- recall: {recall}")
        print(f"---- precision: {precision}")
        recalls.append(recall)
        precisions.append(precision)

    plt.plot(recalls, precisions)
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.xlim(0, 1)
    plt.ylim(0, 1)
    plt.show()
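
The function plots the precision-recall curve but does not summarize it. One common summary is average precision via trapezoidal integration over recall; a hedged sketch using the recalls and precisions lists collected above (the repository may define AP differently):

    # hedged sketch: average precision by trapezoidal integration over recall
    order = np.argsort(recalls)
    ap = np.trapz(np.array(precisions)[order], np.array(recalls)[order])
    print(f"average precision: {ap:.4f}")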
Code Example #5
    def __get_data(self, batch):
        X = []
        y = self.input_template.copy()

        for batch_idx, sample_idx in enumerate(batch):
            image_path, label_path = self.samples[sample_idx].split(" ")
            image, quads = textboxes_utils.read_sample(image_path=image_path,
                                                       label_path=label_path)

            if self.perform_augmentation:
                image, quads, _ = self.__augment(
                    image=image,
                    quads=quads,
                    classes=None,
                )

            quads = textboxes_utils.sort_quads_vertices(quads)
            bboxes = textboxes_utils.get_bboxes_from_quads(quads)
            image_height, image_width, _ = image.shape
            height_scale, width_scale = self.input_size / image_height, self.input_size / image_width
            input_img = cv2.resize(np.uint8(image),
                                   (self.input_size, self.input_size))
            input_img = cv2.cvtColor(input_img, cv2.COLOR_BGR2RGB)
            input_img = self.process_input_fn(input_img)

            gt_classes = np.zeros((quads.shape[0], self.num_classes))
            gt_textboxes = np.zeros((quads.shape[0], 12))
            default_boxes = y[batch_idx, :, -8:]

            for i in range(bboxes.shape[0]):
                bbox = bboxes[i]
                quad = quads[i]
                cx = bbox[0] * width_scale / self.input_size
                cy = bbox[1] * height_scale / self.input_size
                width = bbox[2] * width_scale / self.input_size
                height = bbox[3] * height_scale / self.input_size
                q_x1 = quad[0, 0] * width_scale / self.input_size
                q_y1 = quad[0, 1] * height_scale / self.input_size
                q_x2 = quad[1, 0] * width_scale / self.input_size
                q_y2 = quad[1, 1] * height_scale / self.input_size
                q_x3 = quad[2, 0] * width_scale / self.input_size
                q_y3 = quad[2, 1] * height_scale / self.input_size
                q_x4 = quad[3, 0] * width_scale / self.input_size
                q_y4 = quad[3, 1] * height_scale / self.input_size
                gt_textboxes[i, :4] = [cx, cy, width, height]
                gt_textboxes[i, 4:] = [
                    q_x1, q_y1, q_x2, q_y2, q_x3, q_y3, q_x4, q_y4
                ]
                gt_classes[i] = [0, 1]

            matches, neutral_boxes = ssd_utils.match_gt_boxes_to_default_boxes(
                gt_boxes=gt_textboxes[:, :4],
                default_boxes=default_boxes[:, :4],
                match_threshold=self.match_threshold,
                neutral_threshold=self.neutral_threshold)

            # set matched ground truth boxes to default boxes with appropriate class
            y[batch_idx, matches[:, 1], self.num_classes:self.num_classes + 12] = gt_textboxes[matches[:, 0]]
            # set class scores label
            y[batch_idx, matches[:, 1], 0:self.num_classes] = gt_classes[matches[:, 0]]
            # set neutral ground truth boxes to default boxes with appropriate class
            y[batch_idx, neutral_boxes[:, 1], self.num_classes:self.num_classes + 12] = gt_textboxes[neutral_boxes[:, 0]]
            # neutral boxes have a class vector of all zeros
            y[batch_idx, neutral_boxes[:, 1], 0:self.num_classes] = np.zeros((self.num_classes,))
            # encode the textboxes relative to their matched default boxes
            y[batch_idx] = textboxes_utils.encode_textboxes(y[batch_idx])
            X.append(input_img)

        X = np.array(X, dtype=np.float32)  # np.float is removed in modern NumPy

        return X, y
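
Neither generator excerpt shows how __get_data is ultimately consumed. A minimal hedged usage sketch, assuming the enclosing class is a tf.keras.utils.Sequence subclass named DataGenerator; the name and constructor arguments below are assumptions, not taken from the source:

# hypothetical usage; DataGenerator and its arguments are assumptions
generator = DataGenerator(samples=samples, batch_size=32)
model.fit(generator, epochs=100)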
Code Example #6
def random_crop_quad(image,
                     quads,
                     classes,
                     min_size=0.1,
                     max_size=1,
                     min_ar=1,
                     max_ar=2,
                     overlap_modes=[
                         None,
                         [0.1, None],
                         [0.3, None],
                         [0.7, None],
                         [0.9, None],
                         [None, None],
                     ],
                     max_attempts=100,
                     p=0.5):
    """ Randomly crops a patch from the image.

    Args:
        - image: numpy array representing the input image.
        - quads: numpy array representing the quads.
        - classes: the list of classes associating with each quads.
        - min_size: the maximum size a crop can be
        - max_size: the maximum size a crop can be
        - min_ar: the minimum aspect ratio a crop can be
        - max_ar: the maximum aspect ratio a crop can be
        - overlap_modes: the list of overlapping modes the function can randomly choose from.
        - max_attempts: the max number of attempts to generate a patch.

    Returns:
        - image: the modified image
        - quads: the modified quads
        - classes: the modified classes
    """
    assert p >= 0, "p must be larger than or equal to zero"
    assert p <= 1, "p must be less than or equal to 1"
    assert min_size > 0, "min_size must be larger than zero."
    assert max_size <= 1, "max_size must be less than or equal to one."
    assert max_size > min_size, "max_size must be larger than min_size."
    assert max_ar > min_ar, "max_ar must be larger than min_ar."
    assert max_attempts > 0, "max_attempts must be larger than zero."

    # randomly skip the crop with probability (1 - p)
    if random.random() > p:
        return image, quads, classes

    height, width, channels = image.shape
    overlap_mode = random.choice(overlap_modes)

    if overlap_mode is None:
        return image, quads, classes

    bboxes = get_bboxes_from_quads(quads)

    min_iou, max_iou = overlap_mode

    if min_iou is None:
        min_iou = float(-np.inf)

    if max_iou is None:
        max_iou = float(np.inf)

    temp_image = image.copy()

    for i in range(max_attempts):
        crop_w = random.uniform(min_size * width, max_size * width)
        crop_h = random.uniform(min_size * height, max_size * height)
        crop_ar = crop_h / crop_w

        if crop_ar < min_ar or crop_ar > max_ar:  # crop ar does not match criteria, next attempt
            continue

        crop_left = random.uniform(0, width - crop_w)
        crop_top = random.uniform(0, height - crop_h)

        crop_rect = np.array(
            [crop_left, crop_top, crop_left + crop_w, crop_top + crop_h],
            dtype=np.float64)
        crop_rect = np.expand_dims(crop_rect, axis=0)
        crop_rect = np.tile(crop_rect, (bboxes.shape[0], 1))

        ious = iou(crop_rect, bboxes)
        obj_coverage = object_coverage(crop_rect, bboxes)

        if (ious.min() < min_iou and ious.max() > max_iou) or (
                obj_coverage.min() < min_iou and obj_coverage.max() > max_iou):
            continue

        bbox_centers = np.zeros((bboxes.shape[0], 2), dtype=np.float64)
        bbox_centers[:, 0] = (bboxes[:, 0] + bboxes[:, 2]) / 2
        bbox_centers[:, 1] = (bboxes[:, 1] + bboxes[:, 3]) / 2

        cx_in_crop = (bbox_centers[:, 0] > crop_left) * (bbox_centers[:, 0] < crop_left + crop_w)
        cy_in_crop = (bbox_centers[:, 1] > crop_top) * (bbox_centers[:, 1] < crop_top + crop_h)
        boxes_in_crop = cx_in_crop * cy_in_crop

        if not boxes_in_crop.any():
            continue

        temp_image = temp_image[int(crop_top):int(crop_top + crop_h),
                                int(crop_left):int(crop_left + crop_w), :]
        temp_classes = np.array(classes, dtype=object)
        temp_classes = temp_classes[boxes_in_crop]
        temp_bboxes = bboxes[boxes_in_crop]
        temp_quads = quads[boxes_in_crop]
        crop_rect = np.array(
            [crop_left, crop_top, crop_left + crop_w, crop_top + crop_h],
            dtype=np.float64)
        crop_rect = np.expand_dims(crop_rect, axis=0)
        crop_rect = np.tile(crop_rect, (temp_bboxes.shape[0], 1))

        # clip bbox corners to the crop, then translate them into the crop's frame
        temp_bboxes[:, :2] = np.maximum(temp_bboxes[:, :2], crop_rect[:, :2])
        temp_bboxes[:, :2] -= crop_rect[:, :2]
        temp_bboxes[:, 2:] = np.minimum(temp_bboxes[:, 2:], crop_rect[:, 2:])
        temp_bboxes[:, 2:] -= crop_rect[:, :2]
        # translate the quad vertices into the crop's frame as well
        temp_quads[:, :, 0] -= crop_left
        temp_quads[:, :, 1] -= crop_top
        return temp_image, temp_quads, temp_classes.tolist()

    return image, quads, classes
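
A hedged usage sketch for random_crop_quad with synthetic data; the image, quad, and class values below are made up for illustration:

import numpy as np

image = np.zeros((480, 640, 3), dtype=np.uint8)
quads = np.array([[[100, 100], [200, 100], [200, 150], [100, 150]]],
                 dtype=np.float64)
classes = ["text"]
cropped_image, cropped_quads, cropped_classes = random_crop_quad(
    image, quads, classes, p=1.0)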