Example #1
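(The snippets below are excerpts from runner classes and helper modules. Roughly the following imports are assumed; the cv2/numpy/time usage is visible in the code, while the project-local module layout is an assumption.)

import time

import cv2
import numpy as np

import constants     # project-local: RESULT_KEY_* / EXTRA_DATA_KEY_* constants
import onnx_helper   # project-local: onnx_helper.run(sess_tuple, inputs) wrapper
import image_helper  # project-local: image_helper.resize_best_quality(...)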
    def process_frame(self, frame, extra_data=None):
        super().process_frame(frame)
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        img_data = np.expand_dims(image.astype(np.uint8), axis=0)

        result = onnx_helper.run(self.sess_tuple, [img_data])

        detection_boxes, detection_classes, detection_scores, num_detections = result
        h, w, *_ = image.shape
        out_boxes = []
        out_classes = []
        out_scores = []
        batch_size = num_detections.shape[0]
        for batch in range(batch_size):
            for detection in range(int(num_detections[batch])):
                class_index = int(detection_classes[batch][detection])
                out_classes.append(self.class_names[class_index])
                out_scores.append(detection_scores[batch][detection])
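                # boxes come back normalized to [0, 1] in [y1, x1, y2, x2] order; scale to pixels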
                box = detection_boxes[batch][detection]
                box[0] *= h
                box[1] *= w
                box[2] *= h
                box[3] *= w
                out_boxes.append(box)

        res = []
        for i in range(len(out_boxes)):
            res.append({
                constants.RESULT_KEY_RECT: out_boxes[i],
                constants.RESULT_KEY_SCORE: out_scores[i],
                constants.RESULT_KEY_CLASS_NAME: out_classes[i]
            })
        return res
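
Each detection is a dict keyed by the project constants: RESULT_KEY_RECT holds [y1, x1, y2, x2] in pixels, alongside RESULT_KEY_SCORE and RESULT_KEY_CLASS_NAME. A minimal consumer sketch, assuming `runner` is an instance of this class:

for det in runner.process_frame(frame):
    y1, x1, y2, x2 = det[constants.RESULT_KEY_RECT]
    print(det[constants.RESULT_KEY_CLASS_NAME], det[constants.RESULT_KEY_SCORE])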
Example #2
    def _predict(sess_tuple, class_names, input_size, frame):
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        img_processed, w, h, nw, nh, dw, dh = Yolov3TinyRunner._image_preprocess(
            np.copy(image), [input_size, input_size])
        image_data = img_processed[np.newaxis, ...].astype(np.float32)
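        # NHWC -> NCHW: the ONNX model expects channels-first input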
        image_data = np.transpose(image_data, [0, 3, 1, 2])

        # part specific to yolov3-tiny
        img_size = np.array([input_size, input_size],
                            dtype=np.float32).reshape(1, 2)
        # equivalent to: sess.run(None, {input_name: image_data, "image_shape": img_size})
        boxes, scores, indices = onnx_helper.run(sess_tuple,
                                                 [image_data, img_size])
        out_boxes, out_scores, out_classes = Yolov3TinyRunner._postprocess_tiny_yolov3(
            boxes, scores, indices, class_names)

        out_boxes = Yolov3TinyRunner._remove_padding(out_boxes, w, h, nw, nh,
                                                     dw, dh)

        res = []
        for i in range(len(out_boxes)):
            res.append({
                constants.RESULT_KEY_RECT: out_boxes[i],
                constants.RESULT_KEY_SCORE: out_scores[i],
                constants.RESULT_KEY_CLASS_NAME: out_classes[i]
            })
        return res
Example #3
    def process_frame(self, frame, extra_data=None):
        super().process_frame(frame)

        res = []
        if extra_data is not None:
            results = extra_data.get(constants.EXTRA_DATA_KEY_RESULTS, None)
            if results is not None:
                for runner_name, result in results.items():
                    for item in result:
                        class_name = item.get(constants.RESULT_KEY_CLASS_NAME, None)
                        if class_name == "face":
                            rect_face = item.get(constants.RESULT_KEY_RECT, None)
                            if rect_face is not None:
                                bbox = rect_face
                                y1 = max(int(bbox[0]), 0)
                                x1 = max(int(bbox[1]), 0)
                                y2 = max(int(bbox[2]), 0)
                                x2 = max(int(bbox[3]), 0)
                                image = frame[y1:y2, x1:x2]

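                                # the emotion model expects a single-channel 64x64 input (NCHW)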
                                input_shape = (1, 1, 64, 64)
                                image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
                                image = image_helper.resize_best_quality(image, (64, 64))
                                img_data = np.array(image).astype(np.float32)
                                img_data = np.resize(img_data, input_shape)

                                preds0 = onnx_helper.run(self.sess_tuple, [img_data])
                                preds = preds0[0][0]

                                index = int(np.argmax(preds))
                                score = preds[index]
                                emotion_name = self.class_names[index]

                                res.append({
                                    constants.RESULT_KEY_RECT: rect_face,
                                    constants.RESULT_KEY_SCORE: score,
                                    constants.RESULT_KEY_CLASS_NAME: emotion_name
                                })

        return res
Example #4
def predict_v5(sess_tuple, input_size, class_names, frame):
    # https://github.com/ultralytics/yolov5
    # heavily modified from the code at the link above to remove the torch and torchvision dependencies

    def image_preprocess(image1, target_size):
        ih, iw = target_size
        h1, w1, _ = image1.shape

        scale = min(iw / w1, ih / h1)
        nw, nh = int(scale * w1), int(scale * h1)
        if nh != h1 or nw != w1:
            image_resized = image_helper.resize_best_quality(image1, (nw, nh))
        else:
            image_resized = image1

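        # letterbox: center the resized image on a gray (128) canvas so the aspect ratio is preserved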
        image_padded = np.full(shape=[ih, iw, 3], fill_value=128.0)
        dw, dh = (iw - nw) // 2, (ih - nh) // 2
        image_padded[dh:nh + dh, dw:nw + dw, :] = image_resized
        image_padded = image_padded / 255.

        return image_padded, w1, h1

    def non_max_suppression(prediction,
                            conf_thres=0.1,
                            iou_thres=0.6,
                            agnostic=False):
        """Performs Non-Maximum Suppression (NMS) on inference results

        Returns:
             detections with shape: nx6 (x1, y1, x2, y2, conf, cls)
        """
        def xywh2xyxy(x_):
            # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
            y = np.zeros_like(x_)
            y[:, 0] = x_[:, 0] - x_[:, 2] / 2  # top left x
            y[:, 1] = x_[:, 1] - x_[:, 3] / 2  # top left y
            y[:, 2] = x_[:, 0] + x_[:, 2] / 2  # bottom right x
            y[:, 3] = x_[:, 1] + x_[:, 3] / 2  # bottom right y
            return y

        def nms_cpu(boxes_, confs_, nms_thresh=0.5, min_mode=False):
            x1_ = boxes_[:, 0]
            y1_ = boxes_[:, 1]
            x2_ = boxes_[:, 2]
            y2_ = boxes_[:, 3]

            areas = (x2_ - x1_) * (y2_ - y1_)
            order = confs_.argsort()[::-1]

            keep = []
            while order.size > 0:
                idx_self = order[0]
                idx_other = order[1:]

                keep.append(idx_self)

                xx1 = np.maximum(x1_[idx_self], x1_[idx_other])
                yy1 = np.maximum(y1_[idx_self], y1_[idx_other])
                xx2 = np.minimum(x2_[idx_self], x2_[idx_other])
                yy2 = np.minimum(y2_[idx_self], y2_[idx_other])

                w_ = np.maximum(0.0, xx2 - xx1)
                h_ = np.maximum(0.0, yy2 - yy1)
                inter = w_ * h_

                if min_mode:
                    over = inter / np.minimum(areas[order[0]],
                                              areas[order[1:]])
                else:
                    over = inter / (areas[order[0]] + areas[order[1:]] - inter)

                inds = np.where(over <= nms_thresh)[0]
                order = order[inds + 1]

            return np.array(keep)

        # nc = prediction[0].shape[1] - 5  # number of classes
        xc = prediction[..., 4] > conf_thres  # candidates

        # Settings
        min_wh, max_wh = 2, 4096  # (pixels) minimum and maximum box width and height
        max_det = 300  # maximum number of detections per image
        time_limit = 10.0  # seconds to quit after

        t = time.time()
        output = [None] * prediction.shape[0]
        for xi, x in enumerate(prediction):  # image index, image inference
            # Apply constraints
            # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
            x = x[xc[xi]]  # confidence

            # If none remain process next image
            if not x.shape[0]:
                continue

            # Compute conf
            x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

            # Box (center x, center y, width, height) to (x1, y1, x2, y2)
            box_ = xywh2xyxy(x[:, :4])

            # i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
            i_, j = (x[:, 5:] > conf_thres).nonzero()
            # x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
            x = np.array(
                np.concatenate((box_[i_], x[i_, j + 5, None], j[:, None]),
                               1)).astype(np.float32)

            # If none remain process next image
            n = x.shape[0]  # number of boxes
            if not n:
                continue

            # Sort by confidence
            # x = x[x[:, 4].argsort(descending=True)]

            # Batched NMS
            c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
            boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
            i_ = nms_cpu(boxes, scores, iou_thres)
            if i_.shape[0] > max_det:  # limit detections
                i_ = i_[:max_det]

            output[xi] = x[i_]
            if (time.time() - t) > time_limit:
                break  # time limit exceeded

        return output

    def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
        def clip_coords(boxes, img_shape):
            # np.ndarray.clip is not in-place; write the clipped values back via out=
            boxes[:, 0].clip(0, img_shape[1], out=boxes[:, 0])  # x1
            boxes[:, 1].clip(0, img_shape[0], out=boxes[:, 1])  # y1
            boxes[:, 2].clip(0, img_shape[1], out=boxes[:, 2])  # x2
            boxes[:, 3].clip(0, img_shape[0], out=boxes[:, 3])  # y2

        # Rescale coords (xyxy) from img1_shape to img0_shape
        if ratio_pad is None:  # calculate from img0_shape
            gain = max(img1_shape) / max(img0_shape)  # gain  = old / new
            pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (
                img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding
        else:
            gain = ratio_pad[0][0]
            pad = ratio_pad[1]

        coords[:, [0, 2]] -= pad[0]  # x padding
        coords[:, [1, 3]] -= pad[1]  # y padding
        coords[:, :4] /= gain
        clip_coords(coords, img0_shape)
        return coords

    res = []
    image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    img_processed, w, h = image_preprocess(np.copy(image),
                                           [input_size, input_size])
    image_data = img_processed[np.newaxis, ...].astype(np.float32)
    image_data = np.transpose(image_data, [0, 3, 1, 2])

    inputs = [image_data]
    pred = onnx_helper.run(sess_tuple, inputs)[0]

    batch_detections = np.array(pred)
    batch_detections = non_max_suppression(batch_detections,
                                           conf_thres=0.4,
                                           iou_thres=0.5,
                                           agnostic=False)
    detections = batch_detections[0]
    if detections is not None:
        labels = detections[..., -1]
        boxs = detections[..., :4]
        confs = detections[..., 4]
        boxs[:, :] = scale_coords((input_size, input_size), boxs[:, :],
                                  (h, w)).round()
        for i, box in enumerate(boxs):
            x1, y1, x2, y2 = box
            class_name = class_names[int(labels[i])]
            score = confs[i]
            res.append({
                constants.RESULT_KEY_RECT: [y1, x1, y2, x2],
                constants.RESULT_KEY_SCORE: score,
                constants.RESULT_KEY_CLASS_NAME: class_name
            })

    return res
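
The nms_cpu helper above (and the identical one in the next example) scores overlap as inter / (area_a + area_b - inter). A standalone toy check of that arithmetic, with made-up boxes:

import numpy as np

a = np.array([0.0, 0.0, 10.0, 10.0])  # x1, y1, x2, y2; area 100
b = np.array([5.0, 5.0, 15.0, 15.0])  # area 100
inter_w = max(0.0, min(a[2], b[2]) - max(a[0], b[0]))  # 5.0
inter_h = max(0.0, min(a[3], b[3]) - max(a[1], b[1]))  # 5.0
inter = inter_w * inter_h                              # 25.0
iou = inter / (100.0 + 100.0 - inter)                  # 25 / 175, about 0.143
assert abs(iou - 25.0 / 175.0) < 1e-9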
Example #5
def predict_v4(sess_tuple, input_size, class_names, frame):
    h, w, _ = frame.shape

    def nms_cpu(boxes_, confs, nms_thresh=0.5, min_mode=False):
        x1 = boxes_[:, 0]
        y1 = boxes_[:, 1]
        x2 = boxes_[:, 2]
        y2 = boxes_[:, 3]

        areas = (x2 - x1) * (y2 - y1)
        order = confs.argsort()[::-1]

        keep = []
        while order.size > 0:
            idx_self = order[0]
            idx_other = order[1:]

            keep.append(idx_self)

            xx1 = np.maximum(x1[idx_self], x1[idx_other])
            yy1 = np.maximum(y1[idx_self], y1[idx_other])
            xx2 = np.minimum(x2[idx_self], x2[idx_other])
            yy2 = np.minimum(y2[idx_self], y2[idx_other])

            w_ = np.maximum(0.0, xx2 - xx1)
            h_ = np.maximum(0.0, yy2 - yy1)
            inter = w_ * h_

            if min_mode:
                over = inter / np.minimum(areas[order[0]], areas[order[1:]])
            else:
                over = inter / (areas[order[0]] + areas[order[1:]] - inter)

            inds = np.where(over <= nms_thresh)[0]
            order = order[inds + 1]

        return np.array(keep)

    def post_processing(conf_thresh, nms_thresh, output):

        # anchors = [12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401]
        # num_anchors = 9
        # anchor_masks = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
        # strides = [8, 16, 32]
        # anchor_step = len(anchors) // num_anchors

        # [batch, num, 1, 4]
        box_array = output[0]
        # [batch, num, num_classes]
        confs = output[1]

        if not isinstance(box_array, np.ndarray):
            box_array = box_array.cpu().detach().numpy()
            confs = confs.cpu().detach().numpy()

        num_classes = confs.shape[2]

        # [batch, num, 4]
        box_array = box_array[:, :, 0]

        # [batch, num, num_classes] --> [batch, num]
        max_conf = np.max(confs, axis=2)
        max_id = np.argmax(confs, axis=2)

        bboxes_batch = []
        for i_ in range(box_array.shape[0]):
            argwhere = max_conf[i_] > conf_thresh
            l_box_array = box_array[i_, argwhere, :]
            l_max_conf = max_conf[i_, argwhere]
            l_max_id = max_id[i_, argwhere]

            bboxes = []
            # nms for each class
            for j in range(num_classes):

                cls_argwhere = l_max_id == j
                ll_box_array = l_box_array[cls_argwhere, :]
                ll_max_conf = l_max_conf[cls_argwhere]
                ll_max_id = l_max_id[cls_argwhere]

                keep = nms_cpu(ll_box_array, ll_max_conf, nms_thresh)

                if keep.size > 0:
                    ll_box_array = ll_box_array[keep, :]
                    ll_max_conf = ll_max_conf[keep]
                    ll_max_id = ll_max_id[keep]

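                    # confidence is stored twice ([4] and [5]) to keep the [x1, y1, x2, y2, conf, conf, cls_id] layout that process_boxes reads below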
                    for k in range(ll_box_array.shape[0]):
                        bboxes.append([
                            ll_box_array[k, 0], ll_box_array[k, 1],
                            ll_box_array[k, 2], ll_box_array[k, 3],
                            ll_max_conf[k], ll_max_conf[k], ll_max_id[k]
                        ])

            bboxes_batch.append(bboxes)
        return bboxes_batch

    # the input H/W could also be read from sess.get_inputs()[0].shape[2:4]
    IN_IMAGE_H = IN_IMAGE_W = input_size

    resized = image_helper.resize_best_quality(frame, (IN_IMAGE_W, IN_IMAGE_H))
    img_in = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
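    # HWC -> CHW plus a batch dimension; the model expects NCHW input scaled to [0, 1]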
    img_in = np.transpose(img_in, (2, 0, 1)).astype(np.float32)
    img_in = np.expand_dims(img_in, axis=0)
    img_in /= 255.0

    # equivalent to: sess.run(None, {input_name: img_in})
    inputs = [img_in]
    outputs = onnx_helper.run(sess_tuple, inputs)

    boxes = post_processing(0.4, 0.6, outputs)

    def process_boxes(boxes_, width, height, class_names_):
        out_boxes1 = []
        out_scores1 = []
        out_classes1 = []
        for box in boxes_[0]:
            if len(box) >= 7:
                x1 = int(box[0] * width)
                y1 = int(box[1] * height)
                x2 = int(box[2] * width)
                y2 = int(box[3] * height)
                out_boxes1.append([y1, x1, y2, x2])
                out_scores1.append(box[5])
                out_classes1.append(class_names_[int(box[6])])
        return out_boxes1, out_scores1, out_classes1

    out_boxes, out_scores, out_classes = process_boxes(boxes, w, h,
                                                       class_names)

    res = []
    for i in range(len(out_boxes)):
        res.append({
            constants.RESULT_KEY_RECT: out_boxes[i],
            constants.RESULT_KEY_SCORE: out_scores[i],
            constants.RESULT_KEY_CLASS_NAME: out_classes[i]
        })
    return res
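
predict_v4 and predict_v5 share the (sess_tuple, input_size, class_names, frame) signature, so they are interchangeable at the call site. A minimal call sketch; the loader name create_session_tuple is hypothetical, substitute whatever onnx_helper actually exposes:

sess_tuple = onnx_helper.create_session_tuple("yolov4.onnx")  # hypothetical loader name
class_names = ["person", "car"]                               # placeholder labels
frame = cv2.imread("test.jpg")                                # BGR, as OpenCV loads it
for det in predict_v4(sess_tuple, 416, class_names, frame):
    print(det[constants.RESULT_KEY_CLASS_NAME], det[constants.RESULT_KEY_SCORE])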
Example #6
    def process_frame(self, frame, extra_data=None):
        super().process_frame(frame)

        res = []
        count_female = 0
        count_male = 0
        results = None
        if extra_data is not None:
            results = extra_data.get(constants.EXTRA_DATA_KEY_RESULTS, None)
        if results is not None:
            for runner_name, result in results.items():
                for item in result:
                    class_name = item.get(constants.RESULT_KEY_CLASS_NAME, None)
                    if class_name == "face":
                        rect_face = item.get(constants.RESULT_KEY_RECT, None)
                        if rect_face is not None:
                            padding_ratio = 0.3
                            bbox = rect_face
                            y1 = max(int(bbox[0]), 0)
                            x1 = max(int(bbox[1]), 0)
                            y2 = max(int(bbox[2]), 0)
                            x2 = max(int(bbox[3]), 0)
                            w = x2 - x1
                            h = y2 - y1
                            dw = int(w * padding_ratio)
                            dh = int(h * padding_ratio)
                            x1 -= dw
                            x2 += dw
                            y1 -= dh
                            y2 += dh
                            y1 = max(y1, 0)
                            x1 = max(x1, 0)
                            y2 = max(y2, 0)
                            x2 = max(x2, 0)
                            image = frame[y1:y2, x1:x2]

                            input_shape = (1, 64, 64, 3)
                            image = image_helper.resize_best_quality(
                                image, (64, 64))
                            img_data = np.array(image).astype(np.float32)
                            img_data = np.resize(img_data, input_shape)

                            pred = onnx_helper.run(self.sess_tuple, [img_data])

                            predicted_gender = pred[0][0]
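                            # pred[1] is treated as a probability distribution over ages 0-100; the dot product takes its expectation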
                            ages = np.arange(0, 101).reshape(101, 1)
                            predicted_age = pred[1].dot(ages).flatten()

                            preview_key = "KADIN" if predicted_gender[
                                0] > 0.5 else "ERKEK"
                            name = "{}-{}".format(preview_key,
                                                  int(predicted_age))

                            res.append({
                                constants.RESULT_KEY_RECT: rect_face,
                                constants.RESULT_KEY_CLASS_NAME: name,
                                constants.RESULT_KEY_PREVIEW_KEY: preview_key
                            })
                            if predicted_gender[0] > 0.5:
                                count_female += 1
                            else:
                                count_male += 1

        data = {'female_count': count_female, 'male_count': count_male}
        res.append({constants.RESULT_KEY_DATA: data})
        return res
Example #7
    def _process(self, image, res, x1, y1):
        def decode_bbox(anchors, raw_outputs):
            """
            Decode the actual bbox according to the anchors.
            The anchor value order is [xmin, ymin, xmax, ymax].
            :param anchors: numpy array with shape [batch, num_anchors, 4]
            :param raw_outputs: numpy array with the same shape as anchors
            :return: decoded bboxes with the same shape as the anchors
            """
            variances = [0.1, 0.1, 0.2, 0.2]
            anchor_centers_x = (anchors[:, :, 0:1] + anchors[:, :, 2:3]) / 2
            anchor_centers_y = (anchors[:, :, 1:2] + anchors[:, :, 3:]) / 2
            anchors_w = anchors[:, :, 2:3] - anchors[:, :, 0:1]
            anchors_h = anchors[:, :, 3:] - anchors[:, :, 1:2]
            raw_outputs_rescale = raw_outputs * np.array(variances)
            predict_center_x = raw_outputs_rescale[:, :, 0:1] * anchors_w + anchor_centers_x
            predict_center_y = raw_outputs_rescale[:, :, 1:2] * anchors_h + anchor_centers_y
            predict_w = np.exp(raw_outputs_rescale[:, :, 2:3]) * anchors_w
            predict_h = np.exp(raw_outputs_rescale[:, :, 3:]) * anchors_h
            predict_xmin = predict_center_x - predict_w / 2
            predict_ymin = predict_center_y - predict_h / 2
            predict_xmax = predict_center_x + predict_w / 2
            predict_ymax = predict_center_y + predict_h / 2
            predict_bbox = np.concatenate(
                [predict_xmin, predict_ymin, predict_xmax, predict_ymax],
                axis=-1)
            return predict_bbox

        def single_class_non_max_suppression(bboxes,
                                             confidences,
                                             conf_thresh_=0.2,
                                             iou_thresh_=0.5,
                                             keep_top_k=-1):
            """
            do nms on single class.
            Hint: for the specific class, given the bbox and its confidence,
            1) sort the bbox according to the confidence from top to down, we call this a set
            2) select the bbox with the highest confidence, remove it from set, and do IOU calculate with the rest bbox
            3) remove the bbox whose IOU is higher than the iou_thresh from the set,
            4) loop step 2 and 3, util the set is empty.
            :param bboxes: numpy array of 2D, [num_bboxes, 4]
            :param confidences: numpy array of 1D. [num_bboxes]
            :param conf_thresh_:
            :param iou_thresh_:
            :param keep_top_k:
            :return:
            """
            if len(bboxes) == 0:
                return []

            conf_keep_idx = np.where(confidences > conf_thresh_)[0]

            bboxes = bboxes[conf_keep_idx]
            confidences = confidences[conf_keep_idx]

            pick = []
            xmin_ = bboxes[:, 0]
            ymin_ = bboxes[:, 1]
            xmax_ = bboxes[:, 2]
            ymax_ = bboxes[:, 3]

            area = (xmax_ - xmin_ + 1e-3) * (ymax_ - ymin_ + 1e-3)
            idxs = np.argsort(confidences)

            while len(idxs) > 0:
                last = len(idxs) - 1
                i = idxs[last]
                pick.append(i)

                # keep top k
                if keep_top_k != -1:
                    if len(pick) >= keep_top_k:
                        break

                overlap_xmin = np.maximum(xmin_[i], xmin_[idxs[:last]])
                overlap_ymin = np.maximum(ymin_[i], ymin_[idxs[:last]])
                overlap_xmax = np.minimum(xmax_[i], xmax_[idxs[:last]])
                overlap_ymax = np.minimum(ymax_[i], ymax_[idxs[:last]])
                overlap_w = np.maximum(0, overlap_xmax - overlap_xmin)
                overlap_h = np.maximum(0, overlap_ymax - overlap_ymin)
                overlap_area = overlap_w * overlap_h
                overlap_ratio = overlap_area / (area[idxs[:last]] + area[i] -
                                                overlap_area)

                need_to_be_deleted_idx = np.concatenate(
                    ([last], np.where(overlap_ratio > iou_thresh_)[0]))
                idxs = np.delete(idxs, need_to_be_deleted_idx)

            return conf_keep_idx[pick]

        conf_thresh = 0.5
        iou_thresh = 0.4

        target_shape = (360, 360)

        height, width, _ = image.shape

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        image_resized = image_helper.resize_best_quality(
            image, target_shape).astype(np.float32)
        image_np = image_resized / 255.0
        image_exp = np.expand_dims(image_np, axis=0)

        image_transposed = image_exp.transpose((0, 3, 1, 2))

        y_bboxes_output, y_cls_output = onnx_helper.run(
            self.sess_tuple, [image_transposed])

        # remove the batch dimension, for batch is always 1 for inference.
        y_bboxes = decode_bbox(self.__anchors_exp, y_bboxes_output)[0]
        y_cls = y_cls_output[0]
        # To speed up, do single class NMS, not multiple classes NMS.
        bbox_max_scores = np.max(y_cls, axis=1)
        bbox_max_score_classes = np.argmax(y_cls, axis=1)

        # keep_idx is the alive bounding box after nms.
        keep_idxs = single_class_non_max_suppression(
            y_bboxes,
            bbox_max_scores,
            conf_thresh_=conf_thresh,
            iou_thresh_=iou_thresh,
        )

        count_no_mask = 0
        count_mask = 0
        for idx in keep_idxs:
            score = float(bbox_max_scores[idx])
            class_id = bbox_max_score_classes[idx]
            bbox = y_bboxes[idx]

            if self._max_rect > 0:
                if self._max_rect < bbox[2] - bbox[0] or self._max_rect < bbox[3] - bbox[1]:
                    continue

            if class_id == 0:
                count_mask += 1
            else:
                count_no_mask += 1

            xmin = max(0, int(bbox[0] * width)) + x1
            ymin = max(0, int(bbox[1] * height)) + y1
            xmax = min(int(bbox[2] * width), width) + x1
            ymax = min(int(bbox[3] * height), height) + y1

            rect_face = [ymin, xmin, ymax, xmax]
            res.append({
                constants.RESULT_KEY_RECT: rect_face,
                constants.RESULT_KEY_CLASS_NAME: self.class_names[class_id],
                constants.RESULT_KEY_SCORE: score
            })

        no_mask = self.evaluate_no_mask(count_no_mask)
        if self.on_cooldown():
            no_mask = False
        if no_mask:
            print('No mask detected!')
            if self.capture_frames:
                self.save_frame_locally()
        data = {
            constants.RESULT_KEY_DATA: {
                "mask": count_mask,
                "no_mask": count_no_mask,
                "no_mask_exists": count_no_mask > 0 and no_mask
            }
        }

        if self._last_data != data:
            self.confirm_val -= 1
            if self.confirm_val <= 0:
                self.confirm_val = self.confirm_count
                self._last_data = data
                res.append(data)
                if self._debug:
                    res.append({
                        constants.RESULT_KEY_DEBUG: "Wearing mask: {} - no mask: {}".format(
                            count_mask, count_no_mask)
                    })