def detect_video_bgs(Yolo,
                     video_path,
                     output_path,
                     log_path,
                     input_size=416,
                     show=False,
                     CLASSES=YOLO_COCO_CLASSES,
                     score_threshold=0.3,
                     iou_threshold=0.45,
                     rectangle_colors='',
                     draw_roi=False,
                     zoom=0,
                     show_diver=True):
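    """Run YOLO detection plus colour-threshold background subtraction.

    For every frame, a static HSV threshold isolates bright "splash"
    pixels; detected splash bounding boxes define a ROI whose white-pixel
    count is compared against the whole frame. Annotated frames go to
    output_path and per-frame statistics are written as CSV to log_path.
    """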

    times, times_2 = [], []
    vid = cv2.VideoCapture(video_path)

    # by default VideoCapture returns float instead of int
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(vid.get(cv2.CAP_PROP_FPS))
    codec = cv2.VideoWriter_fourcc(*'XVID')
    # NOTE: XVID is an MPEG-4 codec that typically pairs with .avi
    # containers; some OpenCV builds warn if output_path uses another extension.
    out = cv2.VideoWriter(output_path, codec, fps, (width, height))

    # Static HSV bounds (H, S, V) selecting bright, low-saturation pixels
    # (white water / splash). OpenCV hue only spans 0-179, so the upper
    # bound of 255 leaves hue effectively unbounded from above.
    LOW = np.array([80, 0, 200])
    HIGH = np.array([255, 110, 255])

    # Per-frame statistics; rows are collected in a plain list because
    # DataFrame.append is deprecated (and removed in pandas 2.0).
    log_rows = []
    while True:
        ret, img = vid.read()
        if not ret:  # end of video (or read error)
            break

        # The frame stays in BGR (OpenCV's default); the original double
        # cv2.COLOR_BGR2RGB conversion was an identity no-op.
        original_image = img

        image_data = image_preprocess(np.copy(original_image),
                                      [input_size, input_size])
        image_data = image_data[np.newaxis, ...].astype(np.float32)

        t1 = time.time()
        pred_bbox = Yolo.predict(image_data)
        t2 = time.time()

        pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
        pred_bbox = tf.concat(pred_bbox, axis=0)
        bboxes = postprocess_boxes(pred_bbox, original_image, input_size,
                                   score_threshold)
        bboxes = nms(bboxes, iou_threshold, method='nms')

        # Colour-threshold BGS: mask pixels inside the static HSV bounds
        hsv = cv2.cvtColor(original_image, cv2.COLOR_BGR2HSV)
        fgMask = cv2.inRange(hsv, LOW, HIGH)

        # Each bbox is [x_min, y_min, x_max, y_max, score, class_index];
        # keep only boxes whose class is "splash".
        splash_boxes = [
            i for i in bboxes if CLASS_INDECES[int(i[5])] == "splash"
        ]

        if splash_boxes:
            splash_x_min, splash_y_min, splash_x_max, splash_y_max = splash_bbox_roi(
                splash_boxes=splash_boxes, zoom=zoom)

            # whole frame:
            number_of_white_pix = np.sum(fgMask == 255)
            number_total_pix = fgMask.shape[0] * fgMask.shape[1]
            print("Normal_image: Number of white pixels: {} ({}%)".format(
                number_of_white_pix,
                round((number_of_white_pix / number_total_pix) * 100, 2)))

            # splash ROI only:
            splash_roi = fgMask[splash_y_min:splash_y_max,
                                splash_x_min:splash_x_max]
            roi_number_of_white_pix = np.sum(splash_roi == 255)
            print("Roi: Number of white pixels: {} ({}%)".format(
                roi_number_of_white_pix,
                round((roi_number_of_white_pix / number_total_pix) * 100, 2)))

            pixel_diff = abs(roi_number_of_white_pix - number_of_white_pix)

            image = cv2.cvtColor(fgMask, cv2.COLOR_GRAY2RGB)

            if draw_roi:
                # outline the splash ROI on the mask image
                image = cv2.rectangle(image, (splash_x_min, splash_y_min),
                                      (splash_x_max, splash_y_max),
                                      (255, 0, 0), 2)

            else:
                # keep only the splash ROI; black out everything else
                mask = np.zeros(image.shape[:2], dtype="uint8")
                cv2.rectangle(mask, (splash_x_min, splash_y_min),
                              (splash_x_max, splash_y_max), 255, -1)
                image = cv2.bitwise_and(image, image, mask=mask)

            # Recolor the binary mask for display
            image = recolor_bw(image, splash_red=True)

            # Percentages for the on-frame annotation and the CSV log
            vis_px_pc = round(
                (roi_number_of_white_pix / number_total_pix) * 100, 2)
            total_px_pc = round(
                (number_of_white_pix / number_total_pix) * 100, 2)
            # guard the roi/total-white ratio against frames with no white pixels
            diff_pc = round(
                (roi_number_of_white_pix / number_of_white_pix) * 100,
                2) if number_of_white_pix else 0.0

            image = cv2.putText(
                image,
                "Vis. PXs (roi): {} ({}%) Total wPXs: {} ({}%) Diff: {} ({}%) "
                .format(roi_number_of_white_pix, vis_px_pc,
                        number_of_white_pix, total_px_pc, pixel_diff, diff_pc),
                (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 0.7, (0, 0, 255), 1)
            # Create logs:
            log_rows.append({
                "vis_px": roi_number_of_white_pix,
                "vis_px_pc": vis_px_pc,
                "total_px": number_of_white_pix,
                "total_px_pc": total_px_pc,
                "diff": pixel_diff,
                "diff_pc": diff_pc
            })

        else:
            if not show_diver:
                # No splash detected and the diver should not be shown: emit
                # a blank frame. Converted to 3 channels to match the splash
                # branch, since recolor_bw is otherwise only given RGB images.
                image = np.zeros(original_image.shape[:2], dtype="uint8")
                image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
                image = recolor_bw(image, splash_red=False)

            else:
                image = draw_bbox(original_image,
                                  bboxes,
                                  CLASSES=CLASSES,
                                  rectangle_colors=rectangle_colors)

        t3 = time.time()
        times.append(t2 - t1)
        times_2.append(t3 - t1)

        times = times[-20:]
        times_2 = times_2[-20:]

        ms = sum(times) / len(times) * 1000
        fps = 1000 / ms
        fps2 = 1000 / (sum(times_2) / len(times_2) * 1000)

        print(
            "Time: {:.2f}ms, Detection FPS: {:.1f}, total FPS: {:.1f}".format(
                ms, fps, fps2))
        if output_path != '':
            out.write(image)
        if show:
            cv2.imshow('output', image)
            if cv2.waitKey(25) & 0xFF == ord("q"):
                cv2.destroyAllWindows()
                break

    # Persist per-frame statistics and release resources.
    log = pd.DataFrame(log_rows,
                       columns=[
                           "vis_px", "vis_px_pc", "total_px", "total_px_pc",
                           "diff", "diff_pc"
                       ])
    log.to_csv(log_path)
    vid.release()
    out.release()
    cv2.destroyAllWindows()


def detect_video_knn(Yolo,
                     video_path,
                     output_path,
                     input_size=416,
                     show=False,
                     CLASSES=YOLO_COCO_CLASSES,
                     score_threshold=0.3,
                     iou_threshold=0.45,
                     rectangle_colors='',
                     draw_roi=False,
                     zoom=0):
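    """Run YOLO detection plus KNN background subtraction.

    Like detect_video_bgs, but the foreground mask comes from OpenCV's
    BackgroundSubtractorKNN instead of a static HSV threshold, and no CSV
    log is written.
    """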

    # Background subtraction; MOG2 is an alternative:
    # backSub = cv2.createBackgroundSubtractorMOG2(history=500, varThreshold=40, detectShadows=False)
    backSub = cv2.createBackgroundSubtractorKNN()

    # Tuned KNN parameters. dist2Threshold is far above the OpenCV default
    # of 400.0, so only pixels very far from all background samples are
    # flagged as foreground.
    backSub.setDetectShadows(False)
    backSub.setDist2Threshold(13000)
    backSub.setkNNSamples(6)
    backSub.setNSamples(30)

    times, times_2 = [], []
    vid = cv2.VideoCapture(video_path)

    # by default VideoCapture returns float instead of int
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(vid.get(cv2.CAP_PROP_FPS))
    codec = cv2.VideoWriter_fourcc(*'XVID')
    # NOTE: XVID typically pairs with .avi containers.
    out = cv2.VideoWriter(output_path, codec, fps, (width, height))
    while True:
        ret, img = vid.read()
        if not ret:  # end of video (or read error)
            break

        # frame stays in BGR; the original double BGR2RGB conversion was a no-op
        original_image = img

        image_data = image_preprocess(np.copy(original_image),
                                      [input_size, input_size])
        image_data = image_data[np.newaxis, ...].astype(np.float32)

        t1 = time.time()
        pred_bbox = Yolo.predict(image_data)

        t2 = time.time()

        pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
        pred_bbox = tf.concat(pred_bbox, axis=0)

        bboxes = postprocess_boxes(pred_bbox, original_image, input_size,
                                   score_threshold)
        bboxes = nms(bboxes, iou_threshold, method='nms')

        # learningRate=0.9 makes the background model adapt almost instantly
        fgMask = backSub.apply(original_image, learningRate=0.9)

        # keep only boxes whose class is "splash" (bbox[5] is the class index)
        splash_boxes = [
            i for i in bboxes if CLASS_INDECES[int(i[5])] == "splash"
        ]

        if splash_boxes:
            splash_x_min, splash_y_min, splash_x_max, splash_y_max = splash_bbox_roi(
                splash_boxes=splash_boxes, zoom=zoom)

            # whole frame:
            number_of_white_pix = np.sum(fgMask == 255)
            number_total_pix = fgMask.shape[0] * fgMask.shape[1]
            print("Normal_image: Number of white pixels: {} ({}%)".format(
                number_of_white_pix,
                round((number_of_white_pix / number_total_pix) * 100, 2)))

            # splash ROI only:
            splash_roi = fgMask[splash_y_min:splash_y_max,
                                splash_x_min:splash_x_max]
            roi_number_of_white_pix = np.sum(splash_roi == 255)
            print("Roi: Number of white pixels: {} ({}%)".format(
                roi_number_of_white_pix,
                round((roi_number_of_white_pix / number_total_pix) * 100, 2)))

            pixel_diff = abs(roi_number_of_white_pix - number_of_white_pix)

            image = cv2.cvtColor(fgMask, cv2.COLOR_GRAY2RGB)

            if draw_roi:
                # outline the splash ROI on the mask image
                image = cv2.rectangle(image, (splash_x_min, splash_y_min),
                                      (splash_x_max, splash_y_max),
                                      (255, 0, 0), 2)

            else:
                # keep only the splash ROI; black out everything else
                mask = np.zeros(image.shape[:2], dtype="uint8")
                cv2.rectangle(mask, (splash_x_min, splash_y_min),
                              (splash_x_max, splash_y_max), 255, -1)
                image = cv2.bitwise_and(image, image, mask=mask)

            # Percentages for the on-frame annotation (guard the
            # roi/total-white ratio against frames with no white pixels).
            vis_px_pc = round(
                (roi_number_of_white_pix / number_total_pix) * 100, 2)
            total_px_pc = round(
                (number_of_white_pix / number_total_pix) * 100, 2)
            diff_pc = round(
                (roi_number_of_white_pix / number_of_white_pix) * 100,
                2) if number_of_white_pix else 0.0

            image = cv2.putText(
                image,
                "Vis. PXs (roi): {} ({}%) Total wPXs: {} ({}%) Diff: {} ({}%) "
                .format(roi_number_of_white_pix, vis_px_pc,
                        number_of_white_pix, total_px_pc, pixel_diff, diff_pc),
                (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 0.7, (0, 0, 255), 1)

        else:
            # TODO: decide how to handle frames with no splash detection;
            # for now, fall back to drawing all detections.

            image = draw_bbox(original_image,
                              bboxes,
                              CLASSES=CLASSES,
                              rectangle_colors=rectangle_colors)

        t3 = time.time()
        times.append(t2 - t1)
        times_2.append(t3 - t1)

        times = times[-20:]
        times_2 = times_2[-20:]

        ms = sum(times) / len(times) * 1000
        fps = 1000 / ms
        fps2 = 1000 / (sum(times_2) / len(times_2) * 1000)

        print(
            "Time: {:.2f}ms, Detection FPS: {:.1f}, total FPS: {:.1f}".format(
                ms, fps, fps2))
        if output_path != '':
            out.write(image)
        if show:
            cv2.imshow('output', image)
            if cv2.waitKey(25) & 0xFF == ord("q"):
                cv2.destroyAllWindows()
                break

    vid.release()
    out.release()
    cv2.destroyAllWindows()


def detect_video(Yolo,
                 video_path,
                 output_path,
                 input_size=416,
                 show=False,
                 CLASSES=YOLO_COCO_CLASSES,
                 score_threshold=0.3,
                 iou_threshold=0.45,
                 rectangle_colors=''):
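    """Run plain YOLO detection on a video and draw the resulting boxes.

    Supports the "tf" and "trt" YOLO_FRAMEWORK back ends; annotated frames
    are written to output_path when it is non-empty.
    """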
    times, times_2 = [], []
    vid = cv2.VideoCapture(video_path)

    # by default VideoCapture returns float instead of int
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(vid.get(cv2.CAP_PROP_FPS))
    codec = cv2.VideoWriter_fourcc(*'XVID')
    # NOTE: XVID typically pairs with .avi containers.
    out = cv2.VideoWriter(output_path, codec, fps, (width, height))

    while True:
        ret, img = vid.read()
        if not ret:  # end of video (or read error)
            break

        # frame stays in BGR; the original double BGR2RGB conversion was a no-op
        original_image = img

        image_data = image_preprocess(np.copy(original_image),
                                      [input_size, input_size])
        image_data = image_data[np.newaxis, ...].astype(np.float32)

        t1 = time.time()
        if YOLO_FRAMEWORK == "tf":
            pred_bbox = Yolo.predict(image_data)
        elif YOLO_FRAMEWORK == "trt":
            # TensorRT back end: the model returns a dict of output tensors
            batched_input = tf.constant(image_data)
            result = Yolo(batched_input)
            pred_bbox = [value.numpy() for value in result.values()]

        t2 = time.time()

        pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
        pred_bbox = tf.concat(pred_bbox, axis=0)

        bboxes = postprocess_boxes(pred_bbox, original_image, input_size,
                                   score_threshold)
        bboxes = nms(bboxes, iou_threshold, method='nms')

        image = draw_bbox(original_image,
                          bboxes,
                          CLASSES=CLASSES,
                          rectangle_colors=rectangle_colors)

        t3 = time.time()
        times.append(t2 - t1)
        times_2.append(t3 - t1)

        times = times[-20:]
        times_2 = times_2[-20:]

        ms = sum(times) / len(times) * 1000
        fps = 1000 / ms
        fps2 = 1000 / (sum(times_2) / len(times_2) * 1000)

        image = cv2.putText(image, "Time: {:.1f}FPS".format(fps), (0, 30),
                            cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)

        print(
            "Time: {:.2f}ms, Detection FPS: {:.1f}, total FPS: {:.1f}".format(
                ms, fps, fps2))
        if output_path != '':
            out.write(image)
        if show:
            cv2.imshow('output', image)
            if cv2.waitKey(25) & 0xFF == ord("q"):
                cv2.destroyAllWindows()
                break

    vid.release()
    out.release()
    cv2.destroyAllWindows()


def get_mAP(Yolo,
            dataset,
            score_threshold=0.25,
            iou_threshold=0.50,
            TEST_INPUT_SIZE=TEST_INPUT_SIZE):
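    """Evaluate Yolo on a dataset and return the mAP as a percentage.

    Ground-truth and per-class prediction JSON files are written under
    mAP/ground-truth, and a summary of AP, precision, and recall per class
    goes to mAP/results.txt.
    """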
    MINOVERLAP = 0.5  # default value (defined in the PASCAL VOC2012 challenge)
    NUM_CLASS = read_class_names(TRAIN_CLASSES)  # maps class index -> name

    ground_truth_dir_path = 'mAP/ground-truth'
    if os.path.exists(ground_truth_dir_path):
        shutil.rmtree(ground_truth_dir_path)

    if not os.path.exists('mAP'): os.mkdir('mAP')
    os.mkdir(ground_truth_dir_path)

    print(f'\ncalculating mAP{int(iou_threshold * 100)}...\n')

    gt_counter_per_class = {}
    for index in range(dataset.num_samples):
        ann_dataset = dataset.annotations[index]

        original_image, bbox_data_gt = dataset.parse_annotation(
            ann_dataset, True)

        if len(bbox_data_gt) == 0:
            bboxes_gt = []
            classes_gt = []
        else:
            bboxes_gt, classes_gt = bbox_data_gt[:, :4], bbox_data_gt[:, 4]
        ground_truth_path = os.path.join(ground_truth_dir_path,
                                         str(index) + '.txt')
        num_bbox_gt = len(bboxes_gt)

        bounding_boxes = []
        for i in range(num_bbox_gt):
            class_name = NUM_CLASS[classes_gt[i]]
            xmin, ymin, xmax, ymax = list(map(str, bboxes_gt[i]))
            bbox = xmin + " " + ymin + " " + xmax + " " + ymax
            bounding_boxes.append({
                "class_name": class_name,
                "bbox": bbox,
                "used": False
            })

            # count that object
            if class_name in gt_counter_per_class:
                gt_counter_per_class[class_name] += 1
            else:
                # first time this class is seen
                gt_counter_per_class[class_name] = 1
        with open(f'{ground_truth_dir_path}/{str(index)}_ground_truth.json',
                  'w') as outfile:
            json.dump(bounding_boxes, outfile)

    gt_classes = list(gt_counter_per_class.keys())
    # sort the classes alphabetically
    gt_classes = sorted(gt_classes)
    n_classes = len(gt_classes)

    times = []
    json_pred = [[] for _ in range(n_classes)]
    for index in range(dataset.num_samples):
        ann_dataset = dataset.annotations[index]

        original_image, bbox_data_gt = dataset.parse_annotation(
            ann_dataset, True)

        image = image_preprocess(np.copy(original_image),
                                 [TEST_INPUT_SIZE, TEST_INPUT_SIZE])
        image_data = image[np.newaxis, ...].astype(np.float32)

        t1 = time.time()
        if YOLO_FRAMEWORK == "tf":
            pred_bbox = Yolo.predict(image_data)
        elif YOLO_FRAMEWORK == "trt":
            batched_input = tf.constant(image_data)
            result = Yolo(batched_input)
            pred_bbox = []
            for key, value in result.items():
                value = value.numpy()
                pred_bbox.append(value)

        t2 = time.time()

        times.append(t2 - t1)

        pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
        pred_bbox = tf.concat(pred_bbox, axis=0)

        bboxes = postprocess_boxes(pred_bbox, original_image, TEST_INPUT_SIZE,
                                   score_threshold)
        bboxes = nms(bboxes, iou_threshold, method='nms')

        for bbox in bboxes:
            coor = np.array(bbox[:4], dtype=np.int32)
            score = '%.4f' % bbox[4]
            class_name = NUM_CLASS[int(bbox[5])]
            xmin, ymin, xmax, ymax = list(map(str, coor))
            bbox_str = xmin + " " + ymin + " " + xmax + " " + ymax
            json_pred[gt_classes.index(class_name)].append({
                "confidence": str(score),
                "file_id": str(index),
                "bbox": bbox_str
            })

    ms = sum(times) / len(times) * 1000
    fps = 1000 / ms

    for class_name in gt_classes:
        json_pred[gt_classes.index(class_name)].sort(
            key=lambda x: float(x['confidence']), reverse=True)
        with open(f'{ground_truth_dir_path}/{class_name}_predictions.json',
                  'w') as outfile:
            json.dump(json_pred[gt_classes.index(class_name)], outfile)

    # Calculate the AP for each class
    sum_AP = 0.0
    ap_dictionary = {}
    # open file to store the results
    with open("mAP/results.txt", 'w') as results_file:
        results_file.write("# AP and precision/recall per class\n")
        count_true_positives = {}
        for class_index, class_name in enumerate(gt_classes):
            count_true_positives[class_name] = 0
            # Load predictions of that class
            predictions_file = f'{ground_truth_dir_path}/{class_name}_predictions.json'
            with open(predictions_file) as pred_f:
                predictions_data = json.load(pred_f)

            # Assign predictions to ground truth objects
            nd = len(predictions_data)
            tp = [0] * nd  # creates an array of zeros of size nd
            fp = [0] * nd
            for idx, prediction in enumerate(predictions_data):
                file_id = prediction["file_id"]
                # assign prediction to ground truth object if any
                #   open ground-truth with that file_id
                gt_file = f'{ground_truth_dir_path}/{str(file_id)}_ground_truth.json'
                with open(gt_file) as gt_f:
                    ground_truth_data = json.load(gt_f)
                ovmax = -1
                gt_match = -1
                # bounding box of the prediction
                bb = [float(x) for x in prediction["bbox"].split()]
                for obj in ground_truth_data:
                    # look for a class_name match
                    if obj["class_name"] == class_name:
                        # bounding box of the ground truth
                        bbgt = [float(x) for x in obj["bbox"].split()]
                        bi = [
                            max(bb[0], bbgt[0]),
                            max(bb[1], bbgt[1]),
                            min(bb[2], bbgt[2]),
                            min(bb[3], bbgt[3])
                        ]
                        # +1 because PASCAL VOC treats pixel coordinates as inclusive
                        iw = bi[2] - bi[0] + 1
                        ih = bi[3] - bi[1] + 1
                        if iw > 0 and ih > 0:
                            # compute overlap (IoU) = area of intersection / area of union
                            ua = (bb[2] - bb[0] + 1) * (bb[3] - bb[1] + 1) + (
                                bbgt[2] - bbgt[0] + 1) * (bbgt[3] - bbgt[1] +
                                                          1) - iw * ih
                            ov = iw * ih / ua
                            if ov > ovmax:
                                ovmax = ov
                                gt_match = obj

                # assign prediction as true positive/don't care/false positive
                if ovmax >= MINOVERLAP:  # overlap meets the minimum threshold
                    if not gt_match["used"]:
                        # true positive
                        tp[idx] = 1
                        gt_match["used"] = True
                        count_true_positives[class_name] += 1
                        # update the ".json" file
                        with open(gt_file, 'w') as f:
                            f.write(json.dumps(ground_truth_data))
                    else:
                        # false positive (multiple detection)
                        fp[idx] = 1
                else:
                    # false positive
                    fp[idx] = 1

            # compute precision/recall: the cumulative sums turn the
            # per-detection flags into running FP/TP counts
            cumsum = 0
            for idx, val in enumerate(fp):
                fp[idx] += cumsum
                cumsum += val
            cumsum = 0
            for idx, val in enumerate(tp):
                tp[idx] += cumsum
                cumsum += val
            # recall = TP / number of ground-truth objects of this class
            rec = tp[:]
            for idx, val in enumerate(tp):
                rec[idx] = float(tp[idx]) / gt_counter_per_class[class_name]
            # precision = TP / all detections up to this rank
            prec = tp[:]
            for idx, val in enumerate(tp):
                prec[idx] = float(tp[idx]) / (fp[idx] + tp[idx])

            ap, mrec, mprec = voc_ap(rec, prec)
            sum_AP += ap
            text = "{0:.3f}% = {} AP".format(ap * 100, class_name)

            rounded_prec = ['%.3f' % elem for elem in prec]
            rounded_rec = ['%.3f' % elem for elem in rec]
            # Write to results.txt
            results_file.write(text + "\n Precision: " + str(rounded_prec) +
                               "\n Recall   :" + str(rounded_rec) + "\n\n")

            print(text)
            ap_dictionary[class_name] = ap

        results_file.write("\n# mAP of all classes\n")
        mAP = sum_AP / n_classes

        text = "mAP = {:.3f}%, {:.2f} FPS".format(mAP * 100, fps)
        results_file.write(text + "\n")
        print(text)

        return mAP * 100
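

# Minimal usage sketch (illustration only, not part of the original module).
# `load_trained_yolo_model` below is a HYPOTHETICAL stand-in for whatever
# helper this repo provides to build/load the trained network, and the
# file paths are placeholders.
#
# if __name__ == '__main__':
#     yolo = load_trained_yolo_model()  # hypothetical loader
#     detect_video_bgs(yolo,
#                      video_path="dive.mp4",
#                      output_path="dive_bgs.avi",
#                      log_path="dive_log.csv",
#                      show=True,
#                      draw_roi=True)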