Example no. 1
def visualize_bbox(img_path,
                   target,
                   plot_objects=True,
                   plot_parts=True,
                   out_img_path='bbox_viz.jpg'):
    """
    Required library: https://github.com/nalepae/bounding-box/
    """
    import cv2
    from bounding_box import bounding_box as bb

    img = cv2.imread(img_path, cv2.IMREAD_COLOR)

    for obj in target['object']:
        if plot_objects:
            xmin = obj['bndbox']['xmin']
            ymin = obj['bndbox']['ymin']
            xmax = obj['bndbox']['xmax']
            ymax = obj['bndbox']['ymax']
            bb.add(img, xmin, ymin, xmax, ymax, obj['name'])
        if plot_parts:
            for part in obj['parts']:
                xmin = part['bndbox']['xmin']
                ymin = part['bndbox']['ymin']
                xmax = part['bndbox']['xmax']
                ymax = part['bndbox']['ymax']
                bb.add(img, xmin, ymin, xmax, ymax, part['name'])

    cv2.imwrite(out_img_path, img)
    cv2.imshow(target['filename'], img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
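Every example in this collection goes through the same entry point of the bounding-box package linked in the docstring above. A minimal, self-contained sketch of that call on a synthetic image (the coordinates, label, and color below are purely illustrative):

import numpy as np
from bounding_box import bounding_box as bb

# Synthetic 3-channel uint8 image; bb.add draws the box and label in place.
image = np.zeros((480, 640, 3), dtype=np.uint8)

# The call shape used throughout these examples (label and color are optional):
#     bb.add(image, left, top, right, bottom, label, color)
bb.add(image, 50, 60, 300, 400, "example", "green")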
Example no. 2
def predict(frame, faceNet, model, labels):
    (h, w) = frame.shape[:2]
    blob = cv2.dnn.blobFromImage(frame, 1.0, (300, 300), (104.0, 177.0, 123.0))
    faceNet.setInput(blob)
    detections = faceNet.forward()

    for i in range(0, detections.shape[2]):
        confidence = detections[0, 0, i, 2]

        if confidence > args["conf"]:
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            (startX, startY, endX, endY) = box.astype("int")

            face = frame[startY:endY, startX:endX]
            age = get_age_from_model(face, model, labels)

            # format the predicted age label
            text = "{:s}".format(age)

            # draw the bounding box of the face along with the associated
            # predicted age
            #y = startY - 10 if startY - 10 > 10 else startY + 10
            bb.add(frame, startX, startY, endX, endY, text)
            #cv2.rectangle(frame, (startX, startY), (endX, endY),(0, 0, 255), 2)


#            cv2.putText(frame, text, (startX, y), cv2.FONT_HERSHEY_SIMPLEX,
#                        0.45, (0, 0, 255), 2)

    return frame
Example no. 3
def main():
    cont = 0
    while True:
        cont += 1
        if cont % 4 != 0:
            continue
        ret, frame = cap.read()
        rcocho = model1.result(frame)
        rbeberoudo = model2.result(frame)
        #    rpatio = model3.result(frame)
        image = cv2.resize(frame, (int(720), int(480)))
        bb.add(image, cocho[0], cocho[1], cocho[0] + cocho[2],
               cocho[1] + cocho[3], "COCHO STATUS: " + rcocho[0], "orange")
        bb.add(image, beberoudo[0], beberoudo[1], beberoudo[0] + beberoudo[2],
               beberoudo[1] + beberoudo[3],
               "BEBEROUDO STATUS: " + rbeberoudo[0], "aqua")
        #    bb.add(image, patio[0], patio[1], patio[0]+ patio[2], patio[1]+patio[3], "PATIO STATUS: " + rpatio[0], "green")
        img = image
        #roi_cocho = result[1]
        #cv2.imshow("roi sensor",roi_cocho)
        cv2.imshow('frame', img)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
Example no. 4
    def show(self, show, ia=None):
        """Draws the inferred objects onto the image and, if show is True, also displays it"""
        for d in self.detections:
            color = (34, 139, 34)
            start_x, start_y, w, h = d.bbox.unwrap()

            bb.add(self.img, start_x, start_y, start_x + w, start_y + h,
                   d.label)
            #cv2.rectangle(self.img, (start_x, start_y), (start_x+w,start_y+h), color, 2)
            #text = "{}".format(d.label)
            #cv2.putText(self.img, text, (start_x, start_y -5),
            #cv2.FONT_HERSHEY_SIMPLEX,0.5, color, 1)

        # also show the ground truths for testing purposes
        if ia is not None:
            for gt in ia.gts:
                color = (128, 0, 0)

                x = int(float(gt.bbox.x) * IMG_W)
                y = int(float(gt.bbox.y) * IMG_H)

                x2 = x + int(float(gt.bbox.w) * IMG_W)
                y2 = y + int(float(gt.bbox.h) * IMG_H)

                cv2.rectangle(self.img, (x, y), (x2, y2), color, 2)

        if show:
            cv2.imshow('image', self.img)
            cv2.waitKey(0)
Example no. 5
    def get_frame(self):
        success, frame = self.video.read()
        cv2.imwrite("frame.jpg", frame)
        image = cv2.imread("frame.jpg")
        img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        faces = detector.detect_faces(img)
        for face in faces:
            x, y, width, height = face['box']
            bb.add(image, x, y, x + width, y + height, "Face", "fuchsia")

        ret, jpeg = cv2.imencode('.jpg', image)
        return jpeg.tobytes()
Example no. 6
def make_box():

    with open(EMOTION_TXT_FILE, 'r') as eFile:
        lines = eFile.readlines()
        for i, row in enumerate(lines):
            frame = json.loads(row)

            path = RAW_FRAMES_OUTPUT_DIR + '/img' + str(i) + '.jpg'
            image = cv2.imread(path, cv2.IMREAD_COLOR)

            for face in frame:

                box = face["faceRectangle"]

                # Remove the bottom box
                if box["top"] > BOTTOM_BOXES:
                    continue

                emotion = calc_max_emotion(face["faceAttributes"]["emotion"])

                # Make Box
                bb.add(image, box['left'], box['top'],
                       box['left'] + box['width'], box['top'] + box['height'],
                       emotion, EMOTION_COLOR_DICT[emotion])

                # Add emoji
                overlay = cv2.imread(EMOJI_IMAGES_PATH + '\\' + emotion +
                                     '.png')
                rows, cols, channels = overlay.shape

                # Find the emoji location

                if box['left'] < LEFT_BOX:
                    emoji_coords = LEFT_EMOJI

                elif box['top'] < TOP_RIGHT_BOX:
                    emoji_coords = TOP_RIGHT_EMOJI

                else:
                    emoji_coords = BOTTOM_RIGHT_EMOJI

                overlay = cv2.addWeighted(
                    image[emoji_coords[1]:emoji_coords[1] + rows,
                          emoji_coords[0]:emoji_coords[0] + cols], 0.2, overlay,
                    0.8, 0)

                image[emoji_coords[1]:emoji_coords[1] + rows,
                      emoji_coords[0]:emoji_coords[0] + cols] = overlay

            cv2.imwrite(RESULT_FRAMES_PATH + '/img' + str(i) + '.png', image)
Example no. 7
def predict(image, net, layer, label, default_colors):
    (imageHeight, imageWidth) = image.shape[:2]
    #Detect object
    blob = cv2.dnn.blobFromImage(image,
                                 0.00392, (416, 416), (0, 0, 0),
                                 True,
                                 crop=False)
    net.setInput(blob)
    layerOutputs = net.forward(layer)
    # Box dimensions
    boxes = []
    confidences = []
    classIDs = []
    for output in layerOutputs:
        for detection in output:
            scores = detection[5:]
            classID = np.argmax(scores)
            confidence = scores[classID]

            if confidence > CONFIDENCE:
                box = detection[0:4] * np.array(
                    [imageWidth, imageHeight, imageWidth, imageHeight])
                (centerX, centerY, width, height) = box.astype("int")
                # the center-to-corner conversion below can make x or y negative
                x = int(centerX - (width / 2))
                y = int(centerY - (height / 2))
                print(x, y, width, height)
                if (y < 0):
                    y = 0
                if (x < 0):
                    x = 0
                boxes.append([x, y, int(width), int(height)])
                confidences.append(float(confidence))
                classIDs.append(classID)

    # Draw labels
    idxs = cv2.dnn.NMSBoxes(boxes, confidences, CONFIDENCE, NMS_THRES)

    if len(idxs) > 0:
        for i in idxs.flatten():
            (x, y) = (boxes[i][0], boxes[i][1])
            (w, h) = (boxes[i][2], boxes[i][3])
            text = "{}: {:.4f}".format(label[classIDs[i]], confidences[i])
            print(text)
            bb.add(image, x, y, x + w, y + h, text,
                   default_colors[classIDs[i]])

    listClasses = ','.join(str(n) for n in classIDs)
    print(listClasses)
    return image, listClasses
Example no. 8
def main():
    while True:
        ret, frame = cap.read()
        result = model.result(frame)
        data_value = result[0]
        image = cv2.resize(frame, (int(720), int(480)))
        bb.add(image, beberoudo[0], beberoudo[1], beberoudo[0] + beberoudo[2],
               beberoudo[1] + beberoudo[3], "bebedouro", "aqua")
        bb.add(image, cocho[0], cocho[1], cocho[0] + cocho[2],
               cocho[1] + cocho[3], "COCHO STATUS: " + data_value, "orange")
        img = image
        #roi_cocho = result[1]
        #cv2.imshow("roi sensor",roi_cocho)
        cv2.imshow('frame', img)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
Example no. 9
    def draw_bbox(self, pnid_img):
        img_arr = np.array(pnid_img)
        height, width = img_arr.shape[:-1]
        img_arr_copy = img_arr[:, :, ::-1].copy()
        for detected_item in self.data:
            bbox = detected_item.bounding_box
            label = detected_item.text
            bb.add(
                img_arr_copy,
                int(bbox["xMin"] * width),
                int(bbox["yMin"] * height),
                int(bbox["xMax"] * width),
                int(bbox["yMax"] * height),
                label,
                "red",
            )
        return Image.fromarray(img_arr_copy[:, :, ::-1])
Example no. 10
def process_image(image_data):
    image = cv2.imread(image_data.image_path)

    image = cv2.putText(image, image_data.image_name, (5, 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
    color_list = [
        "maroon", "green", "yellow", "purple", "fuchsia", "lime", "red",
        "silver"
    ]
    for ann in image_data.annotations:
        id_color = random.randint(0, 7)
        box_color = color_list[id_color]

        bb.add(image, ann.xmin, ann.ymin, ann.xmax, ann.ymax, ann.name,
               box_color)
        #image = cv2.rectangle(image, (ann.xmin, ann.ymin), (ann.xmax, ann.ymax), box_color, args.line_thickness)
        #image = cv2.putText(image, ann.name, (ann.xmin, ann.ymin), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2)
    return image
Example no. 11
    def predict(self, raw_image):
        x, img = data.transforms.presets.ssd.transform_test(
            nd.array(raw_image), short=INPUT_H)
        print('Shape of pre-processed image:', x.shape)

        class_IDs, scores, bounding_boxes = self.net(x)
        print(bounding_boxes[0][0])
        for box in bounding_boxes:
            #    print(box)
            bb.add(img, box[0][0], box[0][3], box[0][1], box[0][2], "a",
                   (255, 255, 255))
        # print(bounding_boxes.shape)

        # ax = utils.viz.plot_bbox(x, bounding_boxes[0], scores[0],
        #                         class_IDs[0], class_names=self.net.classes)
        # plt.show()

        return [class_IDs, scores, bounding_boxes]
Example no. 12
def make_box():

    with open(EMOTION_CSV_FILE, 'r') as csvFile:
        reader = csv.reader(csvFile)
        for i, row in enumerate(reader):
            if i == 0:
                continue
            path = RAW_FRAMES_OUTPUT_DIR + '/img' + str(row[0]) + '.jpg'
            image = cv2.imread(path, cv2.IMREAD_COLOR)
            row[1] = row[1].replace("\'", "\"")
            box = json.loads(row[1])

            if not box:
                direction = WRONG

            else:
                direction = CORRECT

                if box['top'] + box['height'] > BOX['bottom']:
                    direction = DOWN
                elif box['top'] < BOX['top']:
                    direction = UP
                elif box['left'] + box['width'] > BOX['right']:
                    direction = RIGHT
                elif box['left'] < BOX['left']:
                    direction = LEFT

                bb.add(image, BOX['left'], BOX['top'], BOX['right'],
                       BOX['bottom'], direction['label'], direction['color'])

            # Add sign
            overlay = cv2.imread(EMOJI_IMAGES_PATH + '\\' + direction['image'])
            rows, cols, channels = overlay.shape

            overlay = cv2.addWeighted(
                image[SIGN_ROW:SIGN_ROW + rows, SIGN_COL:SIGN_COL + cols], 0,
                overlay, 0.8, 0.2)
            image[SIGN_ROW:SIGN_ROW + rows, SIGN_COL:SIGN_COL + cols] = overlay

            cv2.imwrite(RESULT_FRAMES_PATH + '/img' + row[0] + '.png', image)
Example no. 13
def _plot_boxes(img: Image,
                bboxes: np.ndarray,
                scores: Optional[List] = None,
                class_map: Optional[Dict] = None,
                class_color_map: Optional[Dict] = None,
                **kwargs):
    class_map = class_map or {}
    class_color_map = class_color_map or {}
    draw_img = np.array(img)
    for i, box in enumerate(bboxes):
        bbox = list(map(lambda x: max(0, int(x)), box[:-1]))
        if not isinstance(box[-1], str):
            class_id = int(box[-1])
            category = class_map.get(class_id, str(class_id))
        else:
            class_id = category = box[-1]
        if kwargs.get("truncate_label", None) is not None:
            category = "".join([
                l[0].lower()
                for l in category.split(kwargs.get("truncate_label"))
            ])
        if scores is not None:
            category = category + ":" + str(round(scores[i], 2))
        color = class_color_map.get(class_id, "green")
        bb.add(draw_img, *bbox, category, color=color)
    return Image.fromarray(draw_img)
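A hedged usage sketch for _plot_boxes, assuming each row of bboxes is [x1, y1, x2, y2, class_id]; the blank canvas, box values, and class names below are made up for illustration:

import numpy as np
from PIL import Image

# Two hypothetical detections on a blank RGB canvas.
img = Image.new("RGB", (640, 480), color=(40, 40, 40))
boxes = np.array([[30, 40, 200, 220, 0], [250, 60, 400, 300, 1]])
out = _plot_boxes(img, boxes,
                  scores=[0.91, 0.77],
                  class_map={0: "person", 1: "dog"},
                  class_color_map={0: "blue", 1: "green"})
out.show()  # labels render as "person:0.91" and "dog:0.77"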
Example no. 14
def vis_detection(im_orig, detections, class_names, file='', thresh=0.7):
    """visualize [cls, conf, x1, y1, x2, y2]"""
    cmap = [
        'black', 'navy', 'blue', 'silver', 'aqua', 'teal', 'olive', 'purple',
        'green', 'fuchsia', 'lime', 'red', 'yellow', 'orange', 'red', 'maroon',
        'fuchsia', 'purple', 'black', 'gray', 'silver'
    ]

    im_orig = cv2.cvtColor(im_orig, cv2.COLOR_BGR2RGB)

    for [cls, conf, x1, y1, x2, y2] in detections:
        cls = int(cls)
        if cls > 0 and conf > thresh:

            bb.add(im_orig, int(x1), int(y1), int(x2), int(y2),
                   '{:s} {:.3f}'.format(class_names[cls], conf), cmap[cls])

    # plt.axis('off')
    # # plt.show()
    # plt.savefig('test.png')
    # cv2.imshow("d", im_orig)
    cv2.imwrite(
        os.path.join('/home/skutukov/Pictures/',
                     file.strip() + '222.jpg'), im_orig)
Example no. 15
def img_detect(params):
    arguments = {
        "image": "file_upload.jpg",
        "label": "yolo-coco/{}".format(params["names"]),
        "weight": "yolo-coco/{}".format(params["weight"]),
        "config": "yolo-coco/{}".format(params["config"]),
        "threshold": params["threshold"],
        "confidence": params["confidence"]
    }

    LABELS = open(arguments["label"]).read().strip().split("\n")
    COLORS = helper.get_rand_colors(len(LABELS))

    weightsPath = arguments["weight"]
    configPath = arguments["config"]

    ########################### reference ###########################
    print("[INFO] loading YOLO from disk...")
    net = cv2.dnn.readNetFromDarknet(configPath, weightsPath)
    image = cv2.imread(arguments["image"])

    (H, W) = image.shape[:2]
    ln = net.getLayerNames()
    ln = [ln[i[0] - 1] for i in net.getUnconnectedOutLayers()]

    blob = cv2.dnn.blobFromImage(image,
                                 1 / 255.0, (416, 416),
                                 swapRB=True,
                                 crop=False)

    net.setInput(blob)

    layerOutputs = net.forward(ln)

    boxes = []
    confidences = []
    classIDs = []

    # loop over each of the layer outputs
    for output in layerOutputs:
        for detection in output:
            scores = detection[5:]
            classID = np.argmax(scores)
            confidence = scores[classID]

            if confidence > arguments["confidence"]:
                box = detection[0:4] * np.array([W, H, W, H])
                (centerX, centerY, width, height) = box.astype("int")

                x = int(centerX - (width / 2))
                y = int(centerY - (height / 2))

                boxes.append([x, y, int(width), int(height)])
                confidences.append(float(confidence))
                classIDs.append(classID)

    idxs = cv2.dnn.NMSBoxes(boxes, confidences, arguments["confidence"],
                            arguments["threshold"])

    ########################### reference ###########################
    obj = []
    if len(idxs) > 0:
        for i in idxs.flatten():
            (x, y) = (boxes[i][0], boxes[i][1])
            (w, h) = (boxes[i][2], boxes[i][3])

            text = "{}".format(LABELS[classIDs[i]])
            color = COLORS[classIDs[i]]
            bb.add(image, x, y, x + w, y + h, text, color)

            if classIDs[i] not in obj:
                obj.append(classIDs[i])

    # encode the output image as base64 for the response
    retval, buffer = cv2.imencode('.png', image)
    jpg_as_text = base64.b64encode(buffer)
    restext = jpg_as_text.decode(encoding='UTF-8')

    response = {"res": restext, "obj": obj}

    return response
Example no. 16
def main():
    in_path = os.path.join("docs", "images", "winton.jpg")
    out_path = os.path.join("docs", "images", "winton_bb.png")
    image = cv2.imread(in_path, cv2.IMREAD_COLOR)
    bb.add(image, 281, 12, 744, 431, "Winton", "maroon")
    bb.add(image, 166, 149, 500, 297, "Trumpet", "yellow")
    show_and_save("Winton MARSALIS", image, out_path)

    in_path = os.path.join("docs", "images", "khatia.jpg")
    out_path = os.path.join("docs", "images", "khatia_bb.png")
    image = cv2.imread(in_path, cv2.IMREAD_COLOR)
    bb.add(image, 280, 24, 802, 593, "Khatia", "maroon")
    bb.add(image, 687, 1, 1448, 648, "Piano", "gray")
    bb.add(image, 888, 492, 1190, 536, "Text")
    show_and_save("Khatia BUNIATISHVILI", image, out_path)

    in_path = os.path.join("docs", "images", "clarifloue.jpg")
    out_path = os.path.join("docs", "images", "clarifloue_bb.png")
    image = cv2.imread(in_path, cv2.IMREAD_COLOR)
    bb.add(image, 69, 86, 470, 136, label="Headache designer")
    bb.add(image, 136, 196, 406, 234, "Text")
    bb.add(image, 67, 351, 471, 400, "Headache designer")
    bb.add(image, 130, 456, 390, 494, "Text")
    show_and_save("Clarinet", image, out_path)

    in_path = os.path.join("docs", "images", "nao-romeo-pepper.jpg")
    out_path = os.path.join("docs", "images", "nao-romeo-pepper_bb.png")
    image = cv2.imread(in_path, cv2.IMREAD_COLOR)
    bb.add(image, 155, 152, 244, 297, "Nao")
    bb.add(image, 260, 6, 423, 416, "Romeo")
    bb.add(image, 421, 76, 547, 402, "Pepper")
    show_and_save("Robots", image, out_path)

    in_path = os.path.join("docs", "images", "ski-paraglider.jpg")
    out_path = os.path.join("docs", "images", "ski-paraglider_bb.png")
    image = cv2.imread(in_path, cv2.IMREAD_COLOR)
    bb.add(image, 0, 128, 645, 589, "Paraglider", "orange")
    bb.add(image, 689, 442, 818, 566, "Skier", "gray")
    show_and_save("Ski and paraglider", image, out_path)

    in_path = os.path.join("docs", "images", "paragliders.jpg")
    out_path = os.path.join("docs", "images", "paragliders_bb.png")
    image = cv2.imread(in_path, cv2.IMREAD_COLOR)
    bb.add(image, 90, 228, 318, 428, "Paraglider")
    bb.add(image, 521, 110, 656, 415, "Paraglider")
    show_and_save("Pretty Bounding Box", image, out_path)

    in_path = os.path.join("docs", "images", "selfie.jpg")
    out_path = os.path.join("docs", "images", "selfie_bb.png")
    image = cv2.imread(in_path, cv2.IMREAD_COLOR)
    bb.add(image, 5, 7, 150, 169, "Female", "fuchsia")
    bb.add(image, 116, 7, 193, 113, "Male", "blue")
    bb.add(image, 189, 7, 291, 124, "Female", "fuchsia")
    bb.add(image, 288, 25, 355, 114, "Male", "blue")
    bb.add(image, 367, 0, 448, 92, "Male", "blue")
    bb.add(image, 435, 29, 506, 104, "Female", "fuchsia")
    bb.add(image, 497, 3, 597, 111, "Female", "fuchsia")
    bb.add(image, 110, 133, 213, 245, "Female", "fuchsia")
    bb.add(image, 176, 120, 293, 289, "Female", "fuchsia")
    bb.add(image, 314, 115, 470, 357, "Male", "blue")
    bb.add(image, 468, 72, 577, 226, "Male", "blue")
    show_and_save("The Selfie", image, out_path)

    in_path = os.path.join("docs", "images", "pobb.jpg")
    out_path = os.path.join("docs", "images", "pobb_bb.png")
    image = cv2.imread(in_path, cv2.IMREAD_COLOR)
    bb.add(image, 76, 62, 155, 271, "Female", "fuchsia")
    bb.add(image, 157, 44, 288, 274, "Male", "blue")
    bb.add(image, 224, 64, 317, 274, "Male", "blue")
    bb.add(image, 290, 48, 383, 277, "Male", "blue")
    bb.add(image, 350, 42, 458, 276, "Female", "fuchsia")
    bb.add(image, 416, 17, 510, 279, "Male", "blue")
    bb.add(image, 482, 55, 573, 278, "Female", "fuchsia")
    bb.add(image, 547, 63, 615, 277, "Female", "fuchsia")
    bb.add(image, 608, 49, 704, 275, "Female", "fuchsia")
    bb.add(image, 672, 34, 767, 274, "Male", "blue")
    bb.add(image, 725, 62, 813, 273, "Female", "fuchsia")
    bb.add(image, 786, 38, 887, 267, "Male", "blue")
    bb.add(image, 864, 51, 959, 266, "Male", "blue")
    show_and_save("POBB", image, out_path)
Example no. 17
def predict(image):

    # initialize a list of colors to represent each possible class label
    np.random.seed(15)
    COLORS = ["blue", "yellow", "red", "green"]
    (H, W) = image.shape[:2]
    
    # determine only the "output" layer names which we need from YOLO
    ln = net.getLayerNames()
    ln = [ln[i[0] - 1] for i in net.getUnconnectedOutLayers()]
    
    # construct a blob from the input image and then perform a forward pass of the YOLO object detector, 
    # giving us our bounding boxes and associated probabilities
    blob = cv2.dnn.blobFromImage(image, 1 / 255.0, (416, 416), swapRB=False, crop=False)
    net.setInput(blob)
    layerOutputs = net.forward(ln)
    
    boxes = []
    confidences = []
    classIDs = []
    threshold = 0.3
    
    # loop over each of the layer outputs
    for output in layerOutputs:
        # loop over each of the detections
        for detection in output:
            # extract the class ID and confidence (i.e., probability) of
            # the current object detection
            scores = detection[5:]
            classID = np.argmax(scores)
            confidence = scores[classID]

            # filter out weak predictions by ensuring the detected
            # probability is greater than the minimum probability
            # confidence type=float, default=0.5
            if confidence > threshold:
                # scale the bounding box coordinates back relative to the
                # size of the image, keeping in mind that YOLO actually
                # returns the center (x, y)-coordinates of the bounding
                # box followed by the boxes' width and height
                box = detection[0:4] * np.array([W, H, W, H])
                (centerX, centerY, width, height) = box.astype("int")

                # use the center (x, y)-coordinates to derive the top and
                # and left corner of the bounding box
                x = int(centerX - (width / 2))
                y = int(centerY - (height / 2))

                # update our list of bounding box coordinates, confidences,
                # and class IDs
                boxes.append([x, y, int(width), int(height)])
                confidences.append(float(confidence))
                classIDs.append(classID)

    # apply non-maxima suppression to suppress weak, overlapping bounding boxes
    idxs = cv2.dnn.NMSBoxes(boxes, confidences, threshold, 0.3)

    print(idxs)

    # ensure at least one detection exists
    if len(idxs) > 0:
        # loop over the indexes we are keeping
        for i in idxs.flatten():
            # extract the bounding box coordinates
            (x, y) = (boxes[i][0], boxes[i][1])
            (w, h) = (boxes[i][2], boxes[i][3])

            # draw a bounding box rectangle and label on the image
            color = str(np.random.choice(COLORS, 1)[0])
            text = "{}: {:.4f}".format(LABELS[classIDs[i]], confidences[i])
            bb.add(image, x, y, x + w, y + h, text, color)
            #cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
            #cv2.putText(image, text, (x +15, y - 10), cv2.FONT_HERSHEY_SIMPLEX,1, color, 2)

        return image, LABELS[classIDs[i]], confidences[i]

    return image, None, None
Example no. 18
def main():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    img_ids = None
    if not img_ids:
        img_ids = sample(list(json.load(open(gqa_val_sgs)).keys()), NUM_IMGS_TO_TEST)
    ref_objects_dict = {value[0]: key for key, value in json.load(open(ref_objs_dict, 'r')).items()}
    data_dict = json.load(open(labels_path), object_pairs_hook=OrderedDict)
    obj_labels = data_dict['relevant_objs']
    if not 'BACKGROUND' in obj_labels:
        obj_labels.append('BACKGROUND')
    obj_labels_dict = {i: obj_labels[i] for i in range(len(obj_labels))}
    att_labels = data_dict['relevant_atts']
    att_labels_dict = {i: att_labels[i] for i in range(len(att_labels))}

    att_categories = None
    if categorize_atts:
        att_categories = json.load(open(ATT_CATEGORIES_FILE, 'r'))
        att_categories = \
            {key: list(value.keys()) for key, value in att_categories.items() if key not in CATEGORIES_TO_DROP}

    model = InferenceMLPModel(mlp_params['hidden_dim'], mlp_params['input_dim'], len(obj_labels_dict),
                              len(att_labels_dict), att_categories).to(device)
    model.load_state_dict(torch.load(ckpt_path))
    model.eval()
    ref_model = MLPModel(128, 2048, len(ref_objects_dict)).to(device)
    ref_model.load_state_dict(torch.load(ref_objects_detector_ckpt))
    ref_model.eval()

    with h5py.File(gqa_data_file, 'r') as data_f, torch.set_grad_enabled(False):
        for img_id in tqdm(img_ids, desc='Evaluating images...'):
            features = data_f['features_' + img_id][()]
            tensor_features = torch.Tensor(features).unsqueeze(0).to(device)
            bboxes = data_f['bboxes_' + img_id][()]
            img = cv2.imread(imgs_path.format(img_id))
            img_copy = img.copy()
            img_resized = cv2.resize(img.copy(), (0, 0), fx=2, fy=2)
            pred_obj_labels, pred_obj_probs, pred_att_labels, pred_att_probs = \
                model(tensor_features, np.array([features.shape[0]]))
            pred_obj_labels = pred_obj_labels[0]
            pred_obj_probs = pred_obj_probs[0]
            if WITH_ATTS and not categorize_atts:
                pred_att_labels = pred_att_labels[0]
                pred_att_probs = pred_att_probs[0]

            relevant_bboxes_data = []
            for i in range(len(pred_obj_labels)):
                cur_label = []
                if obj_labels_dict[pred_obj_labels[i]] == 'BACKGROUND':
                    continue
                if pred_obj_probs[i] > OBJ_CONF_THRESH:
                    cur_label.append(obj_labels_dict[pred_obj_labels[i]])
                    if WITH_ATTS:
                        if categorize_atts:
                            for att_category in CATEGORIES_TO_SHOW:
                                if pred_att_probs[att_category][i] > ATT_CONF_THRESH:
                                    cur_category_label = pred_att_labels[att_category][i]
                                    cur_label.insert(0, att_categories[att_category][cur_category_label])
                        elif pred_att_probs[i] > ATT_CONF_THRESH:
                            cur_label.insert(0, pred_att_labels[i])

                    relevant_bboxes_data.append((i, ' '.join(cur_label)))

            for box_data in relevant_bboxes_data:
                box_id, box_label = box_data
                cur_bbox = bboxes[box_id, :]
                bb.add(img, cur_bbox[0], cur_bbox[1], cur_bbox[2], cur_bbox[3], box_label)

            # visualise reference model
            ref_preds = ref_model(tensor_features.squeeze(0)).cpu().detach().numpy()
            ref_pred_labels = np.argmax(ref_preds, axis=1)
            ref_pred_probs = np.max(ref_preds, axis=1)
            relevant_bboxes_data_ref = [(i, ref_pred_labels[i]) for i in range(len(ref_pred_probs))
                                        if ref_pred_probs[i] > REF_CONF_THRESH]
            for box_data in relevant_bboxes_data_ref:
                box_id, box_label = box_data
                cur_bbox = bboxes[box_id, :]
                bb.add(img_copy, cur_bbox[0], cur_bbox[1], cur_bbox[2], cur_bbox[3], ref_objects_dict[box_label])
            imgs_concat = np.concatenate((img_resized, np.concatenate((img, img_copy), axis=1)), axis=0)
            cv2.imwrite(os.path.join(output_path, img_id + '.jpg'), imgs_concat)
Example no. 19
                 violate.add(i)
                 violate.add(j)
                 
 # loop over the results
 for (i, (prob, bbox, centroid)) in enumerate(results):
     (startX, startY, endX, endY) = bbox
     (cX, cY) = centroid
     color = "green"
     
     # if the index pair exists within the violation set, then
     # update the color
     if i in violate:
         color = "red"
     
     # draw a bounding box around the person and his/her centroid
     bb.add(frame, startX, startY - 5, endX, endY - 5, color=color)
     cv2.circle(frame, (cX, cY), 3, colors[color], -1)
     
 # display the total number of social distancing violations
 text = "Social Distancing Violation: {:d}".format(len(violate))
 cv2.putText(frame, text, (10, 25),
             cv2.FONT_HERSHEY_SIMPLEX, 0.85, (0, 0, 255), 2)
 
 frame_show = cv2.resize(frame, (1280,768))
 cv2.imshow("Social distancing", frame_show)
 key = cv2.waitKey(1) & 0xFF
 
 if key == 27:
     break
 
 """
Example no. 20
'''Test image'''

test_image = face_recognition.load_image_file(img_dir)

# Find all the faces in the test image using the default HOG-based model
face_locations = face_recognition.face_locations(test_image)
no = len(face_locations)
print("Number of faces detected: ", no)

# Predict all the faces in the test image using the trained classifier
print("Found:")
for i in range(no):
    test_image_enc = face_recognition.face_encodings(test_image)[i]
    name = model.predict([test_image_enc])
    print(*name)

# face_recognition returns locations as (top, right, bottom, left)
top, right, bottom, left = face_locations[0]
x_min, y_min, x_max, y_max = left, top, right, bottom

color_list = [
    "maroon", "green", "yellow", "purple", "fuchsia", "lime", "red", "silver"
]
id_color = random.randint(0, 7)
box_color = color_list[id_color]
bb.add(test_image, x_min, y_min, x_max, y_max, str(name[0]), box_color)

cv2.imshow('demo', test_image)
cv2.waitKey(0)
Example no. 21
def addBox(row, image, color, labelName='label', labelAug=''):
    bb.add(image, row.bbl, row.bbt, row.bbr, row.bbb,
           row[labelName] + labelAug, color)
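A hedged usage sketch for addBox, assuming the row is a pandas Series (it supports both the attribute access row.bbl and the key access row[labelName] that the helper relies on); the blank image, box values, and column names are illustrative:

import numpy as np
import pandas as pd

# One hypothetical row of box coordinates plus a label column.
image = np.zeros((480, 640, 3), dtype=np.uint8)
row = pd.Series({"bbl": 10, "bbt": 20, "bbr": 110, "bbb": 220, "label": "car"})
addBox(row, image, "yellow")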
Example no. 22
            # getting contours (the second return value is the hierarchy)
            contours, hierarchy = cv2.findContours(
                binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        except:
            break

        cv2.line(frame, (415, LINE_POSITION_1), (715, LINE_POSITION_1),
                 (0, 255, 0), 3)
        cv2.line(frame, (235, LINE_POSITION_2), (878, LINE_POSITION_2),
                 (0, 0, 255), 3)

        for c in contours:
            x, y, w, h = cv2.boundingRect(c)
            x_c, y_c = center(cv2.boundingRect(c))  # getting center

            # contour validation
            if (y_c > LINE_POSITION_1) and (y_c < LINE_POSITION_2) and (
                    x_c > 235) and (x_c < 878):
                contour_validation = (w >= MIN_WIDTH) and (
                    h >= MIN_HEIGTH) and (w <= MAX_WIDTH) and (h <= MAX_HEIGHT)
                if contour_validation:
                    bb.add(frame, x, y, x + w, y + h, 'CAR', 'yellow')

        cv2.imshow("Result", frame)

        if cv2.waitKey(1) == 27:
            break

    cv2.destroyAllWindows()
    cap.release()
Example no. 23
# print(inference_result.examples[0].lrtb[0])

# for root, dirs, files in os.walk(INPUT_IMG_DIR):
# print(files)

assert (len(inference_result.examples[0].image_id) == len(
    inference_result.examples[0].bounding_box_lrtb))

for i in range(len(inference_result.examples[0].image_id)):
    image_id = inference_result.examples[0].image_id[i]
    bounding_box_lrtb = inference_result.examples[0].bounding_box_lrtb[i]

    input_image_path = INPUT_IMG_DIR + "/" + str(image_id).zfill(12) + ".jpg"
    output_image_path = OUTPUT_IMG_DIR + "/" + str(i) + ".jpg"

    # assert(bounding_box_lrtb[0] < image)
    image = cv2.imread(input_image_path, cv2.IMREAD_COLOR)

    print("bounding_box: ", bounding_box_lrtb)
    print("image: ", image.shape)
    """
    assert(bounding_box_lrtb[0] <= image.shape[1])
    assert(bounding_box_lrtb[1] <= image.shape[1])
    assert(bounding_box_lrtb[2] <= image.shape[0])
    assert(bounding_box_lrtb[3] <= image.shape[0])
    """

    # reorder LRTB (left, right, top, bottom) into bb's (left, top, right, bottom)
    bounding_box.add(image, bounding_box_lrtb[0], bounding_box_lrtb[2],
                     bounding_box_lrtb[1], bounding_box_lrtb[3])
    cv2.imwrite(output_image_path, image)
Example no. 24
def main():
    
    # initialize a list storing counted objectID
    counted_objectID = []
    
    # initialize frame dimensions
    H,W = None, None
    
    """
     initialize our centroid tracker, then initialize a lost to store
     each of our dlib correlation trackers, followed by a dictionary to 
     map each unique object ID to a TrackableOject 
    """ 
    ct = CentroidTracker(maxDisappeared=conf["max_disappear"],
                         maxDistance=conf["max_distance"])
    trackers = []
    trackableObjects = {}
    
    # keep the count of total number of frames
    totalFrame = 0
    
    net = load_model()
    cap = cv2.VideoCapture(args["video"])
    time.sleep(1)
    
    # initialize the coordinates of the counting line
    _, frame = cap.read()
    count_line = [(0,frame.shape[0]-conf["line_coordinate"]),
                  (frame.shape[1],frame.shape[0]-conf["line_coordinate"])]
    
    # initialize car counting variable
    car_count = 0
    
    if args["save"]:
        video_size = (frame.shape[1]+250,frame.shape[0])
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        writer = cv2.VideoWriter("processed_video.avi",fourcc,24,video_size)

    while True:
        ret, frame = cap.read()

        # break out of the loop if no frame is captured
        if frame is None:
            break
       
        # save the original frame for later display
        origin_frame = frame.copy()
        
        # crop the frame to the region above the counting line
        frame = frame[:frame.shape[0]-conf["line_coordinate"]-25,:,:]
        
        # convert to RGB colour space
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        
        
        # if the frame dimensions are empty, set them
        if W is None or H is None:
            (H,W) = frame.shape[:2]
        
        """
        initialize our list of bounding box rectangles returned by 
        either (1) our object detector or (2) the correlation trackers
        """
        rects = []
        
        """
        check to see if we should run a more computationally expensive
        object detection method to add our tracker
        """
        if totalFrame % conf["track_object"] == 0:
            # initialize our new set of object trackers
            trackers = []
            
            """
            convert the frame to a blob and pass the blob 
            through the network and obtain the detections
            """
            #blob = cv2.dnn.blobFromImage(frame, size=(300,300),ddepth=cv2.CV_8U)
            blob = cv2.dnn.blobFromImage(frame,ddepth=cv2.CV_8U)
            #net.setInput(blob, scalefactor=1.0/127.5, mean=[127.5,127.5,127.5])
            net.setInput(blob, scalefactor=1.0/255, mean=[255,255,255])
            detections = net.forward()
            

            # loop over the detections
            for i in np.arange(0, detections.shape[2]):
                """
                extract the confidence (i.e., probability)
                associated with the prediction
                """
                confidence = detections[0,0,i,2]
                
                """
                filter out weak detections by 
                setting a threshold confidence
                """
                if confidence > conf["confidence"]:
                    """
                    extract the index of the class label
                    from detection list
                    """
                    idx = int(detections[0,0,i,1])
                    
                    # if the class label is not a car, skip it
                    if CLASSES[idx] != "car":
                        continue
                    
                    """
                    compute the (x,y)-coordinates of the 
                    bounding box for the object
                    """
                    box = detections[0,0,i,3:7] * np.array([W,H,W,H])
                    (startX, startY, endX, endY) = box.astype("int")
                    
                    """
                    construct a dlib rectangle object from the bounding
                    box coordinates and then start the dlib correlation tracker
                    """
                    tracker = dlib.correlation_tracker()
                    rect = dlib.rectangle(startX,startY, endX, endY)
                    tracker.start_track(rgb,rect)
                    
                    """
                    add the tracker to our list of trackers
                    so we can utilize it during skip frames
                    """
                    trackers.append(tracker)
            """
        otherwise, we should utilize our object "trackes" rather than 
        object "detectors" to obtain a higher frame preprocessing
            """
        else:
            # loop over the tracker
            for tracker in trackers:
                tracker.update(rgb)
                pos = tracker.get_position()
                
                # unpack the position object
                post_list = [pos.left(),pos.top(),pos.right(), pos.bottom()]
                [startX, startY, endX, endY] = list(map(int,post_list))         
                
                # add the bounding box coordinate to the rectangle list
                rects.append((startX,startY,endX,endY))
                
        """
        use the centroid tracker to associate the (1) old object
        centroids with (2) the newly computed object centroids
        """
        objects = ct.update(rects)
        
        # loop over the tracked objects
        for (objectID, centroid),rect in zip(objects.copy().items(),rects):
            
            # if objectID is already counted then skip it
            if objectID in counted_objectID:
                ct.deregister(objectID)
                rects.remove(rect)
                trackers.remove(tracker)
                objects = ct.update(rects)
                break
            else:
                """
                if centroid of the car cross count line 
                then increment car_count
                """
                if (centroid[1] + 60 > count_line[0][1]): 
                    rects.remove(rect)
                    trackers.remove(tracker)
                    counted_objectID.append(objectID)
                    ct.deregister(objectID)
                    objects = ct.update(rects)
                    car_count+=1
                    break
            
            """
            check to see if a trackable object exists
            for the current object ID
            """
            to = trackableObjects.get(objectID, None)
            
            # if there is no existing trackable object, create one
            if to is None:
                to = TrackableObject(objectID, centroid)
            
            # store the trackable object in our dictionary
            trackableObjects[objectID] = to
            
            
            
            """
            draw both the ID of the object and the centroid 
            of the object on the output frame
            """
            
            text = "ID {}".format(objectID)
            bb.add(origin_frame,rect[0],rect[1],rect[2],rect[3],"car","green")
            cv2.putText(origin_frame, text, (centroid[0] - 10, centroid[1] - 10), 
                  cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
            cv2.circle(origin_frame, (centroid[0], centroid[1]), 4,
                       (0, 255, 0), -1)
            
        
        # create a blank space next to frame to display No. cars
        blank_region = np.ones((origin_frame.shape[0],250,3), np.uint8)*255
        cv2.putText(blank_region, "No. car(s):", (40,origin_frame.shape[0]//2-120),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 3)
        
    
        cv2.line(origin_frame,count_line[0],count_line[1],(0,255,0),3)
        
        # stack the frame with blank space
        cv2.putText(blank_region, str(car_count), (40,origin_frame.shape[0]//2),
                    cv2.FONT_HERSHEY_SIMPLEX, 4, (0, 0, 0), 3)
        
        stack_image = np.concatenate((origin_frame,blank_region),axis=1)
        cv2.imshow("Final result", stack_image)
        
        # save processed videos
        if args["save"]:
            writer.write(stack_image)
            
        # press ESC to terminate the system
        key = cv2.waitKey(1)  & 0xff
        
        if key == 27:
            break
        
        # increment the total number of frames processed so far
        totalFrame += 1
        
        
    cap.release()
    cv2.destroyAllWindows()
    if args["save"]:
        writer.release()
Example no. 25
def draw_bbox(img, bbox, conf=None, color='red'):
    boxtext = str(conf) if conf is not None else 'face'
    bb.add(img, *bbox, boxtext, color)
    return img
Example no. 26
    def visualize(self, image, pred):
        img = Model.img2arr(image)
        rois, class_ids = EfficientDetModel.parse(pred)
        for i, roi in enumerate(rois):
            bb.add(img, roi[0], roi[1], roi[2], roi[3], str(class_ids[i]))
        Image.fromarray(img).show()
Example no. 27
    all_frames = load_all_seq_frames(frames_folder)

    # Printing sequence info
    print_sequence_info(gt)

    # Make directory to save annotated images
    if not os.path.exists("results/" + sequence):
        os.makedirs(os.path.dirname("results/" + sequence), exist_ok=True)

    # Write bounding boxes in each frame and save each frame
    num_frames = len(all_frames)
    for i in range(num_frames):
        frame = all_frames[i][0].copy()
        for line in gt:
            if line[0] == i + 1:
                bb.add(frame, line[2], line[3], line[2] + line[4],
                       line[3] + line[5], line[1], 'green')
        cv2.imwrite("results/" + sequence + "{}.jpg".format(i), frame)
    #    0       1      2           3         4            5          6     7    8    9
    # <frame>, <id>, <bb_left>, <bb_top>, <bb_width>, <bb_height>, <conf>, <x>, <y>, <z>
"""
vc = cv2.VideoCapture(path)
W = vc.get(cv2.CAP_PROP_FRAME_WIDTH)
H = vc.get(cv2.CAP_PROP_FRAME_HEIGHT)
size = (int(W), int(H))

out = cv2.VideoWriter('{}.mp4'.format(video_sequence_name), cv2.VideoWriter_fourcc(*'DIVX'), 20, size)

while True:
    frame = vc.read()
    frame = frame[1]
    if frame is None:
Example no. 28
def main():
    cfg.device = torch.device('cuda')
    torch.backends.cudnn.benchmark = False

    max_per_image = 100
    num_classes = 80 if cfg.dataset == 'coco' else 4
    dictionary = np.load(cfg.dictionary_file)

    colors = COCO_COLORS if cfg.dataset == 'coco' else DETRAC_COLORS
    names = COCO_NAMES if cfg.dataset == 'coco' else DETRAC_NAMES
    for j in range(len(names)):
        col_ = [c * 255 for c in colors[j]]
        colors[j] = tuple(col_)

    print('Creating model and recover from checkpoint ...')
    if 'hourglass' in cfg.arch:
        model = exkp(n=5,
                     nstack=2,
                     dims=[256, 256, 384, 384, 384, 512],
                     modules=[2, 2, 2, 2, 2, 4],
                     num_classes=num_classes)
    else:
        model = get_pose_net(num_layers=int(cfg.arch.split('_')[-1]),
                             num_classes=80)
        # raise NotImplementedError

    model = load_demo_model(model, cfg.ckpt_dir)
    model = model.to(cfg.device)
    model.eval()

    # Loading COCO validation images
    annotation_file = '{}/annotations/instances_{}.json'.format(
        cfg.data_dir, cfg.data_type)
    coco = COCO(annotation_file)

    # Load all annotations
    cats = coco.loadCats(coco.getCatIds())
    nms = [cat['name'] for cat in cats]
    catIds = coco.getCatIds(catNms=nms)
    # imgIds = coco.getImgIds(catIds=catIds)
    imgIds = coco.getImgIds()
    # annIds = coco.getAnnIds(catIds=catIds)
    # all_anns = coco.loadAnns(ids=annIds)
    # print(len(imgIds), imgIds)

    for id in imgIds:
        annt_ids = coco.getAnnIds(imgIds=[id])
        annotations_per_img = coco.loadAnns(ids=annt_ids)
        # print('All annots: ', len(annotations_per_img), annotations_per_img)
        img = coco.loadImgs(id)[0]
        image_path = '%s/images/%s/%s' % (cfg.data_dir, cfg.data_type,
                                          img['file_name'])
        w_img = int(img['width'])
        h_img = int(img['height'])
        if w_img < 1 or h_img < 1:
            continue

        img_original = cv2.imread(image_path)
        img_connect = cv2.imread(image_path)
        img_recon = cv2.imread(image_path)
        print('Image id: ', id)

        for annt in annotations_per_img:
            if annt['iscrowd'] == 1 or type(annt['segmentation']) != list:
                continue

            polygons = get_connected_polygon_using_mask(
                annt['segmentation'], (h_img, w_img),
                n_vertices=cfg.num_vertices,
                closing_max_kernel=60)
            gt_bbox = annt['bbox']
            gt_x1, gt_y1, gt_w, gt_h = gt_bbox
            contour = np.array(polygons).reshape((-1, 2))

            # Downsample the contour to fix number of vertices
            if len(contour) > cfg.num_vertices:
                resampled_contour = resample(contour, num=cfg.num_vertices)
            else:
                resampled_contour = turning_angle_resample(
                    contour, cfg.num_vertices)

            resampled_contour[:, 0] = np.clip(resampled_contour[:, 0], gt_x1,
                                              gt_x1 + gt_w)
            resampled_contour[:, 1] = np.clip(resampled_contour[:, 1], gt_y1,
                                              gt_y1 + gt_h)

            clockwise_flag = check_clockwise_polygon(resampled_contour)
            if not clockwise_flag:
                fixed_contour = np.flip(resampled_contour, axis=0)
            else:
                fixed_contour = resampled_contour.copy()

            # Indexing from the left-most vertex, argmin x-axis
            idx = np.argmin(fixed_contour[:, 0])
            indexed_shape = np.concatenate(
                (fixed_contour[idx:, :], fixed_contour[:idx, :]), axis=0)

            x1, y1, x2, y2 = gt_x1, gt_y1, gt_x1 + gt_w, gt_y1 + gt_h

            # bbox_width, bbox_height = x2 - x1, y2 - y1
            # bbox = [x1, y1, bbox_width, bbox_height]
            # bbox_center = np.array([(x1 + x2) / 2., (y1 + y2) / 2.])
            bbox_center = np.mean(indexed_shape, axis=0)

            centered_shape = indexed_shape - bbox_center

            # visualize resampled points with multiple parts in image side by side
            for cnt in range(len(annt['segmentation'])):
                polys = np.array(annt['segmentation'][cnt]).reshape((-1, 2))
                cv2.polylines(img_original, [polys.astype(np.int32)],
                              True, (10, 10, 255),
                              thickness=2)
                # cv2.drawContours(img_original, [polys.astype(np.int32)], contourIdx=-1, color=(10, 10, 255), thickness=-1)

            cv2.polylines(img_connect, [indexed_shape.astype(np.int32)],
                          True, (10, 10, 255),
                          thickness=2)
            # cv2.drawContours(img_connect, [indexed_shape.astype(np.int32)], contourIdx=-1, color=(10, 10, 255), thickness=-1)

            learned_val_codes, _ = fast_ista(centered_shape.reshape((1, -1)),
                                             dictionary,
                                             lmbda=0.1,
                                             max_iter=60)
            recon_contour = np.matmul(learned_val_codes, dictionary).reshape(
                (-1, 2))
            recon_contour = recon_contour + bbox_center
            cv2.polylines(img_recon, [recon_contour.astype(np.int32)],
                          True, (10, 10, 255),
                          thickness=2)
            # cv2.drawContours(img_recon, [recon_contour.astype(np.int32)], contourIdx=-1, color=(10, 10, 255), thickness=-1)

            # plot gt mean and std
            # image = cv2.imread(image_path)
            # # cv2.ellipse(image, center=(int(contour_mean[0]), int(contour_mean[1])),
            # #             axes=(int(contour_std[0]), int(contour_std[1])),
            # #             angle=0, startAngle=0, endAngle=360, color=(0, 255, 0),
            # #             thickness=2)
            # cv2.rectangle(image, pt1=(int(contour_mean[0] - contour_std[0] / 2.), int(contour_mean[1] - contour_std[1] / 2.)),
            #               pt2=(int(contour_mean[0] + contour_std[0] / 2.), int(contour_mean[1] + contour_std[1] / 2.)),
            #               color=(0, 255, 0), thickness=2)
            # cv2.polylines(image, [fixed_contour.astype(np.int32)], True, (0, 0, 255))
            # cv2.rectangle(image, pt1=(int(min(fixed_contour[:, 0])), int(min(fixed_contour[:, 1]))),
            #               pt2=(int(max(fixed_contour[:, 0])), int(max(fixed_contour[:, 1]))),
            #               color=(255, 0, 0), thickness=2)
            # cv2.imshow('GT segments', image)
            # if cv2.waitKey() & 0xFF == ord('q'):
            #     break

        image = cv2.imread(image_path)
        original_image = image.copy()
        height, width = image.shape[0:2]
        padding = 127 if 'hourglass' in cfg.arch else 31
        imgs = {}
        for scale in cfg.test_scales:
            new_height = int(height * scale)
            new_width = int(width * scale)

            if cfg.img_size > 0:
                img_height, img_width = cfg.img_size, cfg.img_size
                center = np.array([new_width / 2., new_height / 2.],
                                  dtype=np.float32)
                scaled_size = max(height, width) * 1.0
                scaled_size = np.array([scaled_size, scaled_size],
                                       dtype=np.float32)
            else:
                img_height = (new_height | padding) + 1
                img_width = (new_width | padding) + 1
                center = np.array([new_width // 2, new_height // 2],
                                  dtype=np.float32)
                scaled_size = np.array([img_width, img_height],
                                       dtype=np.float32)

            img = cv2.resize(image, (new_width, new_height))
            trans_img = get_affine_transform(center, scaled_size, 0,
                                             [img_width, img_height])
            img = cv2.warpAffine(img, trans_img, (img_width, img_height))

            img = img.astype(np.float32) / 255.
            img -= np.array(
                COCO_MEAN if cfg.dataset == 'coco' else DETRAC_MEAN,
                dtype=np.float32)[None, None, :]
            img /= np.array(COCO_STD if cfg.dataset == 'coco' else DETRAC_STD,
                            dtype=np.float32)[None, None, :]
            img = img.transpose(
                2, 0, 1)[None, :, :, :]  # from [H, W, C] to [1, C, H, W]

            # if cfg.test_flip:
            #     img = np.concatenate((img, img[:, :, :, ::-1].copy()), axis=0)

            imgs[scale] = {
                'image': torch.from_numpy(img).float(),
                'center': np.array(center),
                'scale': np.array(scaled_size),
                'fmap_h': np.array(img_height // 4),
                'fmap_w': np.array(img_width // 4)
            }

        with torch.no_grad():
            segmentations = []
            predicted_codes = []
            start_time = time.time()
            for scale in imgs:
                imgs[scale]['image'] = imgs[scale]['image'].to(cfg.device)

                output = model(imgs[scale]['image'])[-1]
                # segms, codes_ = ctsegm_scaled_decode_debug(*output, torch.from_numpy(dictionary.astype(np.float32)).to(cfg.device),
                #                             K=cfg.test_topk)
                segms = ctsegm_code_n_offset_decode(
                    *output,
                    torch.from_numpy(dictionary.astype(np.float32)).to(
                        cfg.device),
                    K=cfg.test_topk)
                segms = segms.detach().cpu().numpy().reshape(
                    1, -1, segms.shape[2])[0]
                # codes_ = codes_.detach().cpu().numpy().reshape(1, -1, codes_.shape[2])[0]

                top_preds = {}
                code_preds = {}
                for j in range(cfg.num_vertices):
                    segms[:, 2 * j:2 * j + 2] = transform_preds(
                        segms[:, 2 * j:2 * j + 2], imgs[scale]['center'],
                        imgs[scale]['scale'],
                        (imgs[scale]['fmap_w'], imgs[scale]['fmap_h']))
                segms[:, cfg.num_vertices * 2:cfg.num_vertices * 2 +
                      2] = transform_preds(
                          segms[:,
                                cfg.num_vertices * 2:cfg.num_vertices * 2 + 2],
                          imgs[scale]['center'], imgs[scale]['scale'],
                          (imgs[scale]['fmap_w'], imgs[scale]['fmap_h']))
                segms[:, cfg.num_vertices * 2 + 2:cfg.num_vertices * 2 +
                      4] = transform_preds(
                          segms[:, cfg.num_vertices * 2 +
                                2:cfg.num_vertices * 2 + 4],
                          imgs[scale]['center'], imgs[scale]['scale'],
                          (imgs[scale]['fmap_w'], imgs[scale]['fmap_h']))

                clses = segms[:, -1]
                for j in range(num_classes):
                    inds = (clses == j)
                    top_preds[j + 1] = segms[inds, :cfg.num_vertices * 2 +
                                             5].astype(np.float32)
                    top_preds[j + 1][:, :cfg.num_vertices * 2 + 4] /= scale
                    # code_preds[j + 1] = codes_[inds, :]

                segmentations.append(top_preds)
                predicted_codes.append(code_preds)

            segms_and_scores = {
                j: np.concatenate([d[j] for d in segmentations], axis=0)
                for j in range(1, num_classes + 1)
            }  # a Dict label: segments
            # codes_and_scores = {j: np.concatenate([d[j] for d in predicted_codes], axis=0)
            #                     for j in range(1, num_classes + 1)}  # a Dict label: segments
            scores = np.hstack([
                segms_and_scores[j][:, cfg.num_vertices * 2 + 4]
                for j in range(1, num_classes + 1)
            ])

            if len(scores) > max_per_image:
                kth = len(scores) - max_per_image
                thresh = np.partition(scores, kth)[kth]
                for j in range(1, num_classes + 1):
                    keep_inds = (segms_and_scores[j][:, cfg.num_vertices * 2 +
                                                     4] >= thresh)
                    segms_and_scores[j] = segms_and_scores[j][keep_inds]
                    # codes_and_scores[j] = codes_and_scores[j][keep_inds]

            # Use opencv functions to output a video
            output_image = original_image
            blend_mask = np.zeros(shape=output_image.shape, dtype=np.uint8)
            # print(blend_mask.shape)

            for lab in segms_and_scores:
                for idx in range(len(segms_and_scores[lab])):
                    res = segms_and_scores[lab][idx]
                    # c_ = codes_and_scores[lab][idx]
                    # for res in segms_and_scores[lab]:
                    contour, bbox, score = res[:-5], res[-5:-1], res[-1]
                    bbox[0] = np.clip(bbox[0], 0, w_img)
                    bbox[1] = np.clip(bbox[1], 0, h_img)
                    bbox[2] = np.clip(bbox[2], 0, w_img)
                    bbox[3] = np.clip(bbox[3], 0, h_img)
                    if score > cfg.detect_thres:
                        text = names[lab]  # + ' %.2f' % score
                        # label_size = cv2.getTextSize(text, cv2.FONT_HERSHEY_COMPLEX, thickness=2, fontScale=0.5)
                        polygon = contour.reshape((-1, 2))
                        # print('Shape: Poly -- ', polygon.shape)
                        # print(polygon)
                        polygon[:, 0] = np.clip(polygon[:, 0], 0, w_img - 1)
                        polygon[:, 1] = np.clip(polygon[:, 1], 0, h_img - 1)

                        # use bb tools to draw predictions
                        color = random.choice(COLOR_WORLD)
                        bb.add(output_image, bbox[0], bbox[1], bbox[2],
                               bbox[3], text, color)
                        cv2.polylines(output_image, [polygon.astype(np.int32)],
                                      True,
                                      RGB_DICT[color],
                                      thickness=1)
                        cv2.drawContours(blend_mask,
                                         [polygon.astype(np.int32)],
                                         contourIdx=-1,
                                         color=RGB_DICT[color],
                                         thickness=-1)

                        # color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
                        # contour_mean = np.mean(polygon, axis=0)
                        # contour_std = np.std(polygon, axis=0)
                        # center_x, center_y = np.mean(polygon, axis=0).astype(np.int32)
                        # text_location = [bbox[0] + 1, bbox[1] + 1,
                        #                  bbox[1] + label_size[0][0] + 1,
                        #                  bbox[0] + label_size[0][1] + 1]
                        # cv2.rectangle(output_image, pt1=(int(bbox[0]), int(bbox[1])),
                        #               pt2=(int(bbox[2]), int(bbox[3])),
                        #               color=color, thickness=1)
                        # cv2.rectangle(output_image, pt1=(int(np.min(polygon[:, 0])), int(np.min(polygon[:, 1]))),
                        #               pt2=(int(np.max(polygon[:, 0])), int(np.max(polygon[:, 1]))),
                        #               color=(0, 255, 0), thickness=1)
                        # cv2.polylines(output_image, [polygon.astype(np.int32)], True, color, thickness=2)
                        # cv2.putText(output_image, text, org=(int(text_location[0]), int(text_location[3])),
                        #             fontFace=cv2.FONT_HERSHEY_COMPLEX, thickness=2, fontScale=0.5,
                        #             color=(255, 0, 0))
                        # cv2.putText(output_image, text, org=(int(bbox[0]), int(bbox[1])),
                        #             fontFace=cv2.FONT_HERSHEY_COMPLEX, thickness=1, fontScale=0.5,
                        #             color=color)

                        # show the histgram for predicted codes
                        # fig = plt.figure()
                        # plt.plot(np.arange(cfg.n_codes), c_.reshape((-1,)), color='green',
                        #          marker='o', linestyle='dashed', linewidth=2, markersize=6)
                        # plt.ylabel('Value of each coefficient')
                        # plt.xlabel('All predicted {} coefficients'.format(cfg.n_codes))
                        # plt.title('Distribution of the predicted coefficients for {}'.format(text))
                        # plt.show()

            value = [255, 255, 255]
            dst_img = cv2.addWeighted(output_image, 0.5, blend_mask, 0.5, 0)
            dst_img[blend_mask == 0] = output_image[blend_mask == 0]
            img_original = cv2.copyMakeBorder(img_original, 0, 0, 0, 10,
                                              cv2.BORDER_CONSTANT, None, value)
            img_connect = cv2.copyMakeBorder(img_connect, 0, 0, 10, 10,
                                             cv2.BORDER_CONSTANT, None, value)
            img_recon = cv2.copyMakeBorder(img_recon, 0, 0, 10, 10,
                                           cv2.BORDER_CONSTANT, None, value)
            dst_img = cv2.copyMakeBorder(dst_img, 0, 0, 10, 0,
                                         cv2.BORDER_CONSTANT, None, value)
            im_cat = np.concatenate(
                (img_original, img_connect, img_recon, dst_img), axis=1)
            # im_cat = np.concatenate((img_original, img_connect, img_recon), axis=1)
            cv2.imshow('GT:Resample:Recons:Predict', im_cat)
            if cv2.waitKey() & 0xFF == ord('q'):
                break
Example no. 29
def add_bounding_box(image, bb_info):
    xmin, ymin, xmax, ymax = bb_info["coordinates"]
    label = bb_info.get("label", "")
    color = bb_info.get("color", "green")

    bb.add(image, xmin, ymin, xmax, ymax, label, color)
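For reference, a hypothetical call showing the dict shape this helper expects ("label" and "color" are optional):

import numpy as np

image = np.zeros((480, 640, 3), dtype=np.uint8)
info = {"coordinates": (40, 60, 200, 220), "label": "person", "color": "blue"}
add_bounding_box(image, info)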