Esempio n. 1
# Draw the bounding boxes stored in `target` onto the image at `img_path`,
# write the annotated image to `out_img_path` and open it in a window.
# NOTE(review): the parameter list and the docstring delimiters are missing
# from this excerpt; `target`, `plot_objects`, `plot_parts` and `out_img_path`
# are used below without being declared here — restore them before running.
def visualize_bbox(img_path,
    Required library:
    import cv2
    from bounding_box import bounding_box as bb

    # Read the image with the IMREAD_COLOR flag.
    img = cv2.imread(img_path, cv2.IMREAD_COLOR)

    for obj in target['object']:
        # One labelled box per annotated object.
        if plot_objects:
            xmin = obj['bndbox']['xmin']
            ymin = obj['bndbox']['ymin']
            xmax = obj['bndbox']['xmax']
            ymax = obj['bndbox']['ymax']
            bb.add(img, xmin, ymin, xmax, ymax, obj['name'])
        # One labelled box per entry of the object's 'parts' list.
        if plot_parts:
            for part in obj['parts']:
                xmin = part['bndbox']['xmin']
                ymin = part['bndbox']['ymin']
                xmax = part['bndbox']['xmax']
                ymax = part['bndbox']['ymax']
                bb.add(img, xmin, ymin, xmax, ymax, part['name'])

    # Persist the annotated image, then display it in a window titled with
    # the annotation's 'filename' entry.
    # NOTE(review): no cv2.waitKey() follows in the visible code — confirm the
    # caller pumps the GUI event loop, otherwise the window may never render.
    cv2.imwrite(out_img_path, img)
    cv2.imshow(target['filename'], img)
Esempio n. 2
def predict(frame, faceNet, model, labels):
    """Detect faces in ``frame`` and label each one with its predicted age.

    Relies on the module-level ``args`` mapping (``args["conf"]`` is the
    minimum detection confidence) and on ``get_age_from_model`` defined
    elsewhere in the file.

    Args:
        frame: Image array; its first two shape entries are read as
            (height, width). It is annotated in place.
        faceNet: OpenCV DNN face detector.
        model: Age model, forwarded unchanged to ``get_age_from_model``.
        labels: Age labels, forwarded unchanged to ``get_age_from_model``.

    Returns:
        The same ``frame`` object with one labelled box per accepted face.
    """
    (h, w) = frame.shape[:2]
    blob = cv2.dnn.blobFromImage(frame, 1.0, (300, 300), (104.0, 177.0, 123.0))
    # The blob has to be handed to the network before the forward pass;
    # without this the detector never sees the current frame.
    faceNet.setInput(blob)
    detections = faceNet.forward()

    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence <= args["conf"]:
            continue

        # Detections are relative coordinates; scale them to pixels.
        box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
        (startX, startY, endX, endY) = box.astype("int")

        # Clamp to the frame: a negative start would be interpreted by NumPy
        # as "count from the end" and silently crop the wrong region.
        startX, startY = max(0, int(startX)), max(0, int(startY))
        endX, endY = min(w, int(endX)), min(h, int(endY))

        face = frame[startY:endY, startX:endX]
        if face.size == 0:
            # Degenerate box (entirely outside the frame); nothing to classify.
            continue

        age = get_age_from_model(face, model, labels)
        text = "{:s}".format(age)

        # Draw the face box with the predicted age as its label.
        bb.add(frame, startX, startY, endX, endY, text)

    return frame
Esempio n. 3
# Display loop: query two models on a frame, draw their status labels on a
# 720x480 copy of the frame and show it.
# NOTE(review): this excerpt is incomplete — the `if` on the frame counter has
# no body, the right-hand side of `ret, frame =` is missing, and the final
# `if` (the 'q' key check) has no body either.
def main():
    cont = 0
    while (True):
        cont += 1
        # Frame-skipping guard on the counter (body missing in this excerpt).
        if not cont % 4 == 0:
        ret, frame =
        # Each model's result is indexed with [0] below to obtain a status
        # string that is concatenated into the box label.
        rcocho = model1.result(frame)
        rbeberoudo = model2.result(frame)
        #    rpatio = model3.result(frame)
        image = cv2.resize(frame, (int(720), int(480)))
        # `cocho` / `beberoudo` are defined outside this block and are read
        # here as [x, y, width, height]: bb.add receives x, y, x+w, y+h.
        bb.add(image, cocho[0], cocho[1], cocho[0] + cocho[2],
               cocho[1] + cocho[3], "COCHO STATUS: " + rcocho[0], "orange")
        bb.add(image, beberoudo[0], beberoudo[1], beberoudo[0] + beberoudo[2],
               beberoudo[1] + beberoudo[3],
               "BEBEROUDO STATUS: " + rbeberoudo[0], "aqua")
        #    bb.add(image, patio[0], patio[1], patio[0]+ patio[2], patio[1]+patio[3], "PATIO STATUS: " + rpatio[0], "green")
        img = image
        #roi_cocho = result[1]
        #cv2.imshow("roi sensor",roi_cocho)
        cv2.imshow('frame', img)
        # Pressing 'q' is tested here (handler body missing in this excerpt).
        if cv2.waitKey(1) & 0xFF == ord('q'):

Esempio n. 4
    # NOTE(review): the `bb.add(` call inside the first loop is never closed
    # in this excerpt (its label/colour arguments and `)` are missing).
    def show(self, show, ia=None):
        """Draws inferenced objects into the image, if show == True, also displays it"""
        for d in self.detections:
            # `color` is only referenced by the commented-out cv2 calls below.
            color = (34, 139, 34)
            # bbox.unwrap() is unpacked as (x, y, width, height).
            start_x, start_y, w, h = d.bbox.unwrap()

            bb.add(self.img, start_x, start_y, start_x + w, start_y + h,
            #cv2.rectangle(self.img, (start_x, start_y), (start_x+w,start_y+h), color, 2)
            #text = "{}".format(d.label)
            #cv2.putText(self.img, text, (start_x, start_y -5),
            #cv2.FONT_HERSHEY_SIMPLEX,0.5, color, 1)

        # Also draw the ground-truth boxes, for testing purposes.
        if ia != None:
            for gt in ia.gts:
                color = (128, 0, 0)

                # Ground-truth coordinates are multiplied by the module-level
                # IMG_W / IMG_H to obtain pixel positions.
                x = int(float(gt.bbox.x) * IMG_W)
                y = int(float(gt.bbox.y) * IMG_H)

                x2 = x + int(float(gt.bbox.w) * IMG_W)
                y2 = y + int(float(gt.bbox.h) * IMG_H)

                cv2.rectangle(self.img, (x, y), (x2, y2), color, 2)

        if show == True:
            cv2.imshow('image', self.img)
    # Grab one frame, mark every detected face on it and return the annotated
    # frame encoded as JPEG bytes.
    # NOTE(review): the right-hand side of `success, frame =` is missing in
    # this excerpt, so the frame source cannot be seen here.
    def get_frame(self):
        success, frame =
        # The frame is written to disk and read straight back before use.
        cv2.imwrite("frame.jpg", frame)
        image = cv2.imread("frame.jpg")
        # The detector is fed an RGB copy; drawing happens on `image`.
        img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        faces = detector.detect_faces(img)
        for face in faces:
            # face['box'] is unpacked as (x, y, width, height).
            x, y, width, heigh = face['box']
            bb.add(image, x, y, x + width, y + heigh, "Face", "fuchsia")

        ret, jpeg = cv2.imencode('.jpg', image)
        return jpeg.tobytes()
Esempio n. 6
# For every line of EMOTION_TXT_FILE (one JSON list of faces per frame), draw
# an emotion-labelled box per face plus a blended emoji overlay, and save the
# frame as a PNG under RESULT_FRAMES_PATH.
# NOTE(review): this excerpt is incomplete — the `if box["top"] > ...` guard
# has no body, the emoji `cv2.imread(` call is never closed, and the `else:`
# that should precede the BOTTOM_RIGHT_EMOJI assignment is missing.
def make_box():

    with open(EMOTION_TXT_FILE, 'r') as eFile:
        lines = eFile.readlines()
        for i, row in enumerate(lines):
            frame = json.loads(row)

            # Line index i selects the matching raw frame image.
            path = RAW_FRAMES_OUTPUT_DIR + '/img' + str(i) + '.jpg'
            image = cv2.imread(path, cv2.IMREAD_COLOR)

            for face in frame:

                box = face["faceRectangle"]

                # Remove the bottom box
                if box["top"] > BOTTOM_BOXES:

                emotion = calc_max_emotion(face["faceAttributes"]["emotion"])

                # Make Box
                bb.add(image, box['left'], box['top'],
                       box['left'] + box['width'], box['top'] + box['height'],
                       emotion, EMOTION_COLOR_DICT[emotion])

                # Add emoji
                overlay = cv2.imread(EMOJI_IMAGES_PATH + '\\' + emotion +
                rows, cols, channels = overlay.shape

                # Pick the emoji anchor from the face position.

                if box['left'] < LEFT_BOX:
                    emoji_coods = LEFT_EMOJI

                elif box['top'] < TOP_RIGHT_BOX:
                    emoji_coods = TOP_RIGHT_EMOJI

                    emoji_coods = BOTTOM_RIGHT_EMOJI

                # Blend 20% of the underlying image region with 80% emoji.
                # emoji_coods is indexed as (column, row).
                overlay = cv2.addWeighted(
                    image[emoji_coods[1]:emoji_coods[1] + rows,
                          emoji_coods[0]:emoji_coods[0] + cols], 0.2, overlay,
                    0.8, 0)

                image[emoji_coods[1]:emoji_coods[1] + rows,
                      emoji_coods[0]:emoji_coods[0] + cols] = overlay

            cv2.imwrite(RESULT_FRAMES_PATH + '/img' + str(i) + '.png', image)
# Run a detector on `image`, keep detections above CONFIDENCE, apply
# non-maximum suppression and draw a labelled box for each survivor.
# Returns (image, comma-joined string of class IDs).
# NOTE(review): this excerpt is incomplete — the `blobFromImage(` and the
# final `bb.add(` calls are never closed, and nothing visible here appends to
# `confidences` / `classIDs` or passes the blob to the network.
def predict(image, net, layer, label, default_colors):
    (imageHeight, imageWidth) = image.shape[:2]
    #Detect object
    blob = cv2.dnn.blobFromImage(image,
                                 0.00392, (416, 416), (0, 0, 0),
    layerOutputs = net.forward(layer)
    # Box dimensions
    boxes = []
    confidences = []
    classIDs = []
    for output in layerOutputs:
        for detection in output:
            # Entries from index 5 onward are the per-class scores.
            scores = detection[5:]
            classID = np.argmax(scores)
            confidence = scores[classID]

            if confidence > CONFIDENCE:
                # The first four entries are scaled to pixels and read as
                # (centre x, centre y, width, height).
                box = detection[0:4] * np.array(
                    [imageWidth, imageHeight, imageWidth, imageHeight])
                (centerX, centerY, width, height) = box.astype("int")
                # The top-left corner can come out negative; clamp it to 0.
                x = int(centerX - (width / 2))
                y = int(centerY - (height / 2))
                print(x, y, width, height)
                if (y < 0):
                    y = 0
                if (x < 0):
                    x = 0
                boxes.append([x, y, int(width), int(height)])

    # Draw labels
    idxs = cv2.dnn.NMSBoxes(boxes, confidences, CONFIDENCE, NMS_THRES)

    if len(idxs) > 0:
        for i in idxs.flatten():
            (x, y) = (boxes[i][0], boxes[i][1])
            (w, h) = (boxes[i][2], boxes[i][3])
            text = "{}: {:.4f}".format(label[classIDs[i]], confidences[i])
            bb.add(image, x, y, x + w, y + h, text,

    listClassne = ','.join([str(n) for n in classIDs])
    return image, listClassne
Esempio n. 8
# Query one model on a frame and draw two labelled regions on a 720x480 copy.
# NOTE(review): this excerpt is incomplete — the enclosing loop header is
# missing (the body is indented two levels), `ret, frame =` has no right-hand
# side, and the final `if` (the 'q' key check) has no body.
def main():
        ret, frame =
        result = model.result(frame)
        # The first element of the result is concatenated into the label.
        data_value = result[0]
        image = cv2.resize(frame,(int(720),int(480)))
        # `beberoudo` / `cocho` come from outside this block and are read as
        # [x, y, width, height].
        bb.add(image, beberoudo[0], beberoudo[1], beberoudo[0]+beberoudo[2], beberoudo[1]+beberoudo[3], "bebedouro", "aqua")
        bb.add(image, cocho[0], cocho[1], cocho[0]+ cocho[2], cocho[1]+cocho[3], "COCHO STATUS: " + data_value, "orange")
        img = image
        #roi_cocho = result[1]
        #cv2.imshow("roi sensor",roi_cocho)
        if cv2.waitKey(1) & 0xFF == ord('q'):

 # Draw the detected items' boxes on a copy of `pnid_img` and return the
 # result as a new PIL image.
 # NOTE(review): this excerpt is incomplete — the iterable of the `for` loop
 # and the call that consumes the four scaled coordinates are missing.
 def draw_bbox(pnid_img):
     img_arr = np.array(pnid_img)
     height, width = img_arr.shape[:-1]
     # Work on a copy with the last (channel) axis reversed.
     img_arr_copy = img_arr[:, :, ::-1].copy()
     for detected_item in
         bbox = detected_item.bounding_box
         label = detected_item.text
         # Box coordinates are multiplied by the image size to get pixels.
             int(bbox["xMin"] * width),
             int(bbox["yMin"] * height),
             int(bbox["xMax"] * width),
             int(bbox["yMax"] * height),
     # Reverse the channel axis again before converting back to an image.
     return Image.fromarray(img_arr_copy[:, :, ::-1])
Esempio n. 10
# Load the image described by `image_data`, stamp its name in the top-left
# corner and draw each annotation's box in a randomly chosen colour.
# NOTE(review): this excerpt is incomplete — the `color_list` literal is never
# closed (only 7 names are visible while randint(0, 7) can return 7), and the
# `bb.add(` call ends in `,,` with its label/colour arguments missing.
def process_image(image_data):
    image = cv2.imread(image_data.image_path)

    image = cv2.putText(image, image_data.image_name, (5, 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
    color_list = [
        "maroon", "green", "yellow", "purple", "fuchsia", "lime", "red",
    for ann in image_data.annotations:
        # random.randint is inclusive on both ends: indices 0..7.
        id_color = random.randint(0, 7)
        box_color = color_list[id_color]

        bb.add(image, ann.xmin, ann.ymin, ann.xmax, ann.ymax,,
        #image = cv2.rectangle(image, (ann.xmin, ann.ymin), (ann.xmax, ann.ymax), box_color, args.line_thickness)
        #image = cv2.putText(image,, (ann.xmin, ann.ymin), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2)
    return image
Esempio n. 11
    # Pre-process `raw_image`, run the detector and draw every returned box.
    # Returns [class_IDs, scores, bounding_boxes].
    # NOTE(review): the right-hand side of the
    # `class_IDs, scores, bounding_boxes =` assignment is missing in this
    # excerpt, so the network call cannot be seen here.
    def predict(self, raw_image):
        # transform_test returns the network input `x` and a displayable
        # image `img`; INPUT_H is passed as the `short` argument.
        x, img = data.transforms.presets.ssd.transform_test(
            nd.array(raw_image), short=INPUT_H)
        print('Shape of pre-processed image:', x.shape)

        class_IDs, scores, bounding_boxes =
        for box in bounding_boxes:
            #    print(box)
            #    print(box)
            # NOTE(review): the coordinates are passed in index order
            # 0, 3, 1, 2 with a fixed label "a" — confirm this matches the
            # detector's box layout and bb.add's (left, top, right, bottom).
            bb.add(img, box[0][0], box[0][3], box[0][1], box[0][2], "a",
                   (255, 255, 255))
        # print(bounding_boxes.shape)

        # ax = utils.viz.plot_bbox(x, bounding_boxes[0], scores[0],
        #                         class_IDs[0],

        return [class_IDs, scores, bounding_boxes]
# For every row of EMOTION_CSV_FILE, compare the detected face rectangle with
# the reference BOX, draw BOX with a direction label and blend a direction
# sign onto the frame, then save it as a PNG under RESULT_FRAMES_PATH.
# NOTE(review): this excerpt is incomplete — the `if i == 0:` guard has no
# body (presumably a header-row skip; confirm), and the `else:` separating
# `direction = WRONG` from the rest of the branch is missing.
def make_box():

    with open(EMOTION_CSV_FILE, 'r') as csvFile:
        reader = csv.reader(csvFile)
        for i, row in enumerate(reader):
            if i == 0:
            # Column 0 is the frame number used in the image file names.
            path = RAW_FRAMES_OUTPUT_DIR + '/img' + str(row[0]) + '.jpg'
            image = cv2.imread(path, cv2.IMREAD_COLOR)
            # Column 1 holds the box with single quotes; swap them for double
            # quotes so json.loads accepts it.
            row[1] = row[1].replace("\'", "\"")
            box = json.loads(row[1])

            if not box:
                direction = WRONG

                direction = CORRECT

                # The first violated edge of BOX decides the direction.
                if box['top'] + box['height'] > BOX['bottom']:
                    direction = DOWN
                elif box['top'] < BOX['top']:
                    direction = UP
                elif box['left'] + box['width'] > BOX['right']:
                    direction = RIGHT
                elif box['left'] < BOX['left']:
                    direction = LEFT

                bb.add(image, BOX['left'], BOX['top'], BOX['right'],
                       BOX['bottom'], direction['label'], direction['color'])

            # Add sign
            overlay = cv2.imread(EMOJI_IMAGES_PATH + '\\' + direction['image'])
            rows, cols, channels = overlay.shape

            # Weights here are 0 for the image region and 0.8 for the sign,
            # with 0.2 passed as addWeighted's scalar offset (gamma).
            overlay = cv2.addWeighted(
                image[SIGN_ROW:SIGN_ROW + rows, SIGN_COL:SIGN_COL + cols], 0,
                overlay, 0.8, 0.2)
            image[SIGN_ROW:SIGN_ROW + rows, SIGN_COL:SIGN_COL + cols] = overlay

            cv2.imwrite(RESULT_FRAMES_PATH + '/img' + row[0] + '.png', image)
Esempio n. 13
# Draw every row of `bboxes` on a copy of `img` and return a new image.
# Each row is read as box coordinates followed by a class (id or string) in
# its last position.
# NOTE(review): this excerpt is incomplete — the signature is never closed
# (`kwargs` is used below but not declared here), the `else:` before
# `category = box[-1]` is missing, and the `"".join([` comprehension has lost
# its element expression and closing brackets.
# NOTE(review): `dict()` defaults are shared between calls; harmless only as
# long as the function never mutates them.
def _plot_boxes(img: Image,
                bboxes: np.ndarray,
                scores: Optional[List] = None,
                class_map: Optional[Dict] = dict(),
                class_color_map: Optional[Dict] = dict(),
    draw_img = np.array(img)
    for i, box in enumerate(bboxes):
        # Coordinates are truncated to int and clamped at 0.
        bbox = list(map(lambda x: max(0, int(x)), box[:-1]))
        # Numeric classes are translated through class_map, falling back to
        # the number itself as text.
        if not isinstance(box[-1], str):
            category = class_map.get(int(box[-1]), str(int(box[-1])))
            category = box[-1]
        if kwargs.get("truncate_label", None) is not None:
            category = "".join([
                for l in category.split(kwargs.get("truncate_label"))
        # Append the score rounded to two decimals when scores are given.
        if scores is not None:
            category = category + ":" + str(round(scores[i], 2))
        # NOTE(review): int(box[-1]) is evaluated even when the class is a
        # string — confirm string classes are always numeric text.
        color = class_color_map.get(int(box[-1]), "green")
        bb.add(draw_img, *bbox, category, color=color)
    return Image.fromarray(draw_img)
Esempio n. 14
# NOTE(review): this excerpt is incomplete — the `cmap` list is never closed
# and the final statement has lost its first line (only the continuation
# ending in `'222.jpg'), im_orig)` remains, presumably an image write).
def vis_detection(im_orig, detections, class_names, file='', thresh=0.7):
    """visualize [cls, conf, x1, y1, x2, y2]"""
    # Colour name per class index.
    cmap = [
        'black', 'navy', 'blue', 'silver', 'aqua', 'teal', 'olive', 'purple',
        'green', 'fuchsia', 'lime', 'red', 'yellow', 'orange', 'red', 'maroon',
        'fuchsia', 'purple', 'black', 'gray', 'silver'

    im_orig = cv2.cvtColor(im_orig, cv2.COLOR_BGR2RGB)

    for [cls, conf, x1, y1, x2, y2] in detections:
        cls = int(cls)
        # Class 0 is skipped; other classes are drawn only above `thresh`.
        if cls > 0 and conf > thresh:

            bb.add(im_orig, int(x1), int(y1), int(x2), int(y2),
                   '{:s} {:.3f}'.format(class_names[cls], conf), cmap[cls])

    # plt.axis('off')
    # #
    # plt.savefig('test.png')
    # cv2.imshow("d", im_orig)
                     file.strip() + '222.jpg'), im_orig)
Esempio n. 15
# Run a Darknet YOLO model on "file_upload.jpg", draw the detections that
# survive non-maximum suppression and return
# {"res": <base64 PNG text>, "obj": <list>}.
# NOTE(review): this excerpt is incomplete — the `arguments` dict, the
# `blobFromImage(` call and the `NMSBoxes(` call are never closed, the
# `if classIDs[i] not in obj:` guard has no body, and nothing visible here
# appends to `confidences` / `classIDs` or passes the blob to the network.
def img_detect(params):
    arguments = {
        "image": "file_upload.jpg",
        "label": "yolo-coco/{}".format(params["names"]),
        "weight": "yolo-coco/{}".format(params["weight"]),
        "config": "yolo-coco/{}".format(params["config"]),
        "threshold": params["threshold"],
        "confidence": params["confidence"]

    # One class name per line of the label file.
    LABELS = open(arguments["label"]).read().strip().split("\n")
    COLORS = helper.get_rand_colors(len(LABELS))

    weightsPath = arguments["weight"]
    configPath = arguments["config"]

    ########################### reference ###########################
    print("[INFO] loading YOLO from disk...")
    net = cv2.dnn.readNetFromDarknet(configPath, weightsPath)
    image = cv2.imread(arguments["image"])

    (H, W) = image.shape[:2]
    # Map the unconnected output layer indices (1-based) to layer names.
    # NOTE(review): `i[0]` assumes each index is itself a sequence — confirm
    # against the installed OpenCV version.
    ln = net.getLayerNames()
    ln = [ln[i[0] - 1] for i in net.getUnconnectedOutLayers()]

    blob = cv2.dnn.blobFromImage(image,
                                 1 / 255.0, (416, 416),


    layerOutputs = net.forward(ln)

    boxes = []
    confidences = []
    classIDs = []

    # loop over each of the layer outputs
    for output in layerOutputs:
        for detection in output:
            # Entries from index 5 onward are the per-class scores.
            scores = detection[5:]
            classID = np.argmax(scores)
            confidence = scores[classID]

            if confidence > arguments["confidence"]:
                # The first four entries are scaled to pixels and read as
                # (centre x, centre y, width, height).
                box = detection[0:4] * np.array([W, H, W, H])
                (centerX, centerY, width, height) = box.astype("int")

                x = int(centerX - (width / 2))
                y = int(centerY - (height / 2))

                boxes.append([x, y, int(width), int(height)])

    idxs = cv2.dnn.NMSBoxes(boxes, confidences, arguments["confidence"],

    ########################### reference ###########################
    obj = []
    if len(idxs) > 0:
        for i in idxs.flatten():
            (x, y) = (boxes[i][0], boxes[i][1])
            (w, h) = (boxes[i][2], boxes[i][3])

            text = "{}".format(LABELS[classIDs[i]])
            color = COLORS[classIDs[i]]
            # NOTE(review): the same box is drawn twice, once with `color`
            # and once without — confirm whether both calls are intended.
            bb.add(image, x, y, x + w, y + h, text, color)
            bb.add(image, x, y, x + w, y + h, text)

            if classIDs[i] not in obj:

# show the output image
    # Encode the annotated image as PNG and return it as base64 text.
    retval, buffer = cv2.imencode('.png', image)
    jpg_as_text = base64.b64encode(buffer)
    restext = jpg_as_text.decode(encoding='UTF-8')

    response = {"res": restext, "obj": obj}

    return response
Esempio n. 16
def main():
    """Annotate every sample image under docs/images and show/save the result.

    Each entry of the table below names a source image, the output file, the
    title handed to ``show_and_save`` and the boxes to draw. A box is the
    positional argument tuple for ``bb.add``: left, top, right, bottom, then
    the label and, optionally, the colour name.
    """
    gallery = (
        ("winton.jpg", "winton_bb.png", "Winton MARSALIS", (
            (281, 12, 744, 431, "Winton", "maroon"),
            (166, 149, 500, 297, "Trumpet", "yellow"),
        )),
        ("khatia.jpg", "khatia_bb.png", "Khatia BUNIATISHVILI", (
            (280, 24, 802, 593, "Khatia", "maroon"),
            (687, 1, 1448, 648, "Piano", "gray"),
            (888, 492, 1190, 536, "Text"),
        )),
        ("clarifloue.jpg", "clarifloue_bb.png", "Clarinet", (
            (69, 86, 470, 136, "Headache designer"),
            (136, 196, 406, 234, "Text"),
            (67, 351, 471, 400, "Headache designer"),
            (130, 456, 390, 494, "Text"),
        )),
        ("nao-romeo-pepper.jpg", "nao-romeo-pepper_bb.png", "Robots", (
            (155, 152, 244, 297, "Nao"),
            (260, 6, 423, 416, "Romeo"),
            (421, 76, 547, 402, "Pepper"),
        )),
        ("ski-paraglider.jpg", "ski-paraglider_bb.png", "Ski and paraglider", (
            (0, 128, 645, 589, "Paraglider", "orange"),
            (689, 442, 818, 566, "Skier", "gray"),
        )),
        ("paragliders.jpg", "paragliders_bb.png", "Pretty Bounding Box", (
            (90, 228, 318, 428, "Paraglider"),
            (521, 110, 656, 415, "Paraglider"),
        )),
        ("selfie.jpg", "selfie_bb.png", "The Selfie", (
            (5, 7, 150, 169, "Female", "fuchsia"),
            (116, 7, 193, 113, "Male", "blue"),
            (189, 7, 291, 124, "Female", "fuchsia"),
            (288, 25, 355, 114, "Male", "blue"),
            (367, 0, 448, 92, "Male", "blue"),
            (435, 29, 506, 104, "Female", "fuchsia"),
            (497, 3, 597, 111, "Female", "fuchsia"),
            (110, 133, 213, 245, "Female", "fuchsia"),
            (176, 120, 293, 289, "Female", "fuchsia"),
            (314, 115, 470, 357, "Male", "blue"),
            (468, 72, 577, 226, "Male", "blue"),
        )),
        ("pobb.jpg", "pobb_bb.png", "POBB", (
            (76, 62, 155, 271, "Female", "fuchsia"),
            (157, 44, 288, 274, "Male", "blue"),
            (224, 64, 317, 274, "Male", "blue"),
            (290, 48, 383, 277, "Male", "blue"),
            (350, 42, 458, 276, "Female", "fuchsia"),
            (416, 17, 510, 279, "Male", "blue"),
            (482, 55, 573, 278, "Female", "fuchsia"),
            (547, 63, 615, 277, "Female", "fuchsia"),
            (608, 49, 704, 275, "Female", "fuchsia"),
            (672, 34, 767, 274, "Male", "blue"),
            (725, 62, 813, 273, "Female", "fuchsia"),
            (786, 38, 887, 267, "Male", "blue"),
            (864, 51, 959, 266, "Male", "blue"),
        )),
    )

    for source_name, result_name, title, boxes in gallery:
        in_path = os.path.join("docs", "images", source_name)
        out_path = os.path.join("docs", "images", result_name)
        image = cv2.imread(in_path, cv2.IMREAD_COLOR)
        for box_args in boxes:
            bb.add(image, *box_args)
        show_and_save(title, image, out_path)
def predict(image):
    """Run the YOLO detector on ``image`` and draw the detections kept by NMS.

    Relies on the module-level ``net`` (an OpenCV DNN network) and ``LABELS``
    (class names indexed by class ID) defined elsewhere in the file.

    Args:
        image: Image array; its first two shape entries are read as
            (height, width). It is annotated in place.

    Returns:
        ``(image, label, confidence)`` where ``label`` and ``confidence``
        belong to the last detection kept by non-maximum suppression, or
        ``(image, None, None)`` when nothing passes the threshold.
    """
    # Colour names to pick from when drawing a box.
    COLORS = ["blue", "yellow", "red", "green"]
    (H, W) = image.shape[:2]

    # Resolve the names of the output layers. getUnconnectedOutLayers()
    # yields 1-based indices, as an Nx1 array on older OpenCV builds and a
    # flat array on newer ones; flattening accepts both.
    layer_names = net.getLayerNames()
    out_indices = np.array(net.getUnconnectedOutLayers()).flatten()
    ln = [layer_names[i - 1] for i in out_indices]

    # Build the input blob and hand it to the network; without setInput()
    # the forward pass never sees the current image.
    blob = cv2.dnn.blobFromImage(image, 1 / 255.0, (416, 416), swapRB=False, crop=False)
    net.setInput(blob)
    layerOutputs = net.forward(ln)

    boxes = []
    confidences = []
    classIDs = []
    threshold = 0.3

    for output in layerOutputs:
        for detection in output:
            # Entries from index 5 onward are the per-class scores.
            scores = detection[5:]
            classID = np.argmax(scores)
            confidence = scores[classID]

            # Filter out weak predictions.
            if confidence <= threshold:
                continue

            # The first four entries are relative (centre x, centre y,
            # width, height); scale them to pixels.
            box = detection[0:4] * np.array([W, H, W, H])
            (centerX, centerY, width, height) = box.astype("int")

            # Derive the top-left corner from the centre.
            x = int(centerX - (width / 2))
            y = int(centerY - (height / 2))

            # The three lists must stay index-aligned: NMSBoxes returns
            # indices that are used to look up all of them below.
            boxes.append([x, y, int(width), int(height)])
            confidences.append(float(confidence))
            classIDs.append(int(classID))

    # Apply non-maxima suppression to drop weak, overlapping boxes.
    idxs = cv2.dnn.NMSBoxes(boxes, confidences, threshold, 0.3)

    print(idxs)

    if len(idxs) == 0:
        return image, None, None

    last = None
    for i in np.array(idxs).flatten():
        (x, y) = (boxes[i][0], boxes[i][1])
        (w, h) = (boxes[i][2], boxes[i][3])

        # Draw the labelled box in a randomly chosen colour.
        color = str(np.random.choice(COLORS, 1)[0])
        text = "{}".format(LABELS[classIDs[i]])
        bb.add(image, x, y, x + w, y + h, text, color)
        last = i

    return image, LABELS[classIDs[last]], confidences[last]
Esempio n. 18
# Evaluate the object/attribute model and a reference model on a set of GQA
# images and write, per image, a side-by-side visualisation of both models'
# labelled boxes to `output_path`.
# NOTE(review): this excerpt is incomplete — the
# `if not 'BACKGROUND' in obj_labels:` guard and the
# `if ... == 'BACKGROUND':` guard inside the box loop both lack a body.
# NOTE(review): nothing visible here appends the object label itself to
# `cur_label`; only attribute labels are inserted — confirm against the
# original source.
def main():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # With no explicit ids, draw a random sample of NUM_IMGS_TO_TEST image ids
    # from the keys of the validation scene-graph file.
    img_ids = None
    if not img_ids:
        img_ids = sample(list(json.load(open(gqa_val_sgs)).keys()), NUM_IMGS_TO_TEST)
    # Invert the reference-object mapping: first element of each value -> key.
    ref_objects_dict = {value[0]: key for key, value in json.load(open(ref_objs_dict, 'r')).items()}
    data_dict = json.load(open(labels_path), object_pairs_hook=OrderedDict)
    obj_labels = data_dict['relevant_objs']
    if not 'BACKGROUND' in obj_labels:
    # Index -> label lookup tables for objects and attributes.
    obj_labels_dict = {i: obj_labels[i] for i in range(len(obj_labels))}
    att_labels = data_dict['relevant_atts']
    att_labels_dict = {i: att_labels[i] for i in range(len(att_labels))}

    # Optional attribute categories: category name -> list of its attribute
    # names, minus the categories listed in CATEGORIES_TO_DROP.
    att_categories = None
    if categorize_atts:
        att_categories = json.load(open(ATT_CATEGORIES_FILE, 'r'))
        att_categories = \
            {key: list(value.keys()) for key, value in att_categories.items() if key not in CATEGORIES_TO_DROP}

    model = InferenceMLPModel(mlp_params['hidden_dim'], mlp_params['input_dim'], len(obj_labels_dict),
                              len(att_labels_dict), att_categories).to(device)
    ref_model = MLPModel(128, 2048, len(ref_objects_dict)).to(device)

    # Gradients are disabled for the whole evaluation pass.
    with h5py.File(gqa_data_file, 'r') as data_f, torch.set_grad_enabled(False):
        for img_id in tqdm(img_ids, desc='Evaluating images...'):
            # Per-image features and boxes are stored under prefixed keys.
            features = data_f['features_' + img_id][()]
            tensor_features = torch.Tensor(features).unsqueeze(0).to(device)
            bboxes = data_f['bboxes_' + img_id][()]
            # `img` gets the main model's boxes, `img_copy` the reference
            # model's, and `img_resized` is an undrawn copy scaled by 2.
            img = cv2.imread(imgs_path.format(img_id))
            img_copy = img.copy()
            img_resized = cv2.resize(img.copy(), (0, 0), fx=2, fy=2)
            pred_obj_labels, pred_obj_probs, pred_att_labels, pred_att_probs = \
                model(tensor_features, np.array([features.shape[0]]))
            # Take the first (only) batch entry.
            pred_obj_labels = pred_obj_labels[0]
            pred_obj_probs = pred_obj_probs[0]
            if WITH_ATTS and not categorize_atts:
                pred_att_labels = pred_att_labels[0]
                pred_att_probs = pred_att_probs[0]

            # Collect (box index, label text) for confident predictions.
            relevant_bboxes_data = []
            for i in range(len(pred_obj_labels)):
                cur_label = []
                if obj_labels_dict[pred_obj_labels[i]] == 'BACKGROUND':
                if pred_obj_probs[i] > OBJ_CONF_THRESH:
                    if WITH_ATTS:
                        if categorize_atts:
                            # One attribute per displayed category, kept only
                            # when its probability clears the threshold.
                            for att_category in CATEGORIES_TO_SHOW:
                                if pred_att_probs[att_category][i] > ATT_CONF_THRESH:
                                    cur_category_label = pred_att_labels[att_category][i]
                                    cur_label.insert(0, att_categories[att_category][cur_category_label])
                        elif pred_att_probs[i] > ATT_CONF_THRESH:
                            cur_label.insert(0, pred_att_labels[i])

                    relevant_bboxes_data.append((i, ' '.join(cur_label)))

            for box_data in relevant_bboxes_data:
                box_id, box_label = box_data
                cur_bbox = bboxes[box_id, :]
                bb.add(img, cur_bbox[0], cur_bbox[1], cur_bbox[2], cur_bbox[3], box_label)

            # Visualise the reference model: best class per box, kept when its
            # score exceeds REF_CONF_THRESH.
            ref_preds = ref_model(tensor_features.squeeze(0)).cpu().detach().numpy()
            ref_pred_labels = np.argmax(ref_preds, axis=1)
            ref_pred_probs = np.max(ref_preds, axis=1)
            relevant_bboxes_data_ref = [(i, ref_pred_labels[i]) for i in range(len(ref_pred_probs))
                                        if ref_pred_probs[i] > REF_CONF_THRESH]
            for box_data in relevant_bboxes_data_ref:
                box_id, box_label = box_data
                cur_bbox = bboxes[box_id, :]
                bb.add(img_copy, cur_bbox[0], cur_bbox[1], cur_bbox[2], cur_bbox[3], ref_objects_dict[box_label])
            # Layout: the 2x image on top, the two annotated images side by
            # side underneath.
            imgs_concat = np.concatenate((img_resized, np.concatenate((img, img_copy), axis=1)), axis=0)
            cv2.imwrite(os.path.join(output_path, img_id + '.jpg'), imgs_concat)
Esempio n. 19
 # Loop over the results: each entry unpacks as (prob, bbox, centroid).
 # NOTE(review): this fragment is incomplete — the `bb.add(...)` line carries
 # the tail of another call (unbalanced parentheses) and the final
 # `if key == 27:` has no body.
 for (i, (prob, bbox, centroid)) in enumerate(results):
     (startX, startY, endX, endY) = bbox
     (cX, cY) = centroid
     color = "green"
     # if the index exists within the violation set, then
     # update the color
     if i in violate:
         color = "red"
     # draw a bounding box around the person and his/her centroid
     bb.add(frame,startX,startY-5,endX,endY-5,color=color), (cX, cY), 3, colors[color], -1)
 # display the total number of social distance violations
 text = "Social Distancing Violation: {:d}".format(len(violate))
 cv2.putText(frame, text, (10, 25),
             cv2.FONT_HERSHEY_SIMPLEX, 0.85, (0, 0, 255), 2)
 # Show a 1280x768 resized copy; the key code is masked to its low byte.
 frame_show = cv2.resize(frame, (1280,768))
 cv2.imshow("Social distancing", frame_show)
 key = cv2.waitKey(1) & 0xFF
 # 27 is the key code tested here (handler body missing in this excerpt).
 if key == 27:
Esempio n. 20
'''Test img '''

# NOTE(review): this script is incomplete — the `color_list` literal below is
# never closed.
test_image = face_recognition.load_image_file(img_dir)

# Find all the faces in the test image using the default HOG-based model
face_locations = face_recognition.face_locations(test_image)
no = len(face_locations)
print("Number of faces detected: ", no)

# Predict all the faces in the test image using the trained classifier.
# The encodings are recomputed on every iteration and `name` is overwritten
# each time, so only the last face's prediction survives the loop.
for i in range(no):
    test_image_enc = face_recognition.face_encodings(test_image)[i]
    name = model.predict([test_image_enc])

# Only the first detected face is boxed.
# NOTE(review): the face_recognition docs describe each location as
# (top, right, bottom, left); the index-to-coordinate mapping below should be
# checked against that order.
x_min = face_locations[0][2]
y_min = face_locations[0][0]
x_max = face_locations[0][1]
y_max = face_locations[0][3]

# Pick one of the eight colour names at random (randint is inclusive).
color_list = [
    "maroon", "green", "yellow", "purple", "fuchsia", "lime", "red", "silver"
id_color = random.randint(0, 7)
box_color = color_list[id_color]
bb.add(test_image, x_min, y_min, x_max, y_max, str(name[0]), box_color)

cv2.imshow('demo', test_image)
Esempio n. 21
# Draw the box described by `row` on `image`, labelled with
# row[labelName] followed by `labelAug`, in the given colour.
# NOTE(review): the second coordinate argument is missing in this excerpt
# (`row.bbl,, row.bbr`), so the call does not parse as written.
def addBox(row, image, color, labelName='label', labelAug=''):
    bb.add(image, row.bbl,, row.bbr, row.bbb,
           row[labelName] + labelAug, color)
Esempio n. 22
            # getting contours
            # NOTE(review): this fragment is incomplete — the `findContours(`
            # call is never closed, the enclosing loop is not visible, and the
            # final `if cv2.waitKey(1) == 27:` has no body.
            # NOTE(review): the second returned value is bound to `img`;
            # confirm what it holds for the installed OpenCV version.
            contours, img = cv2.findContours(binary, cv2.RETR_TREE,

        # Draw the two horizontal reference lines (green, then red in BGR).
        cv2.line(frame, (415, LINE_POSITION_1), (715, LINE_POSITION_1),
                 (0, 255, 0), 3)
        cv2.line(frame, (235, LINE_POSITION_2), (878, LINE_POSITION_2),
                 (0, 0, 255), 3)

        for c in contours:
            x, y, w, h = cv2.boundingRect(c)
            x_c, y_c = center(cv2.boundingRect(c))  # getting center

            # contour validation: the centre must lie between the two lines.
            # NOTE(review): the last test is `y_c < 878`, while 235 and 878
            # are the x-extent of the second line — possibly meant `x_c`.
            if (y_c > LINE_POSITION_1) and (y_c < LINE_POSITION_2) and (
                    x_c > 235) and (y_c < 878):
                # The box size must fall inside the configured limits.
                contour_validation = (w >= MIN_WIDTH) and (
                    h >= MIN_HEIGTH) and (w <= MAX_WIDTH) and (h <= MAX_HEIGHT)
                if contour_validation:
                    bb.add(frame, x, y, x + w, y + h, 'CAR', 'yellow')

        cv2.imshow("Result", frame)

        # 27 is the key code tested here (handler body missing in this
        # excerpt).
        if cv2.waitKey(1) == 27:

Esempio n. 23
# print(inference_result.examples[0].lrtb[0])

# for root, dirs, files in os.walk(INPUT_IMG_DIR):
# print(files)

# NOTE(review): the assert below is never closed in this excerpt; the value
# the image-id count is compared with is missing.
assert (len(inference_result.examples[0].image_id) == len(

# For every inferred example: load its image, check the box against the
# image size, draw the box and write the result under OUTPUT_IMG_DIR.
for i in range(len(inference_result.examples[0].image_id)):
    image_id = inference_result.examples[0].image_id[i]
    bounding_box_lrtb = inference_result.examples[0].bounding_box_lrtb[i]

    # Input files are named by the image id zero-padded to 12 digits;
    # outputs are named by the running index.
    input_image_path = INPUT_IMG_DIR + "/" + str(image_id).zfill(12) + ".jpg"
    output_image_path = OUTPUT_IMG_DIR + "/" + str(i) + ".jpg"

    # assert(bounding_box_lrtb[0] < image)
    image = cv2.imread(input_image_path, cv2.IMREAD_COLOR)

    print("bounding_box: ", bounding_box_lrtb)
    print("image: ", image.shape)
    # Entries 0 and 1 are checked against shape[1], entries 2 and 3 against
    # shape[0].
    assert(bounding_box_lrtb[0] <= image.shape[1])
    assert(bounding_box_lrtb[1] <= image.shape[1])
    assert(bounding_box_lrtb[2] <= image.shape[0])
    assert(bounding_box_lrtb[3] <= image.shape[0])

    # Reorder entries 0, 2, 1, 3 for the add() call; no label or colour is
    # passed.
    bounding_box.add(image, bounding_box_lrtb[0], bounding_box_lrtb[2],
                     bounding_box_lrtb[1], bounding_box_lrtb[3])
    cv2.imwrite(output_image_path, image)
Esempio n. 24
def main():
    # initialize a list storing counted objectID
    counted_objectID = []
    # intialize frame dimensions
    H,W = None, None
    # initialize our centroid tracker, then initialize a list to store
    # each of our dlib correlation trackers, followed by a dictionary to
    # map each unique object ID to a TrackableObject
    ct = CentroidTracker(maxDisappeared=conf["max_disappear"],
    trackers = []
    trackableObjects = {}
    # keep the count of total number of frames
    totalFrame = 0
    net = load_model()
    cap = cv2.VideoCapture(args["video"])
    # initilize the coordiate of counting line
    _, frame =
    count_line = [(0,frame.shape[0]-conf["line_coordinate"]),
    # initialize car counting variable
    car_count = 0
    if args["save"]:
        video_size = (frame.shape[1]+250,frame.shape[0])
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        writer = cv2.VideoWriter("processed_video.avi",fourcc,24,video_size)

    while True:
        ret, frame =
        # coordinate of countign line
        # brekout the loop if no frame is captured
        if frame is None:
        # save origin frame for later displaying
        origin_frame = frame.copy()
        # crop frame before countign line
        frame = frame[:frame.shape[0]-conf["line_coordinate"]-25,:,:]
        # convert to RGB colour space
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # if the frame dimensions are empty, set them
        if W is None or H is None:
            (H,W) = frame.shape[:2]
        # initialize our list of bounding box rectangles returned by
        # either (1) our object detector or (2) the correlation trackers
        rects = []
        # check to see if we should run a more computationally expensive
        # object detection method to aid our tracker
        if totalFrame % conf["track_object"] == 0:
            # initialize our new set of object trackers
            trackers = []
            # convert the frame to a blob and pass the blob
            # through the network to obtain the detections
            #blob = cv2.dnn.blobFromImage(frame, size=(300,300),ddepth=cv2.CV_8U)
            blob = cv2.dnn.blobFromImage(frame,ddepth=cv2.CV_8U)
            #net.setInput(blob, scalefactor=1.0/127.5, mean=[127.5,127.5,127.5])
            net.setInput(blob, scalefactor=1.0/255, mean=[255,255,255])
            detections = net.forward()

            # loop over the detections
            for i in np.arange(0, detections.shape[2]):
                # extract the confidence (i.e., probability)
                # associated with the prediction
                confidence = detections[0,0,i,2]
                # filter out weak detections by
                # setting a threshold confidence
                if confidence > conf["confidence"]:
                    # extract the index of the class label
                    # from the detection list
                    idx = int(detections[0,0,i,1])
                    # if the class label is not a car, skip it
                    if CLASSES[idx] != "car":
                    # compute the (x, y)-coordinates of the
                    # bounding box for the object
                    box = detections[0,0,i,3:7] * np.array([W,H,W,H])
                    (startX, startY, endX, endY) = box.astype("int")
                    # construct a dlib rectangle object from the bounding
                    # box coordinates and then start the dlib correlation tracker
                    tracker = dlib.correlation_tracker()
                    rect = dlib.rectangle(startX,startY, endX, endY)
                    # add the tracker to our list of trackers
                    # so we can utilize it during skip frames
        # otherwise, we should utilize our object "trackers" rather than
        # object "detectors" to obtain a higher frame processing rate
            # loop over the tracker
            for tracker in trackers:
                pos = tracker.get_position()
                # unpack the position project
                post_list = [pos.left(),,pos.right(), pos.bottom()]
                [startX, startY, endX, endY] = list(map(int,post_list))         
                # add the bounding box coordinate to the rectangle list
        # use the centroid tracker to associate the (1) old object
        # centroids with (2) the newly computed object centroids
        objects = ct.update(rects)
        # loop over the tracked objects
        for (objectID, centroid),rect in zip(objects.copy().items(),rects):
            # if objectID is already counted then skip it
            if objectID in counted_objectID:
                objects = ct.update(rects)
                # if the centroid of the car crosses the count line
                # then increment car_count
                if (centroid[1] + 60 > count_line[0][1]): 
                    objects = ct.update(rects)
            # check to see if a trackable object exists
            # for the current object ID
            to = trackableObjects.get(objectID, None)
            # if there is no exisiting trackable object, create one
            if to is None:
                to = TrackableObject(objectID, centroid)
            # store the trackable object in our dictinonary
            trackableObjects[objectID] = to
            # draw both the ID of the object and the centroid
            # of the object on the output frame
            text = "ID {}".format(objectID)
            cv2.putText(origin_frame, text, (centroid[0] - 10, centroid[1] - 10), 
                  cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
  , (centroid[0], centroid[1]), 4,
                       (0, 255, 0), -1)
        # create a blank space next to frame to display No. cars
        blank_region = np.ones((origin_frame.shape[0],250,3), np.uint8)*255
        cv2.putText(blank_region, "No. car(s):", (40,origin_frame.shape[0]//2-120),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 3)
        # stack the frame with blank space
        cv2.putText(blank_region, str(car_count), (40,origin_frame.shape[0]//2),
                    cv2.FONT_HERSHEY_SIMPLEX, 4, (0, 0, 0), 3)
        stack_image = np.concatenate((origin_frame,blank_region),axis=1)
        cv2.imshow("Final result", stack_image)
        # save processed videos
        if args["save"]:
        # press ESC to terminate the system
        key = cv2.waitKey(1)  & 0xff
        if key == 27:
        # increment the total number of frame processed so far 
        totalFrame += 1
    if args["save"]:
Esempio n. 25
def draw_bbox(img, bbox, conf=None, color='red'):
  """Draw a labelled box on ``img`` through ``bb.add`` and return ``img``.

  Args:
    img: image object handed to ``bb.add`` as its first argument.
    bbox: iterable unpacked positionally into ``bb.add`` right after ``img``.
    conf: optional value shown as the label (via ``str``); when it is
      ``None`` the fixed label ``'face'`` is used instead.
    color: passed through to ``bb.add`` as the last argument.

  Returns:
    The same ``img`` object that was passed in.
  """
  if conf is None:
    label = 'face'
  else:
    label = str(conf)
  bb.add(img, *bbox, label, color)
  return img
Esempio n. 26
 def visualize(self, image, pred):
     """Draw every parsed detection from ``pred`` onto an array of ``image``.

     ``image`` is converted with ``Model.img2arr`` and ``pred`` is split by
     ``EfficientDetModel.parse`` into ``rois`` and ``class_ids``; each roi's
     first four entries are passed to ``bb.add`` together with the
     stringified class id found at the same index.

     NOTE(review): the visible code returns nothing — confirm whether
     callers expect the annotated array back.
     """
     img = Model.img2arr(image)
     rois, class_ids = EfficientDetModel.parse(pred)
     for idx, box in enumerate(rois):
         c0, c1, c2, c3 = box[0], box[1], box[2], box[3]
         label = str(class_ids[idx])
         bb.add(img, c0, c1, c2, c3, label)
    all_frames = load_all_seq_frames(frames_folder)

    # Printing sequence info

    # Make directory to save annotated images
    if not os.path.exists("results/" + sequence):
        os.makedirs(os.path.dirname("results/" + sequence), exist_ok=True)

    # Write bounding boxes in each frame and save each frame
    num_frames = len(all_frames)
    for i in range(num_frames):
        frame = all_frames[i][0].copy()
        for line in gt:
            if line[0] == i + 1:
                bb.add(frame, line[2], line[3], line[2] + line[4],
                       line[3] + line[5], line[1], 'green')
        cv2.imwrite("results/" + sequence + "{}.jpg".format(i), frame)
    #    0       1      2           3         4            5          6     7    8    9
    # <frame>, <id>, <bb_left>, <bb_top>, <bb_width>, <bb_height>, <conf>, <x>, <y>, <z>
vc = cv2.VideoCapture(path)
W = vc.get(cv2.CAP_PROP_FRAME_WIDTH)
size = (int(W), int(H))

out = cv2.VideoWriter('{}.mp4'.format(video_sequence_name), cv2.VideoWriter_fourcc(*'DIVX'), 20, size)

while True:
    frame =
    frame = frame[1]
    if frame is None:
Esempio n. 28
def main():
    cfg.device = torch.device('cuda')
    torch.backends.cudnn.benchmark = False

    max_per_image = 100
    num_classes = 80 if cfg.dataset == 'coco' else 4
    dictionary = np.load(cfg.dictionary_file)

    colors = COCO_COLORS if cfg.dataset == 'coco' else DETRAC_COLORS
    names = COCO_NAMES if cfg.dataset == 'coco' else DETRAC_NAMES
    for j in range(len(names)):
        col_ = [c * 255 for c in colors[j]]
        colors[j] = tuple(col_)

    print('Creating model and recover from checkpoint ...')
    if 'hourglass' in cfg.arch:
        model = exkp(n=5,
                     dims=[256, 256, 384, 384, 384, 512],
                     modules=[2, 2, 2, 2, 2, 4],
        model = get_pose_net(num_layers=int(cfg.arch.split('_')[-1]),
        # raise NotImplementedError

    model = load_demo_model(model, cfg.ckpt_dir)
    model =

    # Loading COCO validation images
    annotation_file = '{}/annotations/instances_{}.json'.format(
        cfg.data_dir, cfg.data_type)
    coco = COCO(annotation_file)

    # Load all annotations
    cats = coco.loadCats(coco.getCatIds())
    nms = [cat['name'] for cat in cats]
    catIds = coco.getCatIds(catNms=nms)
    # imgIds = coco.getImgIds(catIds=catIds)
    imgIds = coco.getImgIds()
    # annIds = coco.getAnnIds(catIds=catIds)
    # all_anns = coco.loadAnns(ids=annIds)
    # print(len(imgIds), imgIds)

    for id in imgIds:
        annt_ids = coco.getAnnIds(imgIds=[id])
        annotations_per_img = coco.loadAnns(ids=annt_ids)
        # print('All annots: ', len(annotations_per_img), annotations_per_img)
        img = coco.loadImgs(id)[0]
        image_path = '%s/images/%s/%s' % (cfg.data_dir, cfg.data_type,
        w_img = int(img['width'])
        h_img = int(img['height'])
        if w_img < 1 or h_img < 1:

        img_original = cv2.imread(image_path)
        img_connect = cv2.imread(image_path)
        img_recon = cv2.imread(image_path)
        print('Image id: ', id)

        for annt in annotations_per_img:
            if annt['iscrowd'] == 1 or type(annt['segmentation']) != list:

            polygons = get_connected_polygon_using_mask(
                annt['segmentation'], (h_img, w_img),
            gt_bbox = annt['bbox']
            gt_x1, gt_y1, gt_w, gt_h = gt_bbox
            contour = np.array(polygons).reshape((-1, 2))

            # Downsample the contour to fix number of vertices
            if len(contour) > cfg.num_vertices:
                resampled_contour = resample(contour, num=cfg.num_vertices)
                resampled_contour = turning_angle_resample(
                    contour, cfg.num_vertices)

            resampled_contour[:, 0] = np.clip(resampled_contour[:, 0], gt_x1,
                                              gt_x1 + gt_w)
            resampled_contour[:, 1] = np.clip(resampled_contour[:, 1], gt_y1,
                                              gt_y1 + gt_h)

            clockwise_flag = check_clockwise_polygon(resampled_contour)
            if not clockwise_flag:
                fixed_contour = np.flip(resampled_contour, axis=0)
                fixed_contour = resampled_contour.copy()

            # Indexing from the left-most vertex, argmin x-axis
            idx = np.argmin(fixed_contour[:, 0])
            indexed_shape = np.concatenate(
                (fixed_contour[idx:, :], fixed_contour[:idx, :]), axis=0)

            x1, y1, x2, y2 = gt_x1, gt_y1, gt_x1 + gt_w, gt_y1 + gt_h

            # bbox_width, bbox_height = x2 - x1, y2 - y1
            # bbox = [x1, y1, bbox_width, bbox_height]
            # bbox_center = np.array([(x1 + x2) / 2., (y1 + y2) / 2.])
            bbox_center = np.mean(indexed_shape, axis=0)

            centered_shape = indexed_shape - bbox_center

            # visualize resampled points with multiple parts in image side by side
            for cnt in range(len(annt['segmentation'])):
                polys = np.array(annt['segmentation'][cnt]).reshape((-1, 2))
                cv2.polylines(img_original, [polys.astype(np.int32)],
                              True, (10, 10, 255),
                # cv2.drawContours(img_original, [polys.astype(np.int32)], contourIdx=-1, color=(10, 10, 255), thickness=-1)

            cv2.polylines(img_connect, [indexed_shape.astype(np.int32)],
                          True, (10, 10, 255),
            # cv2.drawContours(img_connect, [indexed_shape.astype(np.int32)], contourIdx=-1, color=(10, 10, 255), thickness=-1)

            learned_val_codes, _ = fast_ista(centered_shape.reshape((1, -1)),
            recon_contour = np.matmul(learned_val_codes, dictionary).reshape(
                (-1, 2))
            recon_contour = recon_contour + bbox_center
            cv2.polylines(img_recon, [recon_contour.astype(np.int32)],
                          True, (10, 10, 255),
            # cv2.drawContours(img_recon, [recon_contour.astype(np.int32)], contourIdx=-1, color=(10, 10, 255), thickness=-1)

            # plot gt mean and std
            # image = cv2.imread(image_path)
            # # cv2.ellipse(image, center=(int(contour_mean[0]), int(contour_mean[1])),
            # #             axes=(int(contour_std[0]), int(contour_std[1])),
            # #             angle=0, startAngle=0, endAngle=360, color=(0, 255, 0),
            # #             thickness=2)
            # cv2.rectangle(image, pt1=(int(contour_mean[0] - contour_std[0] / 2.), int(contour_mean[1] - contour_std[1] / 2.)),
            #               pt2=(int(contour_mean[0] + contour_std[0] / 2.), int(contour_mean[1] + contour_std[1] / 2.)),
            #               color=(0, 255, 0), thickness=2)
            # cv2.polylines(image, [fixed_contour.astype(np.int32)], True, (0, 0, 255))
            # cv2.rectangle(image, pt1=(int(min(fixed_contour[:, 0])), int(min(fixed_contour[:, 1]))),
            #               pt2=(int(max(fixed_contour[:, 0])), int(max(fixed_contour[:, 1]))),
            #               color=(255, 0, 0), thickness=2)
            # cv2.imshow('GT segments', image)
            # if cv2.waitKey() & 0xFF == ord('q'):
            #     break

        image = cv2.imread(image_path)
        original_image = image.copy()
        height, width = image.shape[0:2]
        padding = 127 if 'hourglass' in cfg.arch else 31
        imgs = {}
        for scale in cfg.test_scales:
            new_height = int(height * scale)
            new_width = int(width * scale)

            if cfg.img_size > 0:
                img_height, img_width = cfg.img_size, cfg.img_size
                center = np.array([new_width / 2., new_height / 2.],
                scaled_size = max(height, width) * 1.0
                scaled_size = np.array([scaled_size, scaled_size],
                img_height = (new_height | padding) + 1
                img_width = (new_width | padding) + 1
                center = np.array([new_width // 2, new_height // 2],
                scaled_size = np.array([img_width, img_height],

            img = cv2.resize(image, (new_width, new_height))
            trans_img = get_affine_transform(center, scaled_size, 0,
                                             [img_width, img_height])
            img = cv2.warpAffine(img, trans_img, (img_width, img_height))

            img = img.astype(np.float32) / 255.
            img -= np.array(
                COCO_MEAN if cfg.dataset == 'coco' else DETRAC_MEAN,
                dtype=np.float32)[None, None, :]
            img /= np.array(COCO_STD if cfg.dataset == 'coco' else DETRAC_STD,
                            dtype=np.float32)[None, None, :]
            img = img.transpose(
                2, 0, 1)[None, :, :, :]  # from [H, W, C] to [1, C, H, W]

            # if cfg.test_flip:
            #     img = np.concatenate((img, img[:, :, :, ::-1].copy()), axis=0)

            imgs[scale] = {
                'image': torch.from_numpy(img).float(),
                'center': np.array(center),
                'scale': np.array(scaled_size),
                'fmap_h': np.array(img_height // 4),
                'fmap_w': np.array(img_width // 4)

        with torch.no_grad():
            segmentations = []
            predicted_codes = []
            start_time = time.time()
            for scale in imgs:
                imgs[scale]['image'] = imgs[scale]['image'].to(cfg.device)

                output = model(imgs[scale]['image'])[-1]
                # segms, codes_ = ctsegm_scaled_decode_debug(*output, torch.from_numpy(dictionary.astype(np.float32)).to(cfg.device),
                #                             K=cfg.test_topk)
                segms = ctsegm_code_n_offset_decode(
                segms = segms.detach().cpu().numpy().reshape(
                    1, -1, segms.shape[2])[0]
                # codes_ = codes_.detach().cpu().numpy().reshape(1, -1, codes_.shape[2])[0]

                top_preds = {}
                code_preds = {}
                for j in range(cfg.num_vertices):
                    segms[:, 2 * j:2 * j + 2] = transform_preds(
                        segms[:, 2 * j:2 * j + 2], imgs[scale]['center'],
                        (imgs[scale]['fmap_w'], imgs[scale]['fmap_h']))
                segms[:, cfg.num_vertices * 2:cfg.num_vertices * 2 +
                      2] = transform_preds(
                                cfg.num_vertices * 2:cfg.num_vertices * 2 + 2],
                          imgs[scale]['center'], imgs[scale]['scale'],
                          (imgs[scale]['fmap_w'], imgs[scale]['fmap_h']))
                segms[:, cfg.num_vertices * 2 + 2:cfg.num_vertices * 2 +
                      4] = transform_preds(
                          segms[:, cfg.num_vertices * 2 +
                                2:cfg.num_vertices * 2 + 4],
                          imgs[scale]['center'], imgs[scale]['scale'],
                          (imgs[scale]['fmap_w'], imgs[scale]['fmap_h']))

                clses = segms[:, -1]
                for j in range(num_classes):
                    inds = (clses == j)
                    top_preds[j + 1] = segms[inds, :cfg.num_vertices * 2 +
                    top_preds[j + 1][:, :cfg.num_vertices * 2 + 4] /= scale
                    # code_preds[j + 1] = codes_[inds, :]


            segms_and_scores = {
                j: np.concatenate([d[j] for d in segmentations], axis=0)
                for j in range(1, num_classes + 1)
            }  # a Dict label: segments
            # codes_and_scores = {j: np.concatenate([d[j] for d in predicted_codes], axis=0)
            #                     for j in range(1, num_classes + 1)}  # a Dict label: segments
            scores = np.hstack([
                segms_and_scores[j][:, cfg.num_vertices * 2 + 4]
                for j in range(1, num_classes + 1)

            if len(scores) > max_per_image:
                kth = len(scores) - max_per_image
                thresh = np.partition(scores, kth)[kth]
                for j in range(1, num_classes + 1):
                    keep_inds = (segms_and_scores[j][:, cfg.num_vertices * 2 +
                                                     4] >= thresh)
                    segms_and_scores[j] = segms_and_scores[j][keep_inds]
                    # codes_and_scores[j] = codes_and_scores[j][keep_inds]

            # Use opencv functions to output a video
            output_image = original_image
            blend_mask = np.zeros(shape=output_image.shape, dtype=np.uint8)
            # print(blend_mask.shape)

            for lab in segms_and_scores:
                for idx in range(len(segms_and_scores[lab])):
                    res = segms_and_scores[lab][idx]
                    # c_ = codes_and_scores[lab][idx]
                    # for res in segms_and_scores[lab]:
                    contour, bbox, score = res[:-5], res[-5:-1], res[-1]
                    bbox[0] = np.clip(bbox[0], 0, w_img)
                    bbox[1] = np.clip(bbox[1], 0, h_img)
                    bbox[2] = np.clip(bbox[2], 0, w_img)
                    bbox[3] = np.clip(bbox[3], 0, h_img)
                    if score > cfg.detect_thres:
                        text = names[lab]  # + ' %.2f' % score
                        # label_size = cv2.getTextSize(text, cv2.FONT_HERSHEY_COMPLEX, thickness=2, fontScale=0.5)
                        polygon = contour.reshape((-1, 2))
                        # print('Shape: Poly -- ', polygon.shape)
                        # print(polygon)
                        polygon[:, 0] = np.clip(polygon[:, 0], 0, w_img - 1)
                        polygon[:, 1] = np.clip(polygon[:, 1], 0, h_img - 1)

                        # use bb tools to draw predictions
                        color = random.choice(COLOR_WORLD)
                        bb.add(output_image, bbox[0], bbox[1], bbox[2],
                               bbox[3], text, color)
                        cv2.polylines(output_image, [polygon.astype(np.int32)],

                        # color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
                        # contour_mean = np.mean(polygon, axis=0)
                        # contour_std = np.std(polygon, axis=0)
                        # center_x, center_y = np.mean(polygon, axis=0).astype(np.int32)
                        # text_location = [bbox[0] + 1, bbox[1] + 1,
                        #                  bbox[1] + label_size[0][0] + 1,
                        #                  bbox[0] + label_size[0][1] + 1]
                        # cv2.rectangle(output_image, pt1=(int(bbox[0]), int(bbox[1])),
                        #               pt2=(int(bbox[2]), int(bbox[3])),
                        #               color=color, thickness=1)
                        # cv2.rectangle(output_image, pt1=(int(np.min(polygon[:, 0])), int(np.min(polygon[:, 1]))),
                        #               pt2=(int(np.max(polygon[:, 0])), int(np.max(polygon[:, 1]))),
                        #               color=(0, 255, 0), thickness=1)
                        # cv2.polylines(output_image, [polygon.astype(np.int32)], True, color, thickness=2)
                        # cv2.putText(output_image, text, org=(int(text_location[0]), int(text_location[3])),
                        #             fontFace=cv2.FONT_HERSHEY_COMPLEX, thickness=2, fontScale=0.5,
                        #             color=(255, 0, 0))
                        # cv2.putText(output_image, text, org=(int(bbox[0]), int(bbox[1])),
                        #             fontFace=cv2.FONT_HERSHEY_COMPLEX, thickness=1, fontScale=0.5,
                        #             color=color)

                        # show the histgram for predicted codes
                        # fig = plt.figure()
                        # plt.plot(np.arange(cfg.n_codes), c_.reshape((-1,)), color='green',
                        #          marker='o', linestyle='dashed', linewidth=2, markersize=6)
                        # plt.ylabel('Value of each coefficient')
                        # plt.xlabel('All predicted {} coefficients'.format(cfg.n_codes))
                        # plt.title('Distribution of the predicted coefficients for {}'.format(text))

            value = [255, 255, 255]
            dst_img = cv2.addWeighted(output_image, 0.5, blend_mask, 0.5, 0)
            dst_img[blend_mask == 0] = output_image[blend_mask == 0]
            img_original = cv2.copyMakeBorder(img_original, 0, 0, 0, 10,
                                              cv2.BORDER_CONSTANT, None, value)
            img_connect = cv2.copyMakeBorder(img_connect, 0, 0, 10, 10,
                                             cv2.BORDER_CONSTANT, None, value)
            img_recon = cv2.copyMakeBorder(img_recon, 0, 0, 10, 10,
                                           cv2.BORDER_CONSTANT, None, value)
            dst_img = cv2.copyMakeBorder(dst_img, 0, 0, 10, 0,
                                         cv2.BORDER_CONSTANT, None, value)
            im_cat = np.concatenate(
                (img_original, img_connect, img_recon, dst_img), axis=1)
            # im_cat = np.concatenate((img_original, img_connect, img_recon), axis=1)
            cv2.imshow('GT:Resample:Recons:Predict', im_cat)
            if cv2.waitKey() & 0xFF == ord('q'):
Esempio n. 29
def add_bounding_box(image, bb_info):
    """Draw the box described by ``bb_info`` onto ``image`` using ``bb.add``.

    Args:
        image: image object handed to ``bb.add`` as its first argument.
        bb_info: mapping with a required ``"coordinates"`` entry that unpacks
            into exactly four values (forwarded as xmin, ymin, xmax, ymax)
            and optional ``"label"`` / ``"color"`` entries.  A missing label
            becomes ``""`` and a missing color becomes ``"green"``.
    """
    xmin, ymin, xmax, ymax = bb_info["coordinates"]

    # Start from the defaults and override only with keys that are present.
    label = ""
    if "label" in bb_info:
        label = bb_info["label"]

    color = "green"
    if "color" in bb_info:
        color = bb_info["color"]

    bb.add(image, xmin, ymin, xmax, ymax, label, color)