Python preprocess 예제들, ssd_data.preprocess Python 예제들

예제 #1

0

파일 보기

    def predict_tbpp(self):
        """Run ``self.tbpp`` on the image named by ``self.image_name``.

        The image is loaded through ``resize_and_pad``; its three results are
        stored on the instance as ``resized_image``, ``padding`` and
        ``im_shape``. The resized image is preprocessed to 512x512, predicted
        as a batch of one, and the first prediction is decoded with
        ``self.confidence_threshold`` and ``fast_nms=False``.

        Returns:
            Whatever ``self.prior_util.decode`` returns for that prediction.
        """
        (self.resized_image,
         self.padding,
         self.im_shape) = resize_and_pad(self.image_name)

        # The model expects a batch dimension, hence the single-element list.
        batch = np.array([preprocess(self.resized_image, (512, 512))])
        predictions = self.tbpp.predict(batch, batch_size=1, verbose=1)

        return self.prior_util.decode(
            predictions[0],
            self.confidence_threshold,
            fast_nms=False,
        )

예제 #2

0

파일 보기

파일: sl_rosnode.py 프로젝트: Intelligent-Systems-Phystech/2018-Project-10

    def callback(self, data):
        """Annotate one incoming image message and republish it.

        The message is converted to a BGR OpenCV image and run through
        ``self.model``. Every decoded result is drawn onto the image as a
        red polygon, a frames-per-second label is drawn in the top-left
        corner, and the image is published on ``self.image_pub``.

        Args:
            data: Image message handed to ``self.bridge.imgmsg_to_cv2``.

        Returns:
            None. If the message cannot be converted the error is printed
            and the frame is dropped.
        """
        try:
            img = self.bridge.imgmsg_to_cv2(data, "bgr8")
        except CvBridgeError as e:
            # No image was produced; continuing would fail with a NameError
            # on `img`, so drop this frame.
            print(e)
            return

        input_size = self.input_shape[:2]
        vid_h, vid_w = img.shape[:2]

        # model to predict (wrapped in a list to add the batch dimension)
        x = np.array([preprocess(img, input_size)])

        with self.graph.as_default():
            y = self.model.predict(x)

        result = self.prior_util.decode(y[0],
                                        segment_threshold=0.55,
                                        link_threshold=0.45)

        for r in result:
            xy = rbox_to_polygon(r[:5])
            # Rescale from network-input coordinates to frame pixels.
            # NOTE(review): input_size is input_shape[:2]; if that is
            # (height, width) this only lines up with (x, y) points for
            # square inputs - confirm.
            xy = xy / input_size * [vid_w, vid_h]
            xy = xy.reshape((-1, 1, 2))
            xy = np.round(xy).astype(np.int32)
            cv2.polylines(img, [xy], True, (0, 0, 255))

        # calculate fps: count frames and refresh the label each time more
        # than one second of processing time has accumulated
        curr_time = timer()
        exec_time = curr_time - self.prev_time
        self.prev_time = curr_time
        self.accum_time = self.accum_time + exec_time
        self.curr_fps = self.curr_fps + 1
        if self.accum_time > 1:
            self.accum_time = self.accum_time - 1
            self.fps = "FPS: " + str(self.curr_fps)
            self.curr_fps = 0

        # draw fps on a white background box
        cv2.rectangle(img, (0, 0), (50, 17), (255, 255, 255), -1)
        cv2.putText(img, self.fps, (3, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.35,
                    (0, 0, 0), 1)

        try:
            self.image_pub.publish(self.bridge.cv2_to_imgmsg(img, "bgr8"))
        except CvBridgeError as e:
            print(e)

예제 #3

0

파일 보기

파일: sl_end2end_videotest.py 프로젝트: weian312/ssd_detectors

 # Fragment of a video test script: reads frames from `vid`, runs the
 # detection model on each one and keeps per-second timing counters.
 # `accum_time` and `curr_fps` are updated below but are not initialised in
 # this excerpt.
 prev_time = timer()
 
 input_size = input_shape[:2]
 
 # Not used within the visible part of the loop.
 record_buffer = []
 record_timestamps = []
 init_time = timer()
 
 while True:
     retval, img = vid.read()
     if not retval:
         # No frame could be read: leave the loop.
         print("Done!")
         break
         
     # model to predict (the list adds the batch dimension)
     x = np.array([preprocess(img, input_size)])
     y = det_model.predict(x)
     
     result = prior_util.decode(y[0], segment_threshold, link_threshold)
     
     # img1 is a copy of the frame, img2 an all-zero image of the same shape.
     img1 = np.copy(img)
     img2 = np.zeros_like(img)
     
     # calculate fps: accumulate elapsed time and count frames; one second
     # is subtracted each time the accumulator exceeds it
     curr_time = timer()
     exec_time = curr_time - prev_time
     prev_time = curr_time
     accum_time = accum_time + exec_time
     curr_fps = curr_fps + 1
     if accum_time > 1:
         accum_time = accum_time - 1

예제 #4

0

파일 보기

파일: views.py 프로젝트: Anirudh-RV/DataAnnotationEndToEndSystem

def index(request):
    """Run text detection on a remote image and forward the annotated copy.

    The request body is parsed as a Python literal holding the keys
    ``username``, ``imagename`` and ``imageurl``. The image is downloaded,
    passed through ``sl_model``, the decoded boxes are drawn in red on a
    copy of it, the copy is written below ``assets/`` and finally uploaded
    to ``http://localhost:4000/upload``. Three timing measurements are
    printed along the way.

    Args:
        request: Request object whose ``body`` holds the UTF-8 payload.

    Returns:
        The rendered ``index.html`` response.
    """
    decodeddata = request.body.decode('utf-8')
    # literal_eval only accepts Python literals, so no code is executed.
    dictdata = ast.literal_eval(decodeddata)
    username = dictdata["username"]
    imagename = dictdata["imagename"]
    imageurl = dictdata["imageurl"]

    start_time = time.time()
    # Final TextBox++ Code : (Works on just image)
    input_size = input_shape[:2]
    print(input_size)
    # getting the image
    # NOTE(review): imageurl comes straight from the request and is fetched
    # server-side without validation or a timeout - confirm this is intended.
    response = requests.get(imageurl)
    img = np.array(Image.open(BytesIO(response.content)))
    img_h = img.shape[0]
    img_w = img.shape[1]
    img1 = np.copy(img)

    # model to predict (the list adds the batch dimension)
    x = np.array([preprocess(img, input_size)])

    elapsed_time = time.time() - start_time
    print("Performace measure : " + str(elapsed_time))

    # Model start
    start_time = time.time()
    with sl_graph.as_default():
        with sl_session.as_default():
            y = sl_model.predict(x)

    elapsed_time = time.time() - start_time
    print("Performace measure : " + str(elapsed_time))
    # Model end

    start_time = time.time()
    result = prior_util.decode(y[0], confidence_threshold)

    if len(result) > 0:
        # Columns 12..16 of each result row are converted to polygons,
        # scaled by the image size and drawn onto the output copy.
        rboxes = result[:, 12:17]
        boxes = np.asarray([rbox3_to_polygon(r) for r in rboxes])
        xy = np.round(boxes * [img_w, img_h]).astype(np.int32)
        cv2.polylines(img1, tuple(xy), True, (0, 0, 255))

    # draw
    # NOTE(review): username and imagename are request-supplied and are
    # concatenated into the path unsanitised - verify path traversal is not
    # possible here.
    # NOTE(review): the array was decoded with PIL while cv2.imwrite is
    # presumably expecting BGR channel order - confirm colours are correct.
    saveimageindjango = 'assets/mloutput_' + username + "_" + imagename
    cv2.imwrite(saveimageindjango, img1)
    elapsed_time = time.time() - start_time
    print("Performace measure : " + str(elapsed_time))
    print("Sending to back end...")
    headers = {
        'username': username,
    }
    # The context manager closes the file handle once the upload finished;
    # previously the handle was left open on every request.
    with open(saveimageindjango, 'rb') as image_file:
        response = requests.request("POST",
                                    'http://localhost:4000/upload',
                                    files={'file': image_file},
                                    headers=headers)
    print(response)
    print("Backend Process Complete")
    context = {"data": "data"}
    return render(request, 'index.html', context)

예제 #5

0

파일 보기

파일: generate_tbpp_preds.py 프로젝트: seangtkelley/icken-and-chegg

    # Fragment: slides a crop_h x crop_w window over each rotated version of
    # map_img and runs the model on every crop. The excerpt ends before the
    # per-box handling and before current_x / current_y are advanced.
    confs = []
    for angle in angles:
        rot_img, rot_mat, bounds = rotate_image(map_img, angle, original_shape)
        height = rot_img.shape[0]
        width = rot_img.shape[1]
        # Top-left corner of the current crop window.
        current_x = 0
        current_y = 0

        # Only windows that lie strictly inside the rotated image are used.
        while current_y + crop_h < height:
            while current_x + crop_w < width:

                crop_img = rot_img[current_y:current_y + crop_h,
                                   current_x:current_x + crop_w]

                if do_preprocess:
                    crop_img = preprocess(crop_img, (512, 512))

                # The crop is wrapped in a list to form a batch of one.
                model_output = model.predict(np.array([crop_img]),
                                             batch_size=1,
                                             verbose=0)

                res = prior_util.decode(model_output[0],
                                        confidence_threshold,
                                        fast_nms=False)
                # Split the decoded rows by column range: 0-3, 4-11, 12-16
                # and everything from column 17 onwards.
                bboxes = res[:, 0:4]
                quades = res[:, 4:12]
                rboxes = res[:, 12:17]
                conf = res[:, 17:]

                for j in range(len(rboxes)):
                    # convert rbox

예제 #6

0

파일 보기

def detect_motion(frameCount):
    """Read frames forever, draw detected text boxes and publish each frame.

    Every frame read from ``cap`` is run through ``sl_model``. The decoded
    boxes are drawn in red on a copy of the frame and that copy is stored in
    the global ``outputFrame`` while ``lock`` is held.

    Args:
        frameCount: Not used by this function.
    """
    # lock variables
    global vs, outputFrame, lock

    # loop over frames from the video stream
    while True:
        ret, frame = cap.read()
        print("READING FRAME")
        if frame is None:
            # Nothing was captured. Keep the previously published frame
            # instead of failing on a None frame further down.
            continue

        img = np.array(frame)
        img_h, img_w = img.shape[:2]
        img1 = np.copy(img)

        # model to predict (the list adds the batch dimension)
        x = np.array([preprocess(img, input_size)])
        with sl_graph.as_default():
            with sl_session.as_default():
                y = sl_model.predict(x)

        result = prior_util.decode(y[0], confidence_threshold)
        if len(result) > 0:
            # Columns 12..16 of each result row are converted to polygons,
            # scaled by the frame size and drawn onto the output copy.
            rboxes = result[:, 12:17]
            boxes = np.asarray([rbox3_to_polygon(r) for r in rboxes])
            xy = np.round(boxes * [img_w, img_h]).astype(np.int32)
            cv2.polylines(img1, tuple(xy), True, (0, 0, 255))

        # acquire the lock, set the output frame, and release the lock
        with lock:
            outputFrame = img1.copy()

예제 #7

0

파일 보기

파일: ssd_videotest.py 프로젝트: kimhan1113/ocr_project

 def run(self, video_path=0, start_frame=0, conf_thresh=0.6):
     """ Runs the test on a video (or webcam)

     Each frame is run through ``self.model``; every decoded box is drawn
     with its class name and confidence, an FPS label is added and the
     frame is shown in a window. The method returns when no further frame
     can be read.

     # Arguments
     video_path: A file path to a video to be tested on. Can also be a number,
                 in which case the webcam with the same number (i.e. 0) is
                 used instead

     start_frame: The number of the first frame of the video to be processed
                  by the network.

     conf_thresh: Threshold of confidence. Any boxes with lower confidence
                  are not visualized.

     # Raises
     IOError: If the video source cannot be opened.
     """

     vid = cv2.VideoCapture(video_path)
     if not vid.isOpened():
         raise IOError(("Couldn't open video file or webcam. If you're "
         "trying to open a webcam, make sure you video_path is an integer!"))

     vid_w = vid.get(cv2.CAP_PROP_FRAME_WIDTH)
     vid_h = vid.get(cv2.CAP_PROP_FRAME_HEIGHT)

     # Skip frames until reaching start_frame. start_frame is a frame
     # number, so it has to go to the frame-index property rather than
     # the millisecond position.
     if start_frame > 0:
         vid.set(cv2.CAP_PROP_POS_FRAMES, start_frame)

     accum_time = 0
     curr_fps = 0
     fps = "FPS: ??"
     prev_time = timer()

     input_size = self.input_shape[:2]

     try:
         while True:
             retval, img = vid.read()
             if not retval:
                 print("Done!")
                 return

             # model to predict (the list adds the batch dimension)
             x = np.array([preprocess(img, input_size)])
             y = self.model.predict(x)

             result = self.prior_util.decode(y[0], confidence_threshold=conf_thresh)

             for r in result:
                 # The first four values are scaled by the frame size to
                 # get pixel coordinates.
                 xmin = int(round(r[0] * vid_w))
                 ymin = int(round(r[1] * vid_h))
                 xmax = int(round(r[2] * vid_w))
                 ymax = int(round(r[3] * vid_h))
                 conf = r[4]
                 label = int(r[5])
                 color = self.class_colors[label]
                 text = self.class_names[label] + " " + ('%.2f' % conf)

                 # draw box
                 cv2.rectangle(img, (xmin, ymin), (xmax, ymax), color, 2)

                 # draw label on a filled background above the box corner
                 text_top = (xmin, ymin - 10)
                 text_bot = (xmin + 90, ymin + 5)
                 text_pos = (xmin + 5, ymin)
                 cv2.rectangle(img, text_top, text_bot, color, -1)
                 cv2.putText(img, text, text_pos, cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0, 0, 0), 1)

             # Calculate FPS
             # This computes FPS for everything, not just the model's execution
             # which may or may not be what you want
             curr_time = timer()
             exec_time = curr_time - prev_time
             prev_time = curr_time
             accum_time = accum_time + exec_time
             curr_fps = curr_fps + 1
             if accum_time > 1:
                 accum_time = accum_time - 1
                 fps = "FPS: " + str(curr_fps)
                 curr_fps = 0

             # Draw FPS in top left corner
             cv2.rectangle(img, (0, 0), (50, 17), (255, 255, 255), -1)
             cv2.putText(img, fps, (3, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0, 0, 0), 1)

             cv2.imshow("SSD detection", img)
             cv2.waitKey(10)
     finally:
         # Free the capture device/file on normal exit and on errors alike.
         vid.release()

예제 #8

0

파일 보기

def detect_motion(frameCount):
    """Read frames forever, annotate text and objects, and publish each frame.

    For every frame read from ``cap``:

    1. ``tfnet`` returns object detections and ``sl_model`` returns text
       boxes, which are drawn in red on a copy of the frame.
    2. Each text box is cropped, padded with a white border and passed to
       pytesseract; the upper-cased transcripts are printed.
    3. Object detections are drawn. Detections labelled ``"person"`` get an
       index that is carried over from the best-overlapping person box of
       the previous frame (according to ``get_iou``), or a fresh index when
       nothing overlaps.
    4. The annotated frame is stored in the global ``outputFrame`` while
       ``lock`` is held.

    Args:
        frameCount: Not used by this function.
    """
    # lock variables
    global vs, outputFrame, lock

    # Tracking state has to survive from one frame to the next; when it was
    # re-created on every iteration the previous-frame matching never ran.
    previous_people = []  # (box key, corner dict) per person, previous frame
    peoplemapping = {}    # box key -> person index
    peopleindex = 0       # last person index handed out

    border = 10  # white border (pixels) added around every OCR crop

    # loop over frames from the video stream
    while True:
        ret, frame = cap.read()
        print("READING FRAME")
        if frame is None:
            # Nothing was captured. Keep the previously published frame and
            # the tracking state instead of failing further down.
            continue

        # yolo
        resultyolo = tfnet.return_predict(frame)

        # textbox++ : model to predict (the list adds the batch dimension)
        img = np.array(frame)
        img_h, img_w = img.shape[:2]
        img1 = np.copy(img)
        x = np.array([preprocess(img, input_size)])
        with sl_graph.as_default():
            with sl_session.as_default():
                y = sl_model.predict(x)
        text_result = prior_util.decode(y[0], confidence_threshold)

        # Reset per frame so that a frame without text neither fails nor
        # re-uses the boxes of an earlier frame.
        xy = []
        if len(text_result) > 0:
            rboxes = text_result[:, 12:17]
            boxes = np.asarray([rbox3_to_polygon(r) for r in rboxes])
            xy = np.round(boxes * [img_w, img_h]).astype(np.int32)
            cv2.polylines(img1, tuple(xy), True, (0, 0, 255))

            # Rebuild slightly enlarged boxes; they are only used to decide
            # which detections are kept for OCR.
            rboxes = np.array([polygon_to_rbox(b) for b in np.reshape(boxes, (-1, 4, 2))])
            bh = rboxes[:, 3]
            rboxes[:, 2] += bh * 0.1
            rboxes[:, 3] += bh * 0.2
            boxes = np.array([rbox_to_polygon(f) for f in rboxes])
            boxes = np.flip(boxes, axis=1)  # TODO: fix order of points, why?
            boxes = np.reshape(boxes, (-1, 8))
            boxes_mask_a = np.array([b[2] > b[3] for b in rboxes])  # width > height, in square world
            boxes_mask_b = np.array([not (np.any(b < 0) or np.any(b > 512)) for b in boxes])  # box inside image
            xy = xy[np.logical_and(boxes_mask_a, boxes_mask_b)]

        # To get the cropped out boxes and run pytesseract over it
        transcripts = []
        for quad in xy:
            # Clamp at 0: a negative slice bound would wrap around to the
            # other side of the image.
            y_start = max(quad[0][1] - 5, 0)
            y_stop = max(quad[2][1] + 5, 0)
            x_start = max(quad[0][0] - 5, 0)
            x_stop = max(quad[2][0] + 5, 0)
            crop_img = img1[y_start:y_stop, x_start:x_stop]
            if crop_img.size == 0:
                continue
            crop_img = cv2.copyMakeBorder(crop_img, border, border, border, border,
                                          cv2.BORDER_CONSTANT, value=[255, 255, 255])
            transcript = pytesseract.image_to_string(crop_img, lang='eng').upper()
            transcripts.append(transcript)
            print(transcript)

        print("".join(t + "\n" for t in transcripts))

        # Start yolo process here.
        # NOTE(review): as before, non-person boxes are drawn on `frame`
        # while person boxes are drawn on a fresh copy that then replaces
        # `frame`, so non-person boxes drawn before the first person are not
        # part of the published image - confirm whether that is intended.
        frame = img1
        img1 = np.copy(frame)

        # YOLO-9000 : Drawing Boxes
        current_people = []
        for res in resultyolo:
            label = res["label"]
            if label == "whole":
                continue

            top = (res["topleft"]["x"], res["topleft"]["y"])
            bottom = (res["bottomright"]["x"], res["bottomright"]["y"])

            if label != "person":
                cv2.rectangle(frame, top, bottom, (255, 0, 0), 2)
                cv2.putText(frame, label, top, cv2.FONT_HERSHEY_DUPLEX, 1.0, (0, 0, 255))
                continue

            # for each person: the corner string identifies the box
            currentValue = "({},{}) ({},{})".format(top[0], top[1], bottom[0], bottom[1])
            bb2 = {'x1': top[0], 'x2': bottom[0], 'y1': top[1], 'y2': bottom[1]}
            current_people.append((currentValue, bb2))

            # IOU PART - BEGIN
            # Pick the previous-frame person with the highest non-zero IoU.
            best_iou = 0
            best_key = None
            for previous_key, bb1 in previous_people:
                iou = get_iou(bb1, bb2)
                if iou > best_iou:
                    best_iou = iou
                    best_key = previous_key
            if best_key is not None:
                peoplemapping[currentValue] = peoplemapping[best_key]
            if currentValue not in peoplemapping:
                peopleindex = peopleindex + 1
                peoplemapping[currentValue] = peopleindex
            # IOU PART - END

            cv2.rectangle(img1, top, bottom, (255, 0, 0), 2)
            cv2.putText(img1, "index : " + str(peoplemapping[currentValue]), top,
                        cv2.FONT_HERSHEY_DUPLEX, 1.0, (0, 0, 255))
            frame = img1

        # Only the boxes of this frame are looked up on the next one, so
        # older entries are dropped to keep the mapping from growing forever.
        previous_people = current_people
        peoplemapping = {key: peoplemapping[key] for key, _ in current_people}

        # acquire the lock, set the output frame, and release the lock
        with lock:
            outputFrame = frame.copy()

예제 #9

0

파일 보기

파일: tbpp_custom_utils.py 프로젝트: seangtkelley/icken-and-chegg

def _rotated_region_bounds(region, rot_mat):
    """Return (xmin, xmax, ymin, ymax) of a region's first four rotated points."""
    # add col for rotation (homogeneous coordinates)
    homogeneous = np.concatenate(
        [region, np.ones([region.shape[0], 1])], axis=1)
    # rotate, then truncate the first four points to integer pixels
    corners = rot_mat.dot(homogeneous.T).T[:4].astype(int)
    return (np.min(corners[:, 0]), np.max(corners[:, 0]),
            np.min(corners[:, 1]), np.max(corners[:, 1]))


def tbpp_raw_generate_data(map_images_dir,
                           image_paths,
                           regions,
                           batch_size,
                           prior_util,
                           encode=True,
                           do_rotate=False,
                           do_preprocess=False):
    """Yield batches of 512x512 image crops with their box targets.

    Every image is visited once in random order. For each rotation angle a
    512x512 window is moved over the rotated image in steps of 400 pixels;
    each window position contributes one crop plus the regions selected for
    it, expressed as eight corner coordinates relative to the crop size and
    followed by a class column of ones.

    Args:
        map_images_dir: Directory that is joined with each image path.
        image_paths: Sequence of image file names.
        regions: Iterable of point arrays with two columns and at least four
            rows; the same regions are used for every image.
        batch_size: Number of crops per yielded batch.
        prior_util: Object whose ``encode`` method is applied to each target
            when ``encode`` is true.
        encode: Whether targets are passed through ``prior_util.encode``.
        do_rotate: Use angles from -90 to 90 in steps of 5 instead of only 0.
        do_preprocess: Pass every crop through ``preprocess`` first.

    Yields:
        ``(inputs, targets)`` as float32 arrays each time ``batch_size``
        crops have been collected. Crops left over at the end are not
        yielded.
    """
    crop_h = 512
    crop_w = 512
    step = 400
    angles = range(-90, 95, 5) if do_rotate else [0]

    inputs, targets = [], []

    mean = np.array([104, 117, 123])

    idxs = np.arange(len(image_paths))
    np.random.shuffle(idxs)
    for i in idxs:
        filepath = os.path.join(map_images_dir, image_paths[i])

        map_img = cv2.imread(filepath)
        original_shape = map_img.shape
        image_center = (original_shape[1] // 2, original_shape[0] // 2)

        for angle in angles:
            rot_img, _, _ = rotate_image(map_img, angle, original_shape)
            height = rot_img.shape[0]
            width = rot_img.shape[1]

            # rotate to orientation when image is not rotated; the matrix
            # and the rotated region bounds are the same for every crop of
            # this angle, so they are computed once here
            rot_mat = cv2.getRotationMatrix2D(image_center,
                                              angle,
                                              scale=1.0)
            region_bounds = [
                _rotated_region_bounds(region, rot_mat) for region in regions
            ]

            current_y = 0
            while current_y + crop_h < height:
                current_x = 0
                while current_x + crop_w < width:

                    crop_img = rot_img[current_y:current_y + crop_h,
                                       current_x:current_x + crop_w]
                    if do_preprocess:
                        crop_img = preprocess(crop_img, (512, 512))

                    crop_boxes = []
                    for xmin, xmax, ymin, ymax in region_bounds:
                        # NOTE(review): the x test requires the region to be
                        # fully inside the crop while the y test only
                        # requires overlap - confirm this asymmetry is
                        # intended.
                        if (xmin > current_x
                                and xmax < current_x + crop_w
                                and ymin < current_y + crop_h
                                and ymax > current_y):
                            crop_xmin = xmin - current_x
                            crop_ymin = ymin - current_y
                            crop_xmax = xmax - current_x
                            crop_ymax = ymax - current_y

                            crop_boxes.append([
                                crop_xmin, crop_ymax, crop_xmax, crop_ymax,
                                crop_xmax, crop_ymin, crop_xmin, crop_ymin
                            ])

                    # Float dtype so the in-place division below is valid;
                    # the reshape keeps a crop without boxes two-dimensional.
                    # NOTE(review): crops without any box are passed on with
                    # zero rows - verify prior_util.encode accepts that.
                    crop_boxes = np.array(crop_boxes, dtype=float).reshape(-1, 8)
                    crop_boxes[:, 0::2] /= crop_img.shape[1]
                    crop_boxes[:, 1::2] /= crop_img.shape[0]

                    # append classes
                    crop_boxes = np.concatenate(
                        [crop_boxes,
                         np.ones([crop_boxes.shape[0], 1])],
                        axis=1)

                    # Subtract out of place: the crop may be an integer view
                    # into rot_img, which must neither be modified nor can
                    # it hold the result.
                    # NOTE(review): if preprocess already subtracts a mean,
                    # it is subtracted twice when do_preprocess is set -
                    # confirm.
                    crop_img = crop_img.astype(np.float32) - mean

                    inputs.append(crop_img)
                    targets.append(crop_boxes)

                    if len(targets) == batch_size:
                        if encode:
                            targets = [prior_util.encode(y) for y in targets]
                            targets = np.array(targets, dtype=np.float32)
                        tmp_inputs = np.array(inputs, dtype=np.float32)
                        tmp_targets = np.array(targets, dtype=np.float32)
                        inputs, targets = [], []
                        yield tmp_inputs, tmp_targets

                    current_x += step

                # Move to the next row of crops. This has to happen inside
                # the row loop, otherwise current_y never changes and the
                # loop cannot terminate.
                current_y += step

        print('NEW epoch')
    print('EXIT generator')

예제 #10

0

파일 보기

파일: sl_videotest.py 프로젝트: PrimadonnaGit/tpgr_ssd_detectors

 def run(self, video_path=0, start_frame=0, segment_threshold=0.55, link_threshold=0.45):
     """ Runs the test on a video (or webcam)

     Each frame is run through ``self.model``; every decoded result is
     drawn as a red polygon, an FPS label is added and the frame is shown
     in a window. The method returns when no further frame can be read.

     # Arguments
     video_path: A file path to a video to be tested on. Can also be a number,
                 in which case the webcam with the same number (i.e. 0) is
                 used instead

     start_frame: The number of the first frame of the video to be processed
                  by the network.

     segment_threshold: Forwarded to ``self.prior_util.decode``.

     link_threshold: Forwarded to ``self.prior_util.decode``.

     # Raises
     IOError: If the video source cannot be opened.
     """

     vid = cv2.VideoCapture(video_path)
     if not vid.isOpened():
         raise IOError(("Couldn't open video file or webcam. If you're "
         "trying to open a webcam, make sure you video_path is an integer!"))

     vid_w = vid.get(cv2.CAP_PROP_FRAME_WIDTH)
     vid_h = vid.get(cv2.CAP_PROP_FRAME_HEIGHT)

     # skip frames until reaching start_frame. start_frame is a frame
     # number, so it has to go to the frame-index property rather than
     # the millisecond position.
     if start_frame > 0:
         vid.set(cv2.CAP_PROP_POS_FRAMES, start_frame)

     accum_time = 0
     curr_fps = 0
     fps = "FPS: ??"
     prev_time = timer()

     input_size = self.input_shape[:2]

     try:
         while True:
             retval, img = vid.read()
             if not retval:
                 print("Done!")
                 return

             # model to predict (the list adds the batch dimension)
             x = np.array([preprocess(img, input_size)])
             y = self.model.predict(x)

             result = self.prior_util.decode(y[0], segment_threshold, link_threshold)

             for r in result:
                 xy = rbox_to_polygon(r[:5])
                 # Rescale from network-input coordinates to frame pixels.
                 # NOTE(review): input_size is input_shape[:2]; if that is
                 # (height, width) this only lines up with (x, y) points
                 # for square inputs - confirm.
                 xy = xy / input_size * [vid_w, vid_h]
                 xy = xy.reshape((-1, 1, 2))
                 xy = np.round(xy).astype(np.int32)
                 cv2.polylines(img, [xy], True, (0, 0, 255))

             # calculate fps: count frames and refresh the label each time
             # more than one second of processing time has accumulated
             curr_time = timer()
             exec_time = curr_time - prev_time
             prev_time = curr_time
             accum_time = accum_time + exec_time
             curr_fps = curr_fps + 1
             if accum_time > 1:
                 accum_time = accum_time - 1
                 fps = "FPS: " + str(curr_fps)
                 curr_fps = 0

             # draw fps
             cv2.rectangle(img, (0, 0), (50, 17), (255, 255, 255), -1)
             cv2.putText(img, fps, (3, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0, 0, 0), 1)

             cv2.imshow("SegLink detection", img)
             cv2.waitKey(10)
     finally:
         # Free the capture device/file on normal exit and on errors alike.
         vid.release()

예제 #11

0

파일 보기

파일: source.py 프로젝트: VadymBezdushnyi/ssd_detectors

# In[6]
# Collect 16 randomly chosen validation samples: the preprocessed network
# input, a displayable RGB image scaled to [0, 1] and the ground-truth data.
inputs = []
images = []
data = []

gtu = gt_util_val

# Fixed seed so the same samples are drawn on every run.
np.random.seed(1337)

for i in np.random.randint(0, gtu.num_samples, 16):

    img_path = os.path.join(gtu.image_path, gtu.image_names[i])
    img = cv2.imread(img_path)

    inputs.append(preprocess(img, image_size))

    h, w = image_size
    # The interpolation flag has to be passed by keyword; as third
    # positional argument it would land in the `dst` parameter.
    img = cv2.resize(img, (w, h), interpolation=cv2.INTER_LINEAR).astype('float32')  # should we do resizing
    img = img[:, :, (2, 1, 0)]  # BGR to RGB
    img /= 255
    images.append(img)

    boxes = gtu.data[i]
    data.append(boxes)

inputs = np.asarray(inputs)

# First sample, kept separately for the test further on.
test_idx = 0
test_input = inputs[test_idx]
test_img = images[test_idx]