def predict_tbpp(self):
    """Run the ``self.tbpp`` model on ``self.image_name`` and decode its output.

    The outputs of ``resize_and_pad`` are stored on ``self.resized_image``,
    ``self.padding`` and ``self.im_shape`` as a side effect.

    Returns whatever ``self.prior_util.decode`` produces for the single
    prediction, using ``self.confidence_threshold`` and ``fast_nms=False``.
    """
    resized, padding, shape = resize_and_pad(self.image_name)
    self.resized_image = resized
    self.padding = padding
    self.im_shape = shape

    # The model is fed a batch containing exactly one preprocessed image.
    batch = np.array([preprocess(self.resized_image, (512, 512))])
    predictions = self.tbpp.predict(batch, batch_size=1, verbose=1)

    return self.prior_util.decode(
        predictions[0], self.confidence_threshold, fast_nms=False)
def callback(self, data):
    """Detect text in one incoming image message and republish it annotated.

    The message is converted to a BGR image, run through ``self.model``,
    the decoded boxes are drawn as red polygons, an FPS counter is drawn in
    the top-left corner, and the result is published on ``self.image_pub``.

    # Arguments
    data: image message accepted by ``self.bridge.imgmsg_to_cv2``.

    If the conversion fails the error is printed and the message is
    dropped; previously execution continued with ``img`` unbound.
    """
    try:
        img = self.bridge.imgmsg_to_cv2(data, "bgr8")
    except CvBridgeError as e:
        print(e)
        # No frame was decoded, so there is nothing to predict on or publish.
        return

    input_size = self.input_shape[:2]
    vid_h, vid_w = img.shape[:2]

    # model to predict
    x = np.array([preprocess(img, input_size)])
    with self.graph.as_default():
        y = self.model.predict(x)
    result = self.prior_util.decode(y[0], segment_threshold=0.55, link_threshold=0.45)

    for r in result:
        xy = rbox_to_polygon(r[:5])
        # Rescale from model-input coordinates to the size of the frame.
        xy = xy / input_size * [vid_w, vid_h]
        xy = xy.reshape((-1, 1, 2))
        xy = np.round(xy).astype(np.int32)
        cv2.polylines(img, [xy], True, (0, 0, 255))

    # calculate fps: count callbacks and refresh the label once per second
    curr_time = timer()
    exec_time = curr_time - self.prev_time
    self.prev_time = curr_time
    self.accum_time = self.accum_time + exec_time
    self.curr_fps = self.curr_fps + 1
    if self.accum_time > 1:
        self.accum_time = self.accum_time - 1
        self.fps = "FPS: " + str(self.curr_fps)
        self.curr_fps = 0

    # draw fps
    cv2.rectangle(img, (0, 0), (50, 17), (255, 255, 255), -1)
    cv2.putText(img, self.fps, (3, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0, 0, 0), 1)

    try:
        self.image_pub.publish(self.bridge.cv2_to_imgmsg(img, "bgr8"))
    except CvBridgeError as e:
        print(e)
# Excerpt of a frame-by-frame detection loop. `vid`, `input_shape`,
# `det_model`, `prior_util`, the two thresholds, `accum_time` and `curr_fps`
# are bound outside this excerpt, and the loop body continues past it.
prev_time = timer()
input_size = input_shape[:2]
# NOTE(review): the two record_* lists and init_time are only initialised in
# the visible part; presumably they are filled further down - confirm.
record_buffer = []
record_timestamps = []
init_time = timer()
while True:
    retval, img = vid.read()
    if not retval:
        # read() reported no frame: stop processing.
        print("Done!")
        break
    # model to predict (batch of one preprocessed frame)
    x = np.array([preprocess(img, input_size)])
    y = det_model.predict(x)
    result = prior_util.decode(y[0], segment_threshold, link_threshold)
    # img1 is a copy of the frame, img2 a zero-filled image of the same shape.
    img1 = np.copy(img)
    img2 = np.zeros_like(img)
    # calculate fps: accumulate per-frame wall time and count frames
    curr_time = timer()
    exec_time = curr_time - prev_time
    prev_time = curr_time
    accum_time = accum_time + exec_time
    curr_fps = curr_fps + 1
    if accum_time > 1:
        # More than one second accumulated: carry the remainder over.
        accum_time = accum_time - 1
def index(request):
    """Detect text in a remote image, save the annotated image and upload it.

    The request body is parsed as a Python literal dict with the keys
    ``username``, ``imagename`` and ``imageurl``. The image is downloaded,
    run through ``sl_model``, detected boxes are drawn in red, the result is
    written under ``assets/`` and POSTed to ``http://localhost:4000/upload``.

    # Arguments
    request: the incoming request; only ``request.body`` is read.

    # Returns
    The rendered ``index.html`` response.

    Changes compared with the previous version:
    - the upload file handle is closed deterministically (it was leaked);
    - the output file name is reduced to its base name so that path
      separators in ``username``/``imagename`` cannot escape ``assets/``;
    - box post-processing whose results were never used was removed.
    """
    import os  # local import: only needed for the file-name sanitisation

    # NOTE(review): the body is untrusted input. ast.literal_eval does not
    # execute code, but json.loads would be the conventional parser if the
    # clients send JSON - confirm the wire format before switching.
    decodeddata = request.body.decode('utf-8')
    dictdata = ast.literal_eval(decodeddata)
    username = dictdata["username"]
    imagename = dictdata["imagename"]
    imageurl = dictdata["imageurl"]

    start_time = time.time()

    input_size = input_shape[:2]
    print(input_size)

    # getting the image
    # NOTE(review): imageurl is caller-controlled and fetched server-side
    # without a timeout or host restriction - confirm this is acceptable.
    response = requests.get(imageurl)
    img = np.array(Image.open(BytesIO(response.content)))
    # NOTE(review): PIL presumably yields RGB while cv2.imwrite expects BGR;
    # verify the channel order expected by preprocess() and the saved file.
    img_h = img.shape[0]
    img_w = img.shape[1]
    img1 = np.copy(img)

    # model to predict
    x = np.array([preprocess(img, input_size)])
    elapsed_time = time.time() - start_time
    print("Performace measure : " + str(elapsed_time))

    # Model start
    start_time = time.time()
    with sl_graph.as_default():
        with sl_session.as_default():
            y = sl_model.predict(x)
    elapsed_time = time.time() - start_time
    print("Performace measure : " + str(elapsed_time))
    # Model end

    start_time = time.time()
    result = prior_util.decode(y[0], confidence_threshold)

    if len(result) > 0:
        rboxes = result[:, 12:17]
        boxes = np.asarray([rbox3_to_polygon(r) for r in rboxes])
        # Scale the polygons by the image width/height to get pixel positions.
        xy = np.round(boxes * [img_w, img_h]).astype(np.int32)
        cv2.polylines(img1, tuple(xy), True, (0, 0, 255))

    # draw: the (possibly annotated) image is always written and uploaded
    output_name = os.path.basename('mloutput_' + username + "_" + imagename)
    saveimageindjango = 'assets/' + output_name
    cv2.imwrite(saveimageindjango, img1)
    elapsed_time = time.time() - start_time
    print("Performace measure : " + str(elapsed_time))

    print("Sending to back end...")
    headers = {
        'username': username,
    }
    with open(saveimageindjango, 'rb') as image_file:
        response = requests.request("POST",
                                    'http://localhost:4000/upload',
                                    files={'file': image_file},
                                    headers=headers)
    print(response)
    print("Backend Process Complete")

    context = {"data": "data"}
    return render(request, 'index.html', context)
confs = [] for angle in angles: rot_img, rot_mat, bounds = rotate_image(map_img, angle, original_shape) height = rot_img.shape[0] width = rot_img.shape[1] current_x = 0 current_y = 0 while current_y + crop_h < height: while current_x + crop_w < width: crop_img = rot_img[current_y:current_y + crop_h, current_x:current_x + crop_w] if do_preprocess: crop_img = preprocess(crop_img, (512, 512)) model_output = model.predict(np.array([crop_img]), batch_size=1, verbose=0) res = prior_util.decode(model_output[0], confidence_threshold, fast_nms=False) bboxes = res[:, 0:4] quades = res[:, 4:12] rboxes = res[:, 12:17] conf = res[:, 17:] for j in range(len(rboxes)): # convert rbox
def detect_motion(frameCount):
    """Endlessly read frames from ``cap``, annotate detected text, publish them.

    For every frame that could be read, ``sl_model`` is run, the decoded
    boxes are drawn in red on a copy of the frame, and that copy is stored
    in the global ``outputFrame`` while holding ``lock``.

    # Arguments
    frameCount: not used by this function.
    """
    # lock variables
    global vs, outputFrame, lock

    while True:
        ret, frame = cap.read()
        print("READING FRAME")
        if frame is None:
            # Nothing was read this round; try again.
            continue

        source = np.array(frame)
        img_h, img_w = source.shape[0], source.shape[1]
        annotated = np.copy(source)
        img2 = np.zeros_like(source)

        # model to predict (batch of one)
        batch = np.array([preprocess(source, input_size)])

        start_time = time.time()
        with sl_graph.as_default():
            with sl_session.as_default():
                prediction = sl_model.predict(batch)

        result = prior_util.decode(prediction[0], confidence_threshold)

        if len(result) > 0:
            bboxs = result[:, 0:4]
            quads = result[:, 4:12]
            rboxes = result[:, 12:17]

            polygons = np.asarray([rbox3_to_polygon(rb) for rb in rboxes])

            # Scale the polygons by the frame width/height and draw them.
            xy = np.round(polygons * [img_w, img_h]).astype(np.int32)
            cv2.polylines(annotated, tuple(xy), True, (0, 0, 255))

            # Enlarge each rotated box by a fraction of its column-3 value.
            rboxes = np.array(
                [polygon_to_rbox(p) for p in np.reshape(polygons, (-1, 4, 2))])
            bh = rboxes[:, 3]
            rboxes[:, 2] += bh * 0.1
            rboxes[:, 3] += bh * 0.2
            boxes = np.array([rbox_to_polygon(rb) for rb in rboxes])

            boxes = np.flip(boxes, axis=1)  # TODO: fix order of points, why?
            boxes = np.reshape(boxes, (-1, 8))

            # width > height, in square world
            wider_than_tall = rboxes[:, 2] > rboxes[:, 3]
            # box inside image
            inside_image = ~np.any((boxes < 0) | (boxes > 512), axis=1)
            keep = np.logical_and(wider_than_tall, inside_image)

            boxes = boxes[keep]
            rboxes = rboxes[keep]
            xy = xy[keep]

            if len(boxes) == 0:
                boxes = np.empty((0, 8))

            top = bottom = left = right = 10
            total_transcript = ""

        # draw fps
        frame = annotated

        # acquire the lock, set the output frame, and release the lock
        with lock:
            outputFrame = frame.copy()
def run(self, video_path=0, start_frame=0, conf_thresh=0.6):
    """ Runs the test on a video (or webcam)

    # Arguments
    video_path: A file path to a video to be tested on. Can also be a number,
                in which case the webcam with the same number (i.e. 0) is
                used instead
    start_frame: The number of the first frame of the video to be processed
                 by the network.
    conf_thresh: Threshold of confidence. Any boxes with lower confidence
                 are not visualized.

    # Raises
    IOError: if the video file or webcam cannot be opened.

    Returns when the capture stops delivering frames. The capture is
    released on every exit path.
    """
    vid = cv2.VideoCapture(video_path)
    if not vid.isOpened():
        raise IOError(("Couldn't open video file or webcam. If you're "
        "trying to open a webcam, make sure you video_path is an integer!"))

    try:
        vid_w = vid.get(cv2.CAP_PROP_FRAME_WIDTH)
        vid_h = vid.get(cv2.CAP_PROP_FRAME_HEIGHT)

        # Skip frames until reaching start_frame. The position is a frame
        # index, so it is set through CAP_PROP_POS_FRAMES (it used to be
        # written to CAP_PROP_POS_MSEC, i.e. interpreted as milliseconds).
        if start_frame > 0:
            vid.set(cv2.CAP_PROP_POS_FRAMES, start_frame)

        accum_time = 0
        curr_fps = 0
        fps = "FPS: ??"
        prev_time = timer()

        input_size = self.input_shape[:2]

        while True:
            retval, img = vid.read()
            if not retval:
                print("Done!")
                return

            # model to predict
            x = np.array([preprocess(img, input_size)])
            y = self.model.predict(x)
            result = self.prior_util.decode(y[0], confidence_threshold=conf_thresh)

            for r in result:
                # Box corners are multiplied by the frame size to get pixels.
                xmin = int(round(r[0] * vid_w))
                ymin = int(round(r[1] * vid_h))
                xmax = int(round(r[2] * vid_w))
                ymax = int(round(r[3] * vid_h))
                conf = r[4]
                label = int(r[5])
                color = self.class_colors[label]
                text = self.class_names[label] + " " + ('%.2f' % conf)

                # draw box
                cv2.rectangle(img, (xmin, ymin), (xmax, ymax), color, 2)

                # draw label
                text_top = (xmin, ymin - 10)
                text_bot = (xmin + 90, ymin + 5)
                text_pos = (xmin + 5, ymin)
                cv2.rectangle(img, text_top, text_bot, color, -1)
                cv2.putText(img, text, text_pos, cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0, 0, 0), 1)

            # Calculate FPS
            # This computes FPS for everything, not just the model's execution
            # which may or may not be what you want
            curr_time = timer()
            exec_time = curr_time - prev_time
            prev_time = curr_time
            accum_time = accum_time + exec_time
            curr_fps = curr_fps + 1
            if accum_time > 1:
                accum_time = accum_time - 1
                fps = "FPS: " + str(curr_fps)
                curr_fps = 0

            # Draw FPS in top left corner
            cv2.rectangle(img, (0, 0), (50, 17), (255, 255, 255), -1)
            cv2.putText(img, fps, (3, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0, 0, 0), 1)

            cv2.imshow("SSD detection", img)
            cv2.waitKey(10)
    finally:
        # Free the file/camera handle on normal exit and on errors alike.
        vid.release()
def _match_previous_person(bbox, previous_people):
    """Return the id of the previous-frame person overlapping ``bbox`` most.

    ``previous_people`` is a list of ``(bbox_dict, person_id)`` pairs. The
    first entry with the strictly largest non-zero ``get_iou`` value wins;
    ``None`` is returned when nothing overlaps.
    """
    best_id = None
    best_iou = 0
    for prev_bbox, prev_id in previous_people:
        iou = get_iou(prev_bbox, bbox)
        if iou > best_iou:
            best_id, best_iou = prev_id, iou
    return best_id


def detect_motion(frameCount):
    """Endlessly read frames, annotate text and objects, and publish them.

    Per frame that could be read from ``cap``:
    1. ``tfnet`` (YOLO) predictions are computed on the raw frame.
    2. ``sl_model`` text boxes are decoded, drawn in red, cropped and passed
       to pytesseract; the transcripts are printed.
    3. YOLO boxes are drawn. Detections labelled "whole" are skipped.
       Detections labelled "person" get an integer index that is carried
       over from the best-overlapping person of the previous frame, or a new
       index if nobody overlaps.
    4. The annotated image is stored in the global ``outputFrame`` under
       ``lock``.

    # Arguments
    frameCount: not used by this function.

    Changes compared with the previous version:
    - the tracking state lives outside the frame loop; it used to be reset at
      the top of every iteration, so the IoU matching never ran and indices
      restarted at 1 on each frame;
    - the previous frame's people are kept as ``(bbox, id)`` pairs instead of
      being serialised to a string and re-parsed, which also removes the
      bare ``except:`` membership checks;
    - OCR crops are clamped at the image border and empty crops are skipped;
    - all boxes are drawn on one image, so non-person boxes are no longer
      drawn on a frame that gets replaced afterwards.
    """
    # lock variables
    global vs, outputFrame, lock

    # Tracking state that must survive from one frame to the next.
    previous_people = []  # (bbox dict, person index) of the last frame
    peopleindex = 0       # highest person index handed out so far

    border = 10  # white margin added around every OCR crop, in pixels

    # loop over frames from the video stream
    while True:
        ret, frame = cap.read()
        print("READING FRAME")
        if frame is None:
            # Nothing was read this round; try again.
            continue

        # yolo
        resultyolo = tfnet.return_predict(frame)

        # text detection: model to predict
        img = np.array(frame)
        img_h = img.shape[0]
        img_w = img.shape[1]
        canvas = np.copy(img)

        x = np.array([preprocess(img, input_size)])
        with sl_graph.as_default():
            with sl_session.as_default():
                y = sl_model.predict(x)
        result = prior_util.decode(y[0], confidence_threshold)

        total_transcript = ""
        if len(result) > 0:
            rboxes = result[:, 12:17]
            boxes = np.asarray([rbox3_to_polygon(r) for r in rboxes])

            # Scale the polygons by the frame width/height and draw them.
            xy = np.round(boxes * [img_w, img_h]).astype(np.int32)
            cv2.polylines(canvas, tuple(xy), True, (0, 0, 255))

            # Enlarge each rotated box by a fraction of its column-3 value.
            rboxes = np.array([polygon_to_rbox(b) for b in np.reshape(boxes, (-1, 4, 2))])
            bh = rboxes[:, 3]
            rboxes[:, 2] += bh * 0.1
            rboxes[:, 3] += bh * 0.2
            boxes = np.array([rbox_to_polygon(f) for f in rboxes])

            boxes = np.flip(boxes, axis=1)  # TODO: fix order of points, why?
            boxes = np.reshape(boxes, (-1, 8))

            # width > height, in square world
            boxes_mask_a = np.array([b[2] > b[3] for b in rboxes])
            # box inside image
            boxes_mask_b = np.array([not (np.any(b < 0) or np.any(b > 512)) for b in boxes])
            boxes_mask = np.logical_and(boxes_mask_a, boxes_mask_b)
            xy = xy[boxes_mask]

            # To get the cropped out boxes and run pytesseract over it
            for quad in xy:
                # Points 0 and 2 of the polygon are used as opposite corners.
                # Negative bounds would wrap around in a slice, so clamp at 0.
                y0 = max(quad[0][1] - 5, 0)
                y1 = max(quad[2][1] + 5, 0)
                x0 = max(quad[0][0] - 5, 0)
                x1 = max(quad[2][0] + 5, 0)
                crop_img = canvas[y0:y1, x0:x1]
                if crop_img.size == 0:
                    # Degenerate box: nothing to pad or transcribe.
                    continue
                crop_img = cv2.copyMakeBorder(crop_img, border, border, border, border,
                                              cv2.BORDER_CONSTANT, value=[255, 255, 255])
                transcript = pytesseract.image_to_string(crop_img, lang='eng').upper()
                total_transcript += transcript + "\n"
                print(transcript)
            print(total_transcript)

        # YOLO-9000 : Drawing Boxes
        current_people = []
        for res in resultyolo:
            label = res["label"]
            if label == "whole":
                continue

            top = (res["topleft"]["x"], res["topleft"]["y"])
            bottom = (res["bottomright"]["x"], res["bottomright"]["y"])

            if label != "person":
                cv2.rectangle(canvas, top, bottom, (255, 0, 0), 2)
                cv2.putText(canvas, label, top, cv2.FONT_HERSHEY_DUPLEX, 1.0, (0, 0, 255))
                continue

            # IOU PART - BEGIN
            # Inherit the index of the best-overlapping person of the
            # previous frame; otherwise this is somebody new.
            bbox = {'x1': top[0], 'x2': bottom[0], 'y1': top[1], 'y2': bottom[1]}
            person_id = _match_previous_person(bbox, previous_people)
            if person_id is None:
                peopleindex = peopleindex + 1
                person_id = peopleindex
            current_people.append((bbox, person_id))
            # IOU PART - END

            cv2.rectangle(canvas, top, bottom, (255, 0, 0), 2)
            cv2.putText(canvas, "index : " + str(person_id), top,
                        cv2.FONT_HERSHEY_DUPLEX, 1.0, (0, 0, 255))

        # Only the latest frame is needed for matching, so the tracking state
        # cannot grow without bound.
        previous_people = current_people

        # acquire the lock, set the output frame, and release the lock
        with lock:
            outputFrame = canvas.copy()
def _rotated_region_bounds(region, rot_mat):
    """Rotate the first four points of ``region`` and return their bounds.

    ``region`` is indexed as an (N, 2) array of points and ``rot_mat`` as the
    2x3 matrix from ``cv2.getRotationMatrix2D``. Coordinates are truncated to
    integers before ``(xmin, xmax, ymin, ymax)`` is computed.
    """
    # add col for rotation (homogeneous coordinates)
    homogeneous = np.concatenate([region, np.ones([region.shape[0], 1])], axis=1)
    corners = rot_mat.dot(homogeneous.T).T[:4].astype(np.int64)
    return (np.min(corners[:, 0]), np.max(corners[:, 0]),
            np.min(corners[:, 1]), np.max(corners[:, 1]))


def tbpp_raw_generate_data(map_images_dir, image_paths, regions, batch_size,
                           prior_util, encode=True, do_rotate=False,
                           do_preprocess=False):
    """Yield batches of 512x512 crops and their box targets for one epoch.

    Every image is (optionally) rotated from -90 to 90 degrees in 5 degree
    steps and scanned with a 512x512 window moved in steps of 400 pixels.
    For each window the axis-aligned bounds of the rotated ``regions`` that
    pass the window test are stored as 8 polygon coordinates divided by the
    crop size, plus a trailing class column of ones.

    # Arguments
    map_images_dir: directory that contains the images.
    image_paths: image file names relative to ``map_images_dir``.
    regions: iterable of point arrays; the same regions are applied to every
             image. NOTE(review): confirm regions are not per-image.
    batch_size: number of crops per yielded batch.
    prior_util: object whose ``encode`` turns a box array into a target.
    encode: if true, targets are passed through ``prior_util.encode``.
    do_rotate: if true, use all rotation angles instead of only 0.
    do_preprocess: if true, each crop is passed through ``preprocess``.

    # Yields
    ``(inputs, targets)`` as float32 arrays. A trailing partial batch is not
    yielded.

    Fixes compared with the previous version:
    - boxes are built as a float ``(n, 8)`` array; the integer array made
      the in-place division fail and an empty window raised ``IndexError``;
    - the mean is subtracted from a float copy of the crop; the in-place
      subtraction targeted a uint8 view of the rotated image, which fails to
      cast and would also have modified overlapping windows;
    - the rotation matrix and the rotated region bounds are computed once
      per angle instead of once per window.
    """
    crop_h = 512
    crop_w = 512
    step = 400
    angles = range(-90, 95, 5) if do_rotate else [0]
    mean = np.array([104, 117, 123])

    inputs, targets = [], []

    idxs = np.arange(len(image_paths))
    np.random.shuffle(idxs)

    for i in idxs:
        filepath = os.path.join(map_images_dir, image_paths[i])
        map_img = cv2.imread(filepath)
        original_shape = map_img.shape
        image_center = (original_shape[1] // 2, original_shape[0] // 2)

        for angle in angles:
            rot_img, _, _ = rotate_image(map_img, angle, original_shape)
            height = rot_img.shape[0]
            width = rot_img.shape[1]

            # Regions are rotated about the centre of the unrotated image.
            # NOTE(review): this ignores the matrix returned by rotate_image;
            # confirm both describe the same transform.
            rot_mat = cv2.getRotationMatrix2D(image_center, angle, scale=1.0)
            bounds = [_rotated_region_bounds(region, rot_mat) for region in regions]

            # Same windows as stepping while `pos + crop < size`.
            for current_y in range(0, height - crop_h, step):
                for current_x in range(0, width - crop_w, step):
                    crop_img = rot_img[current_y:current_y + crop_h,
                                       current_x:current_x + crop_w]
                    if do_preprocess:
                        crop_img = preprocess(crop_img, (512, 512))

                    crop_boxes = []
                    for xmin, xmax, ymin, ymax in bounds:
                        # NOTE(review): x requires full containment while y
                        # only requires overlap, so y coordinates can fall
                        # outside the crop - confirm this is intended.
                        if (xmin > current_x and xmax < (current_x + crop_w)
                                and ymin < (current_y + crop_h)
                                and ymax > current_y):
                            crop_xmin = xmin - current_x
                            crop_ymin = ymin - current_y
                            crop_xmax = xmax - current_x
                            crop_ymax = ymax - current_y
                            crop_boxes.append([
                                crop_xmin, crop_ymax,
                                crop_xmax, crop_ymax,
                                crop_xmax, crop_ymin,
                                crop_xmin, crop_ymin,
                            ])

                    # reshape keeps the array two-dimensional even when the
                    # window contains no box.
                    # NOTE(review): windows without boxes are kept as samples
                    # with empty targets; verify prior_util.encode accepts a
                    # (0, 9) array.
                    crop_boxes = np.array(crop_boxes, dtype=np.float64).reshape(-1, 8)
                    crop_boxes[:, 0::2] /= crop_img.shape[1]
                    crop_boxes[:, 1::2] /= crop_img.shape[0]

                    # append classes
                    crop_boxes = np.concatenate(
                        [crop_boxes, np.ones([crop_boxes.shape[0], 1])], axis=1)

                    # NOTE(review): the mean is subtracted even when
                    # do_preprocess is set; check that preprocess() does not
                    # already subtract it.
                    crop_img = crop_img.astype(np.float32) - mean[np.newaxis, np.newaxis, :]

                    inputs.append(crop_img)
                    targets.append(crop_boxes)

                    if len(targets) == batch_size:
                        if encode:
                            targets = [prior_util.encode(y) for y in targets]
                        # NOTE(review): with encode=False the per-crop box
                        # counts may differ, in which case this conversion
                        # raises, as it did before.
                        tmp_inputs = np.array(inputs, dtype=np.float32)
                        tmp_targets = np.array(targets, dtype=np.float32)
                        inputs, targets = [], []
                        yield tmp_inputs, tmp_targets

    print('NEW epoch')
    print('EXIT generator')
def run(self, video_path=0, start_frame=0, segment_threshold=0.55, link_threshold=0.45):
    """ Runs the test on a video (or webcam)

    # Arguments
    video_path: A file path to a video to be tested on. Can also be a number,
                in which case the webcam with the same number (i.e. 0) is
                used instead
    start_frame: The number of the first frame of the video to be processed
                 by the network.
    segment_threshold: Passed to `prior_util.decode` as its second argument.
    link_threshold: Passed to `prior_util.decode` as its third argument.

    # Raises
    IOError: if the video file or webcam cannot be opened.

    Returns when the capture stops delivering frames. The capture is
    released on every exit path.
    """
    vid = cv2.VideoCapture(video_path)
    if not vid.isOpened():
        raise IOError(("Couldn't open video file or webcam. If you're "
        "trying to open a webcam, make sure you video_path is an integer!"))

    try:
        vid_w = vid.get(cv2.CAP_PROP_FRAME_WIDTH)
        vid_h = vid.get(cv2.CAP_PROP_FRAME_HEIGHT)

        # skip frames until reaching start_frame. The position is a frame
        # index, so it is set through CAP_PROP_POS_FRAMES (it used to be
        # written to CAP_PROP_POS_MSEC, i.e. interpreted as milliseconds).
        if start_frame > 0:
            vid.set(cv2.CAP_PROP_POS_FRAMES, start_frame)

        accum_time = 0
        curr_fps = 0
        fps = "FPS: ??"
        prev_time = timer()

        input_size = self.input_shape[:2]

        while True:
            retval, img = vid.read()
            if not retval:
                print("Done!")
                return

            # model to predict
            x = np.array([preprocess(img, input_size)])
            y = self.model.predict(x)
            result = self.prior_util.decode(y[0], segment_threshold, link_threshold)

            for r in result:
                xy = rbox_to_polygon(r[:5])
                # Rescale from model-input coordinates to the video frame size.
                xy = xy / input_size * [vid_w, vid_h]
                xy = xy.reshape((-1, 1, 2))
                xy = np.round(xy).astype(np.int32)
                cv2.polylines(img, [xy], True, (0, 0, 255))

            # calculate fps: count frames and refresh the label once per second
            curr_time = timer()
            exec_time = curr_time - prev_time
            prev_time = curr_time
            accum_time = accum_time + exec_time
            curr_fps = curr_fps + 1
            if accum_time > 1:
                accum_time = accum_time - 1
                fps = "FPS: " + str(curr_fps)
                curr_fps = 0

            # draw fps
            cv2.rectangle(img, (0, 0), (50, 17), (255, 255, 255), -1)
            cv2.putText(img, fps, (3, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0, 0, 0), 1)

            cv2.imshow("SegLink detection", img)
            cv2.waitKey(10)
    finally:
        # Free the file/camera handle on normal exit and on errors alike.
        vid.release()
# In[6]
# Draw 16 sample indices from the validation set with a fixed seed and keep,
# per sample: the preprocessed network input, an RGB float image scaled by
# 1/255 for display, and the ground-truth entry from gtu.data.
inputs = []
images = []
data = []

gtu = gt_util_val

np.random.seed(1337)  # fixed seed so the same samples are drawn every run

for i in np.random.randint(0, gtu.num_samples, 16):
    img_path = os.path.join(gtu.image_path, gtu.image_names[i])
    img = cv2.imread(img_path)

    inputs.append(preprocess(img, image_size))

    h, w = image_size
    # The interpolation flag must be passed by keyword: the third positional
    # parameter of cv2.resize is `dst`, not `interpolation`.
    img = cv2.resize(img, (w, h), interpolation=cv2.INTER_LINEAR).astype('float32')  # should we do resizing
    img = img[:, :, (2, 1, 0)]  # BGR to RGB
    img /= 255
    images.append(img)

    boxes = gtu.data[i]
    data.append(boxes)

inputs = np.asarray(inputs)

# Pick the first sample for the cells that follow.
test_idx = 0
test_input = inputs[test_idx]
test_img = images[test_idx]