def writeVideoOutput(self, frameNum, list_detections, tracks):
    if self.input_images_dir is None or self.output_video_path is None:
        return
    # Read the corresponding frame
    img = cv2.imread(self.dict_frame2path[frameNum])
    # Resize it
    img = cv2.resize(img, (self.out_vid_width, self.out_vid_height),
                     interpolation=cv2.INTER_AREA)
    # Draw the detection boxes
    bb_xyxy = [det['box'] for det in list_detections]
    all1 = [None] * len(bb_xyxy)
    img = draw_bboxes(img, bb_xyxy, all1)
    resizeFactor = self.resolution[0] / 2560
    # Draw the tracks
    if len(tracks) > 0:
        bbox_xyxy = tracks[:, :4] * resizeFactor
        identities = tracks[:, 4]
        img = draw_bboxes(img, bbox_xyxy, identities)
    # Draw the frame number as well
    img = draw_frameNum(img,
                        (self.out_vid_width // 2, self.out_vid_height // 10),
                        frameNum)
    # Write to file
    self.output.write(img)
def detect(self):
    xmin, ymin, xmax, ymax = self.area
    frame_no = 0
    avg_fps = 0.0
    while self.vdo.grab():
        frame_no += 1
        _, ori_im = self.vdo.retrieve()
        im = ori_im[ymin:ymax, xmin:xmax]
        # RFBNet inference (see the RFBNet usage tutorial)
        results = test_net(im, net, detector, args.cuda,
                           BaseTransform(net.size, rgb_means, (2, 0, 1)),
                           top_k, thresh=0.4)
        bbox_xywh, cls_conf = bbox_to_xywh_cls_conf(results)
        if bbox_xywh is not None:
            outputs = self.deepsort.update(bbox_xywh, cls_conf, im)
            if len(outputs) > 0:
                bbox_xyxy = outputs[:, :4]
                identities = outputs[:, -1]
                ori_im = draw_bboxes(ori_im, bbox_xyxy, identities,
                                     offset=(xmin, ymin))
        cv2.imshow("test", ori_im)
        cv2.waitKey(1)
        if self.write_video:
            self.output.write(ori_im)
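# `bbox_to_xywh_cls_conf` is used by several of the snippets here but never
# defined in them. A minimal sketch of what it is assumed to do, given that
# the detectors return rows of (x1, y1, x2, y2, score) and DeepSort.update
# expects center-format boxes plus a confidence vector. The name matches the
# calls above; the body and the score_threshold parameter are assumptions
# (note that one snippet further down calls a per-class variant that also
# takes a class_id):
import numpy as np

def bbox_to_xywh_cls_conf(results, score_threshold=0.4):
    """Convert (x1, y1, x2, y2, score) rows to (xc, yc, w, h) plus scores."""
    results = np.asarray(results)
    if results.size == 0:
        return None, None
    results = results[results[:, 4] > score_threshold]
    if len(results) == 0:
        return None, None
    bbox_xywh = np.empty((len(results), 4), dtype=np.float32)
    bbox_xywh[:, 0] = (results[:, 0] + results[:, 2]) / 2.0  # center x
    bbox_xywh[:, 1] = (results[:, 1] + results[:, 3]) / 2.0  # center y
    bbox_xywh[:, 2] = results[:, 2] - results[:, 0]          # width
    bbox_xywh[:, 3] = results[:, 3] - results[:, 1]          # height
    return bbox_xywh, results[:, 4]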
def detect(self):
    xmin, ymin, xmax, ymax = self.area
    while True:
        start = time.time()
        ori_im = ipcam.getframe()
        im = ori_im[ymin:ymax, xmin:xmax, (2, 1, 0)]
        bbox_xywh, cls_conf, cls_ids = self.yolo3(im)
        if bbox_xywh is not None:
            mask = cls_ids == 0
            bbox_xywh = bbox_xywh[mask]
            bbox_xywh[:, 3] *= 1.2
            cls_conf = cls_conf[mask]
            outputs = self.deepsort.update(bbox_xywh, cls_conf, im)
            if len(outputs) > 0:
                bbox_xyxy = outputs[:, :4]
                identities = outputs[:, -1]
                ori_im = draw_bboxes(ori_im, bbox_xyxy, identities,
                                     offset=(xmin, ymin))
        end = time.time()
        print("time: {}s, fps: {}".format(end - start, 1 / (end - start)))
        cv2.imshow("test", ori_im)
        cv2.waitKey(1)
        if self.write_video:
            self.output.write(ori_im)
def run(self, video_path, output_path):
    # open input video
    assert os.path.isfile(video_path), "Error: invalid video path"
    vdo = cv2.VideoCapture()
    vdo.open(video_path)
    # open output video
    im_width = int(vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
    im_height = int(vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fourcc = cv2.VideoWriter_fourcc(*"MJPG")
    output_vdo = cv2.VideoWriter(output_path, fourcc, 20, (im_width, im_height))
    # track each frame in video
    start_time = time.time()
    frame_cnt = 0
    while vdo.grab():
        frame_cnt += 1
        _, ori_im = vdo.retrieve()
        im = ori_im[0:im_height, 0:im_width]
        detection = self.detector.run(im)["results"][1]
        bbox_xywh, conf = Detector._bbox_to_xywh_cls_conf(detection, self.args.min_confidence)
        outputs = self.deepsort.update(bbox_xywh, conf, im)
        if len(outputs) > 0:
            bbox_xyxy = outputs[:, :4]
            identities = outputs[:, -1]
            ori_im = draw_bboxes(ori_im, bbox_xyxy, identities)
        elapsed_time = time.time() - start_time
        print("Frame {:05d}, Time {:.3f}s, FPS {:.3f}".format(
            frame_cnt, elapsed_time, frame_cnt / elapsed_time))
        output_vdo.write(ori_im)
def detect(self):
    while self.vdo.grab():
        start = time.time()
        _, ori_im = self.vdo.retrieve()
        # im = cv2.cvtColor(ori_im, cv2.COLOR_BGR2RGB)  # conversion disabled; the detector consumes BGR directly
        im = ori_im
        bbox_xcycwh, cls_conf, cls_ids = self.yolo3(im)
        if bbox_xcycwh is not None:
            # select class person
            mask = cls_ids == 0
            bbox_xcycwh = bbox_xcycwh[mask]
            bbox_xcycwh[:, 3:] *= 1.2
            cls_conf = cls_conf[mask]
            outputs = self.deepsort.update(bbox_xcycwh, cls_conf, im)
            if len(outputs) > 0:
                bbox_xyxy = outputs[:, :4]
                identities = outputs[:, -1]
                ori_im = draw_bboxes(ori_im, bbox_xyxy, identities)
        end = time.time()
        print("time: {}s, fps: {}".format(end - start, 1 / (end - start)))
        if self.args.display:
            cv2.imshow("test", ori_im)
            cv2.waitKey(1)
        if self.args.save_path:
            self.output.write(ori_im)
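# The DeepSort.update contract assumed throughout these snippets (a summary
# inferred from the call sites, not documented API): it takes center-format
# boxes (xc, yc, w, h), a confidence vector, and the frame the boxes came
# from, and returns an (N, 5) array whose rows are (x1, y1, x2, y2, track_id).
# A minimal driver sketch under that assumption (`track_frame` is a
# hypothetical helper, not part of the original code):
import numpy as np

def track_frame(deepsort, frame, det_xcycwh, det_conf):
    """Run one tracking step; return corner boxes and integer track ids."""
    outputs = deepsort.update(np.asarray(det_xcycwh), np.asarray(det_conf), frame)
    if len(outputs) == 0:
        return np.empty((0, 4)), np.empty((0,), dtype=int)
    return outputs[:, :4], outputs[:, -1].astype(int)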
def iterate_tracker(self, img):
    start = time.time()
    ori_im = img.copy()
    im = ori_im.copy()
    bbox_xcycwh, cls_conf, cls_ids = self.yolo3(im)
    if bbox_xcycwh is not None:
        # select class person
        mask = cls_ids == 0
        bbox_xcycwh = bbox_xcycwh[mask]
        bbox_xcycwh[:, 3:] *= 1.2
        cls_conf = cls_conf[mask]
        outputs = self.deepsort.update(bbox_xcycwh, cls_conf, im)
        if len(outputs) > 0:
            bbox_xyxy = outputs[:, :4]
            identities = outputs[:, -1]
            ori_im = draw_bboxes(ori_im, bbox_xyxy, identities)
            return_json = dict()
            return_json["tracks"] = []
            for i, box in enumerate(bbox_xyxy):
                point_x_left = box[0]
                point_x_right = box[2]
                # pick the box edge facing the image center
                if (point_x_left + point_x_right) / 2 > 240.0:
                    point_x = point_x_left
                else:
                    point_x = point_x_right
                point_y = box[1]
                # rescale from the 480x270 detection frame to 1920x1080
                ret_x = int(point_x / 480.0 * 1920)
                ret_y = int(point_y / 270.0 * 1080)
                return_json["curr_speaker_id"] = 0
                return_json["curr_speaker_score"] = 20
                track_data = dict()
                track_data["x"] = ret_x
                track_data["y"] = ret_y
                track_data["speaker_id"] = identities[i] - 1
                return_json["tracks"].append(track_data)
            # crude JSON serialization; json.dumps would be safer here
            return_msg = str(return_json).replace("\'", "\"")
        else:
            return_msg = 'None'
    else:
        return_msg = 'None'
    end = time.time()
    print("time: {}s, fps: {}".format(end - start, 1 / (end - start)))
    if self.args.display:
        cv2.imshow("test", ori_im)
        cv2.waitKey(1)
    return return_msg
def writeVideoOutput(self, frameNum, list_detections, tracks, deadtracks,
                     draw_detections=True, draw_tracks=True, draw_deadtracks=True):
    if self.input_images_dir is None or self.output_video_path is None:
        return
    # Read the corresponding frame
    img = cv2.imread(self.dict_frame2path[frameNum])
    # Resize it
    img = cv2.resize(img, (self.out_vid_width, self.out_vid_height),
                     interpolation=cv2.INTER_AREA)
    # Draw the detection boxes
    if draw_detections:
        bb_xyxy = [det['box'] for det in list_detections]
        all1 = [None] * len(bb_xyxy)
        img = draw_bboxes(img, bb_xyxy, all1)
    resizeFactor = self.resolution[0] / 2560
    # Draw the tracks
    if len(tracks) > 0 and draw_tracks:
        bbox_xyxy = tracks[:, :4] * resizeFactor
        identities = tracks[:, 4]
        img = draw_bboxes(img, bbox_xyxy, identities)
    # Draw boxes for dead tracks for debugging
    if len(deadtracks) > 0 and draw_deadtracks:
        bbox_xyxy = [x[:4] for x in deadtracks]
        bbox_xyxy = [np.array(c) * resizeFactor for c in bbox_xyxy]
        labels = [x[4] for x in deadtracks]
        img = draw_dead_bboxes(img, bbox_xyxy, labels)
    # Draw the frame number as well
    img = draw_frameNum(img,
                        (self.out_vid_width // 2, self.out_vid_height // 10),
                        frameNum)
    # Write to file
    self.output.write(img)
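# `draw_frameNum` and `draw_dead_bboxes` are drawing helpers that are not
# defined anywhere in these snippets. A plausible minimal sketch of the
# former (position semantics and styling are assumptions):
import cv2

def draw_frameNum(img, pos, frameNum):
    """Overlay the current frame number at pixel position `pos` (x, y)."""
    cv2.putText(img, "Frame: {}".format(frameNum), pos,
                cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 255), 2)
    return img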
def detect(self):
    sys.path.append('..')
    from face_detection.config_farm import configuration_10_560_25L_8scales_v1 as cfg
    import mxnet
    symbol_file_path = '/content/deep_sort_pytorch/face_detection/symbol_farm/symbol_10_560_25L_8scales_v1_deploy.json'
    model_file_path = '/content/deep_sort_pytorch/face_detection/saved_model/configuration_10_560_25L_8scales_v1/train_10_560_25L_8scales_v1_iter_1400000.params'
    my_predictor = Predict(
        mxnet=mxnet,
        symbol_file_path=symbol_file_path,
        model_file_path=model_file_path,
        ctx=mxnet.gpu(0),
        receptive_field_list=cfg.param_receptive_field_list,
        receptive_field_stride=cfg.param_receptive_field_stride,
        bbox_small_list=cfg.param_bbox_small_list,
        bbox_large_list=cfg.param_bbox_large_list,
        receptive_field_center_start=cfg.param_receptive_field_center_start,
        num_output_scales=cfg.param_num_output_scales)
    cap = cv2.VideoCapture('/content/faces.mp4')
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter('/content/output_de.avi', fourcc, 20.0, (1920, 1080))
    while True:
        start = time.time()
        ret, ori_im = cap.read()
        if not ret:  # stop at end of video instead of crashing on None
            break
        im = ori_im
        # the face detector replaces the YOLOv3 person detector used elsewhere
        bboxes = my_predictor.predict(im, resize_scale=1, score_threshold=0.3,
                                      top_k=10000, NMS_threshold=0.3,
                                      NMS_flag=True, skip_scale_branch_list=[])
        bboxes = np.array(bboxes)
        bbox_xywh, cls_conf = bbox_to_xywh_cls_conf(bboxes)
        if bbox_xywh is not None:
            outputs = self.deepsort.update(bbox_xywh, cls_conf, im)
            if len(outputs) > 0:
                bbox_xyxy = outputs[:, :4]
                identities = outputs[:, -1]
                ori_im = draw_bboxes(ori_im, bbox_xyxy, identities)
        end = time.time()
        print("time: {}s, fps: {}".format(end - start, 1 / (end - start)))
        print(ori_im.shape)
        out.write(ori_im)
def run(self, sequence_dir, output_file):
    assert os.path.isdir(sequence_dir), "Invalid sequence dir: {}".format(sequence_dir)
    seq_info = gather_sequence_info(sequence_dir, None)
    print("Start to handle sequence: {} (image size: {}, frame {} - {})".format(
        seq_info["sequence_name"], seq_info["image_size"],
        seq_info["min_frame_idx"], seq_info["max_frame_idx"]))
    start_time = time.time()
    frame_cnt = 0
    results = []
    for frame in range(seq_info["min_frame_idx"], seq_info["max_frame_idx"] + 1):
        frame_image = seq_info["image_filenames"][frame]
        frame_cnt += 1
        image = cv2.imread(frame_image)
        detection_result = self.detector.run(frame_image)["results"][1]
        xywh, conf = Detector._bbox_to_xywh_cls_conf(detection_result, self.args.min_confidence)
        output = self.deepsort.update(xywh, conf, image)
        for x1, y1, x2, y2, track_id in output:
            results.append((frame, track_id, x1, y1, x2 - x1, y2 - y1))  # tlwh
        elapsed_time = time.time() - start_time
        print("Frame {:05d}, Time {:.3f}s, FPS {:.3f}".format(
            frame_cnt, elapsed_time, frame_cnt / elapsed_time))
        if self.debug:
            detect_xyxy = detection_result[detection_result[:, 4] > self.args.min_confidence, :4]
            detect_image = draw_bboxes(image, detect_xyxy)
            cv2.imwrite(os.path.join(self.args.debug_dir,
                                     "{}-{:05}-detect.jpg".format(seq_info["sequence_name"], frame)),
                        detect_image)
            if len(output) == 0:
                continue
            image = cv2.imread(frame_image)
            track_image = draw_bboxes(image, output[:, :4], output[:, -1])
            cv2.imwrite(os.path.join(self.args.debug_dir,
                                     "{}-{:05}-track.jpg".format(seq_info["sequence_name"], frame)),
                        track_image)
    print("Done. Now write output to {}".format(output_file))
    with open(output_file, mode="w") as f:
        for row in results:
            # MOT format: frame, id, x, y, w, h, conf, -1, -1, -1
            f.write("%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1\n" % (
                row[0], row[1], row[2], row[3], row[4], row[5]))
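# `gather_sequence_info` follows the helper of the same name in the reference
# deep_sort code: it scans a MOTChallenge-style sequence directory and
# returns a dict describing it. A trimmed sketch covering only the fields the
# snippet above reads (assumes images live in <sequence_dir>/img1):
import os
import cv2

def gather_sequence_info(sequence_dir, detection_file=None):
    image_dir = os.path.join(sequence_dir, "img1")
    image_filenames = {
        int(os.path.splitext(f)[0]): os.path.join(image_dir, f)
        for f in sorted(os.listdir(image_dir))
    }
    first_image = cv2.imread(next(iter(image_filenames.values())))
    return {
        "sequence_name": os.path.basename(sequence_dir),
        "image_filenames": image_filenames,
        "image_size": first_image.shape[:2],
        "min_frame_idx": min(image_filenames),
        "max_frame_idx": max(image_filenames),
    }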
def tracking_box(img_seq, proposals_seq, instances_seq, tracking_seq, box):
    # box: [x, y, w, h]
    output_seq = []
    for i in range(len(img_seq)):
        img = img_seq[i].copy()
        proposals_list = proposals_seq[i]
        instances_list = instances_seq[i]
        tracking_list = tracking_seq[i]
        n_pedestrians_inside = 0
        n_pedestrians_enter = 0
        n_pedestrians_leave = 0
        draw_rect(img, box)
        img = draw_bboxes(img, proposals_list, instances_list)
        for j in range(len(proposals_list)):
            proposal = proposals_list[j]
            instance = instances_list[j]
            track_info = tracking_list[j]
            x_outside, y_outside, x_inside, y_inside = check_overlap_xy(box, proposal)
            if x_outside or y_outside:
                pass
            elif x_inside and y_inside:
                n_pedestrians_inside += 1
            else:
                box_cx, box_cy = get_center(box)
                proposal_cx, proposal_cy = get_center(proposal)
                proposal_vx, proposal_vy = get_center_velocity(track_info)
                if not x_inside and y_inside:
                    if abs(proposal_vx) > abs(proposal_vy):
                        b_c, p_c, p_v = box_cx, proposal_cx, proposal_vx
                    else:
                        b_c, p_c, p_v = box_cy, proposal_cy, proposal_vy
                elif not x_inside:
                    b_c, p_c, p_v = box_cx, proposal_cx, proposal_vx
                elif not y_inside:
                    b_c, p_c, p_v = box_cy, proposal_cy, proposal_vy
                if is_leaving(b_c, p_c, p_v):
                    n_pedestrians_leave += 1
                else:
                    n_pedestrians_enter += 1
        text = "Inside: " + str(n_pedestrians_inside) + \
               " Entering: " + str(n_pedestrians_enter) + \
               " Leaving: " + str(n_pedestrians_leave)
        cv2.putText(img, text, (0, 20), cv2.FONT_HERSHEY_PLAIN, 1, [0, 0, 255], 2)
        output_seq.append(img)
    return output_seq
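# `check_overlap_xy`, `get_center_velocity`, and `is_leaving` are assumed
# helpers that are not shown. A sketch of the leaving test consistent with
# how it is called above: a pedestrian straddling the box edge counts as
# leaving when its velocity along the deciding axis points away from the box
# center (the sign convention is a guess):
def is_leaving(box_center, proposal_center, proposal_velocity):
    # Moving away from the box center along this axis means leaving.
    return (proposal_center - box_center) * proposal_velocity > 0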
def detect(self):
    xmin, ymin, xmax, ymax = self.area
    frame_no = 0
    while self.vdo.grab():
        frame_no += 1
        start = time.time()
        _, ori_im = self.vdo.retrieve()
        im = ori_im[ymin:ymax, xmin:xmax, (2, 1, 0)]
        person_id = 1
        confidence = 0.5
        # only person (id == 1)
        bbox = self.detector.run(im)['results'][person_id]
        bbox = bbox[bbox[:, 4] > confidence, :]
        # convert (x1, y1, x2, y2) corners to (x1, y1, w, h)
        bbox[:, 2] = bbox[:, 2] - bbox[:, 0]
        bbox[:, 3] = bbox[:, 3] - bbox[:, 1]
        cls_conf = bbox[:, 4]
        outputs = self.deepsort.update(bbox[:, :4], cls_conf, im)
        if len(outputs) > 0:
            bbox_xyxy = outputs[:, :4]
            identities = outputs[:, -1]
            ori_im = draw_bboxes(ori_im, bbox_xyxy, identities, offset=(xmin, ymin))
        end = time.time()
        print("centernet time: {}s, fps: {}".format(end - start, 1 / (end - start)))
        cv2.imshow("test", ori_im)
        cv2.waitKey(1)
        if self.write_video:
            self.output.write(ori_im)
def detect(self):
    xmin, ymin, xmax, ymax = self.area
    frame_no = 0
    avg_fps = 0.0
    # frames come from a threaded reader (vdo.more()/read()) instead of the
    # usual vdo.grab()/retrieve() pair
    while self.vdo.more():
        frame_no += 1
        start = time.time()
        ori_im = self.vdo.read()
        im = ori_im
        results = self.detector.run(im)['results']
        bbox_xywh, cls_conf = bbox_to_xywh_cls_conf(results)
        if bbox_xywh is not None:
            outputs = self.deepsort.update(bbox_xywh, cls_conf, im)
            if len(outputs) > 0:
                bbox_xyxy = outputs[:, :4]
                identities = outputs[:, -1]
                ori_im = draw_bboxes(ori_im, bbox_xyxy, identities, offset=(xmin, ymin))
        end = time.time()
        fps = 1 / (end - start)
        avg_fps += fps
        print("centernet time: {}s, fps: {}, avg fps : {}".format(
            end - start, fps, avg_fps / frame_no))
        cv2.imshow("test", ori_im)
        cv2.waitKey(1)
        if self.write_video:
            self.output.write(ori_im)
    # do a bit of cleanup
    cv2.destroyAllWindows()
    self.vdo.stop()
def img_callback(self, dataset_img):
    if len(self.detections_loaded) > 0:
        detections_current_frame = self.detections_loaded[
            self.detections_loaded["frame_no_cam"] == dataset_img.frame_no_cam]
        scores = detections_current_frame["score"].tolist()
        bboxes_xtlytlwh = list(zip(detections_current_frame["x"],
                                   detections_current_frame["y"],
                                   detections_current_frame["w"],
                                   detections_current_frame["h"]))
    else:
        bboxes_xtlytlwh, scores = self.detector.detect(dataset_img.img)
        self.store_detections_one_frame(dataset_img.frame_no_cam, bboxes_xtlytlwh, scores)
        self.detections_frame_nos.append({"frame_no_cam": dataset_img.frame_no_cam})
    draw_img = dataset_img.img
    if bboxes_xtlytlwh is not None:
        outputs = self.deep_sort.update(bboxes_xtlytlwh, scores, dataset_img)
        if len(outputs) > 0:
            bboxes_xtylwh = outputs[:, :4]
            bboxes_xyxy = [xtylwh_to_xyxy(bbox_xtylwh, dataset_img.img_dims)
                           for bbox_xtylwh in bboxes_xtylwh]
            identities = outputs[:, -2]
            detection_idxs = outputs[:, -1]
            draw_img = draw_bboxes(dataset_img.img, bboxes_xyxy, identities)
            for detection_idx, person_id, bbox in zip(detection_idxs, identities, bboxes_xyxy):
                print('%d,%d,%d,%d,%d,%d,%d,%d' %
                      (dataset_img.frame_no_cam, dataset_img.cam_id, person_id,
                       detection_idx, int(bbox[0]), int(bbox[1]),
                       int(bbox[2]), int(bbox[3])),
                      file=self.track_results_file)
    if self.cfg.general.display_viewer:
        cv2.imshow("Annotation Viewer", draw_img)
        cv2.waitKey(1)
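# `xtylwh_to_xyxy` is assumed to turn a top-left (x, y, w, h) box into
# (x1, y1, x2, y2) clipped to the image. A sketch under that assumption
# (img_dims taken here as (width, height)):
def xtylwh_to_xyxy(bbox_xtylwh, img_dims):
    x, y, w, h = bbox_xtylwh
    width, height = img_dims
    x1 = max(int(x), 0)
    y1 = max(int(y), 0)
    x2 = min(int(x + w), width - 1)
    y2 = min(int(y + h), height - 1)
    return x1, y1, x2, y2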
def tracking(queue_items: mp.Queue, area):
    txt_writer = open(txt_path, 'wt')
    deepsorts = []
    # one tracker per class id (index 0 is unused; classes are 1-4)
    for i in range(5):
        deepsort = DeepSort("deep/checkpoint/ckpt.t7")
        deepsort.extractor.net.share_memory()
        deepsorts.append(deepsort)
    xmin, ymin, xmax, ymax = area
    while True:
        try:
            queue_item = queue_items.get(block=True, timeout=3)
        except queue.Empty:
            print('Empty queue. End?')
            break
        batch_results = queue_item.detect_results
        imgs = queue_item.imgs
        ori_imgs = queue_item.ori_imgs
        frame_ids = queue_item.frame_ids
        for batch_idx, results in enumerate(batch_results):  # frame by frame
            for class_id in [1, 2, 3, 4]:
                bbox_xywh, cls_conf = bbox_to_xywh_cls_conf(results, class_id)
                if (bbox_xywh is not None) and (len(bbox_xywh) > 0):
                    outputs = deepsorts[class_id].update(bbox_xywh, cls_conf, imgs[batch_idx])
                    if len(outputs) > 0:
                        bbox_xyxy = outputs[:, :4]
                        identities = outputs[:, -1]
                        offset = (xmin, ymin)
                        if is_write:
                            ori_im = draw_bboxes(ori_imgs[batch_idx], bbox_xyxy,
                                                 identities, class_id,
                                                 offset=(xmin, ymin))
                        for i, box in enumerate(bbox_xyxy):
                            x1, y1, x2, y2 = [int(v) for v in box]
                            x1 += offset[0]
                            x2 += offset[0]
                            y1 += offset[1]
                            y2 += offset[1]
                            idx = int(identities[i]) if identities is not None else 0
                            txt_writer.write(
                                f'{frame_ids[batch_idx]} {class_id} {idx} {x1} {y1} {x2} {y2}\n')
    txt_writer.close()
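# The worker above assumes each queue item bundles a batch of detector
# results with the frames they came from. A minimal container sketch (field
# names taken from the attribute accesses above; the dataclass itself is an
# assumption):
from dataclasses import dataclass
from typing import List

@dataclass
class QueueItem:
    detect_results: List   # per-frame detector outputs
    imgs: List             # preprocessed frames fed to the tracker
    ori_imgs: List         # original frames used for drawing
    frame_ids: List[int]   # frame indices matching the batch order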
def detect(self, class_list):
    # parameters: each class_list item carries self.vdo.retrieve()[1] and a start time
    xmin, ymin, xmax, ymax = self.area
    class_name = self.class_names
    need = self.need
    return_result = []
    for class_one in class_list:
        ori_im = class_one.ori_im
        im = ori_im[ymin:ymax, xmin:xmax, (2, 1, 0)]
        index = class_one.index
        start_time = class_one.start_time
        # detect objects
        bbox_xywh, cls_conf, cls_ids = self.yolo3(im)
        if bbox_xywh is not None:
            # build the labels used when drawing boxes on the image
            mask = [i in need for i in cls_ids]
            all_name = [str(index + 1) + '_' + class_name[int(i)]
                        for i in cls_ids if i in need]
            bbox_xywh = bbox_xywh[mask]
            bbox_xywh[:, 3] *= 1.2
            cls_conf = cls_conf[mask]
            outputs, total_name, stay_time = self.deepsort_arr[index].update(
                bbox_xywh, cls_conf, im, all_name, start_time)
            # nobody has exceeded the violation time limit
            if len(stay_time) == 0:
                continue
            if len(outputs) > 0:
                bbox_xyxy = outputs[:, :4]
                identities = outputs[:, -1]
                # dwell times can be logged here, since the current object ids are visible
                ori_im = draw_bboxes(ori_im, bbox_xyxy, identities, total_name,
                                     offset=(xmin, ymin))
            part = [index, ori_im, stay_time[0], stay_time[0:self.bad_time_object]]
            return_result.append(part)
    return return_result
def detect(self):
    xmin, ymin, xmax, ymax = self.area
    frame_no = 0
    avg_fps = 0.0
    while self.vdo.grab():
        if frame_no > 1800:
            break
        frame_no += 1
        start = time.time()
        _, ori_im = self.vdo.retrieve()
        im = ori_im[ymin:ymax, xmin:xmax]
        results = self.detector.run(im)['results']
        bbox_xywh, cls_conf = bbox_to_xywh_cls_conf(results)
        if bbox_xywh is not None:
            outputs = self.deepsort.update(bbox_xywh, cls_conf, im)
            if len(outputs) > 0:
                bbox_xyxy = outputs[:, :4]
                identities = outputs[:, -1]
                ori_im = draw_bboxes(ori_im, bbox_xyxy, identities, offset=(xmin, ymin))
        end = time.time()
        fps = 1 / (end - start)
        avg_fps += fps
        if frame_no % 600 == 1:
            print("obj {}, centernet time: {}s, fps: {}, avg fps : {}".format(
                len(bbox_xywh), end - start, fps, avg_fps / frame_no))
        # cv2.imshow("test", ori_im)
        # cv2.waitKey(1)
        if self.write_video:
            self.output.write(ori_im)
def detect(self):
    count = 0
    while self.vdo.grab():
        start = time.time()
        re, ori_im = self.vdo.retrieve()
        if re:
            count += 1
            # im = cv2.cvtColor(ori_im, cv2.COLOR_BGR2RGB)  # conversion disabled
            im = ori_im
            ret = self.centernet.run(ori_im)
            confidences = []
            if ret['results'] is not None:
                for box in ret['results'][1]:
                    # clip negative coordinates to the image
                    for i in range(4):
                        if box[i] < 0:
                            box[i] = 0
                    confidences.append(box[4])
                # drop the confidence column before handing boxes to the tracker
                ret['results'][1] = np.delete(ret['results'][1], 4, axis=1)
                outputs = self.deepsort.update(ret['results'][1], confidences, im)
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    ori_im = draw_bboxes(ori_im, bbox_xyxy, identities)
                    cv2.imwrite("/home/vietthangtik15/dataset/output/" + str(count) + ".jpg",
                                ori_im)
            end = time.time()
            print("time: {}s, fps: {}".format(end - start, 1 / (end - start)))
            if self.args.save_path:
                self.output.write(ori_im)
        else:
            break
def detect(self):
    xmin, ymin, xmax, ymax = self.area
    while self.vdo.grab():
        start = time.time()
        _, ori_im = self.vdo.retrieve()
        im = ori_im[ymin:ymax, xmin:xmax, (2, 1, 0)]
        bbox_xywh, cls_conf, cls_ids = self.yolo3(im)
        # bbox_xyxy = torch.zeros_like(bbox_xywh, dtype=bbox_xywh.dtype)
        # bbox_xyxy[0] = bbox_xywh[:, 0] - bbox_xywh[:, 2] / 2
        # bbox_xyxy[1] = bbox_xywh[:, 1] - bbox_xywh[:, 3] / 2
        # bbox_xyxy[2] = bbox_xywh[:, 0] + bbox_xywh[:, 2] / 2
        # bbox_xyxy[3] = bbox_xywh[:, 1] + bbox_xywh[:, 3] / 2
        if bbox_xywh is not None:
            mask = cls_ids == 0
            bbox_xywh = bbox_xywh[mask]
            bbox_xywh[:, 3] *= 1.2
            cls_conf = cls_conf[mask]
            outputs = self.deepsort.update(bbox_xywh, cls_conf, im)
            if len(outputs) > 0:
                bbox_xyxy = outputs[:, :4]
                identities = outputs[:, -1]
                ori_im = draw_bboxes(ori_im, bbox_xyxy, identities, offset=(xmin, ymin))
        end = time.time()
        print("time: {}s, fps: {}".format(end - start, 1 / (end - start)))
        cv2.imshow("test", ori_im)
        cv2.waitKey(1)
        if self.write_video:
            self.output.write(ori_im)
def detect(self, opt):
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
    opt.debug = max(opt.debug, 1)
    # Detector = detector_factory[opt.task]
    Detector = CtdetDetector
    detector = Detector(opt)
    detector.pause = False
    img_index = 0
    while True:
        img_index = img_index + 1
        print("img_index: ", img_index)
        if self.from_video:
            _, ori_im = self.vdo.read()
            start = time.time()
        else:
            start = time.time()
            path = self.path
            list_dir = os.listdir(path)
            slist = sorted(list_dir)
            join = os.path.join(path, slist[img_index])
            ori_im = cv2.imread(join)
            img_index = img_index + 1
        im = ori_im
        dm = copy.deepcopy(ori_im)
        # convert detector corners to center/size format for DeepSORT
        ret = detector.run(im)
        xc = abs(ret[:, 0] + ret[:, 2]) / 2.0
        yc = abs(ret[:, 1] + ret[:, 3]) / 2.0
        w = abs(ret[:, 0] - ret[:, 2])
        h = abs(ret[:, 1] - ret[:, 3])
        cls_conf = ret[:, 4]
        bbox_xcycwh = np.column_stack((xc, yc, w, h))
        bbox_x1y1x2y2_conf = np.concatenate(
            (np.floor(ret[:, 0:4]),
             np.transpose(np.expand_dims(ret[:, 4], axis=0))), axis=1)
        # bbox_xcycwh, cls_conf, cls_ids = self.yolo3(im)
        if bbox_xcycwh is not None:
            # uncomment when using YOLO to keep only the person class:
            # mask = cls_ids == 0
            # bbox_xcycwh = bbox_xcycwh[mask]
            # bbox_xcycwh[:, 2] *= 1.1
            # cls_conf = cls_conf[mask]
            outputs = self.deepsort.update(bbox_xcycwh, cls_conf, im)
            if len(outputs) > 0:
                bbox_xyxy = outputs[:, :4]
                identities = outputs[:, -1]
                ori_im = draw_bboxes(ori_im, bbox_xyxy, identities)
        end = time.time()
        print("time: {}s, fps: {}".format(end - start, 1 / (end - start)))
        s_size = 1.3
        d_size = 2
        # adjust the window size to the screen resolution
        screen_res = 768, 1366
        screen_res = screen_res[0] / 1.2, screen_res[1] / 1.2
        scale = max(screen_res[0] / ori_im.shape[0], screen_res[1] / ori_im.shape[1])
        if self.single_display:
            if not self.only_detect:
                # ori_im carries the tracked boxes, not the raw detections
                # cv2.imshow('image', ori_im)
                # if cv2.waitKey(0 if self.pause else 1) == 27:
                #     sys.exit(0)
                if self.args.save_path:
                    re_ori_im = self.save_res(ori_im)
                    self.output.write(re_ori_im)
            else:
                # dm shows the detections only
                dm = draw_simple_bboxes(dm, bbox_x1y1x2y2_conf)
                cv2.imshow('image', dm)
                if cv2.waitKey(0 if self.pause else 1) == 27:
                    sys.exit(0)
                if self.args.save_path:
                    self.output.write(dm)
        else:
            # side-by-side window: detections (left) and tracks (right)
            both = np.concatenate((dm, ori_im), axis=1)
            cv2.namedWindow('image', cv2.WINDOW_NORMAL)
            cv2.resizeWindow('image', (int(both.shape[1] * scale / 1.7),
                                       int(both.shape[0] * scale / 1.7)))
            cv2.imshow('image', both)
            if cv2.waitKey(0 if self.pause else 1) == 27:
                sys.exit(0)
            if self.args.save_path:
                re_ori_im = self.save_res(ori_im)
                self.output.write(ori_im)
def detect(self):
    # xmin, ymin, xmax, ymax = self.area
    jump_flag = 1
    start = time.time()
    while self.vdo.grab():
        # multicore
        # pool = mp.Pool(processes=6)  # 6-core
        _, ori_im = self.vdo.retrieve()
        im_height, im_width = ori_im.shape[:2]
        x_max = 5
        y_max = 5
        x_grid = int(im_width / x_max)
        y_grid = int(im_height / y_max)
        display_im = ori_im
        # for i in range(1, x_max + 1):
        #     cv2.line(ori_im, (x_grid * i, 0), (x_grid * i, im_height), (0, 255, 255), 3)
        # for i in range(1, y_max + 1):
        #     cv2.line(ori_im, (0, y_grid * i), (im_width, y_grid * i), (0, 255, 255), 3)
        # for i in range(len(unseen_frame)):
        #     if unseen_frame[i] > -1:
        #         unseen_frame[i] += 1
        if jump_flag % 2 == 0:  # process every other frame
            clientsocket = socket(AF_INET, SOCK_STREAM)
            clientsocket.connect(('140.114.79.179', 10523))
            clientsocket.send(pickle.dumps(user_entry_dict))
            im = cv2.cvtColor(ori_im, cv2.COLOR_BGR2RGB)
            bbox_xcycwh, cls_conf, cls_ids = self.yolo3(im)
            cv2.circle(ori_im, (3900, 2100), 50, (255, 0, 0), -1)
            if bbox_xcycwh is not None:
                # select class person
                mask = cls_ids == 0
                bbox_xcycwh = bbox_xcycwh[mask]
                bbox_xcycwh[:, 3:] *= 1.2
                cls_conf = cls_conf[mask]
                outputs = self.deepsort.update(bbox_xcycwh, cls_conf, im)
                for output in outputs:
                    if output[4] > len(people_path):
                        # register storage for newly seen track ids
                        for i in range(0, output[4] - len(people_path)):
                            people_path.append([])
                            direction_start.append(0)
                            unseen_frame.append(-1)
                        people_path[output[4] - 1].append(
                            np.array(([(output[0] + output[2]) / 2, output[3]])))
                    coordinate = output[:4]
                    bbox_area = get_bbox_area(coordinate)
                    features = []
                    if bbox_area > area_threshold:
                        try:
                            if area_dic[output[-1]] < bbox_area:
                                area_dic[output[-1]] = bbox_area
                                roiImg = im[output[:4][1]:output[:4][3],
                                            output[:4][0]:output[:4][2]]  # img[y, x]
                                features = mask_ouput(roiImg)  # features = [[t-shirt, 0.9, [coordinates]], ...]
                                features = merge_color(roiImg, features)
                                print("re: ---------------", features)
                        except KeyError:
                            area_dic.setdefault(output[-1], bbox_area)
                            roiImg = im[output[:4][1]:output[:4][3],
                                        output[:4][0]:output[:4][2]]  # img[y, x]
                            features = mask_ouput(roiImg)
                            features = merge_color(roiImg, features)
                            print("wait---------------")
                    if output[-1] not in user_entry_dict:
                        # add a new entry id
                        user_entry_dict.setdefault(output[-1],
                                                   [features, exix_point, CAMERA_ID, []])
                    else:
                        for feature in features:
                            flag = 1
                            for i in range(len(user_entry_dict[output[-1]][0])):
                                if feature[0] in user_entry_dict[output[-1]][0][i]:
                                    # keep the highest confidence seen so far
                                    user_entry_dict[output[-1]][0][i][1] = max(
                                        user_entry_dict[output[-1]][0][i][1], feature[1])
                                    flag = 0
                            if flag == 1:
                                user_entry_dict[output[-1]][0].append(feature)
                    print(user_entry_dict)
                    # call project.py
                    find_grids(output, [x_grid, y_grid], 0.3, user_entry_dict[output[-1]])
                    x = []
                    y = []
                    for i in range(direction_start[output[4] - 1],
                                   len(people_path[output[4] - 1])):
                        x.append(people_path[output[4] - 1][i][0])
                        y.append(people_path[output[4] - 1][i][1])
                    path_x = (output[0] + output[2]) / 2
                    path_y = output[3]
                    if len(x) > 1:
                        a, b, c = pu.cal_simple_linear_regression_coefficients(x, y)
                        dist = abs(a * path_x + b * path_y + c) / math.sqrt(a * a + b * b)
                        if dist > 200 and unseen_frame[output[4] - 1] < 10:
                            continue
                        if dist < distance_threshold:
                            # project the point onto the fitted direction line
                            path_x, path_y = pu.find_projection(a, b, c, path_x, path_y)
                            if len(people_path[output[4] - 1]) > 0:
                                prev_x = people_path[output[4] - 1][-1][0]
                                prev_y = people_path[output[4] - 1][-1][1]
                                velocity = math.sqrt(
                                    (path_x - prev_x) * (path_x - prev_x) +
                                    (path_y - prev_y) * (path_y - prev_y)) * 30 / (
                                        unseen_frame[output[4] - 1] + 1)
                        else:
                            # direction changed: start a new path segment
                            direction_start[output[4] - 1] = len(people_path[output[4] - 1])
                    people_path[output[4] - 1].append(np.array((path_x, path_y)))
                    unseen_frame[output[4] - 1] = 0
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    ori_im = draw_bboxes(ori_im, bbox_xyxy, identities)
                    for id in identities:
                        for i in range(1, len(people_path[id - 1])):
                            cv2.line(ori_im,
                                     (int(people_path[id - 1][i - 1][0]),
                                      int(people_path[id - 1][i - 1][1])),
                                     (int(people_path[id - 1][i][0]),
                                      int(people_path[id - 1][i][1])),
                                     (0, 0, 255), 3)
            print(area_dic)
        jump_flag += 1
        if self.args.display:
            cv2.imshow("test", ori_im)
            cv2.waitKey(1)
        if self.args.save_path:
            self.output.write(ori_im)
    end = time.time()
    print(end - start)
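# `pu.find_projection` is assumed to project a point onto the fitted line
# a*x + b*y + c = 0, matching the point-to-line distance
# |a*x0 + b*y0 + c| / sqrt(a^2 + b^2) used above. A sketch of that standard
# formula (the original helper is not shown in these snippets):
def find_projection(a, b, c, x0, y0):
    """Orthogonal projection of (x0, y0) onto the line a*x + b*y + c = 0."""
    d = (a * x0 + b * y0 + c) / (a * a + b * b)
    return x0 - a * d, y0 - b * d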
def main(): args = get_parser().parse_args() if args.display: cv2.namedWindow("out_vid", cv2.WINDOW_NORMAL) cv2.resizeWindow("out_vid", 960, 720) sort = Sort() deepsort = DeepSort(args.deepsort_checkpoint, nms_max_overlap=args.nms_max_overlap, use_cuda=bool(strtobool(args.use_cuda))) assert os.path.isfile( os.path.join(args.input, 'via_export_json.json' )), "Error: path error, via_export_json.json not found" ''' if args.out_vid: out_vid = cv2.VideoWriter( filename=args.out_vid, fourcc=cv2.VideoWriter_fourcc(*'MJPG'), fps=args.fps, frameSize=(1920, 1440), ) ''' if args.out_txt: out_txt = open(args.out_txt, "w+") total_counter = [0] * 1000 json_file = os.path.join(args.input, 'via_export_json.json') with open(json_file) as f: imgs_anns = json.load(f) for idx, v in tqdm(enumerate(imgs_anns.values()), total=len(imgs_anns.values())): filename = os.path.join(args.input, v["filename"]) annos = v["regions"] polys = [] dets = [] for anno in annos: region_attributes = anno["region_attributes"] if not region_attributes: break anno = anno["shape_attributes"] if anno["name"] != "polygon": break px = anno["all_points_x"] py = anno["all_points_y"] poly = np.array([[x, y] for x, y in zip(px, py)], np.int32).reshape((-1, 1, 2)) if int(region_attributes["category_id"]): dets.append( [np.min(px), np.min(py), np.max(px), np.max(py), 1]) polys.append(poly) start = time.time() im = cv2.imread(filename) current_counter = [] if args.tracker == 'sort': if len(dets): dets = np.array(dets) else: dets = np.empty((0, 5)) outputs = sort.update(dets) outputs = np.array([element.clip(min=0) for element in outputs]).astype(int) else: if len(dets): ccwh_boxes = [] for det in dets: ccwh_boxes.append([(det[0] + det[2]) / 2, (det[1] + det[3]) / 2, det[2] - det[0], det[3] - det[1]]) ccwh_boxes = np.array(ccwh_boxes) confidences = np.ones(len(dets)) outputs, __ = deepsort.update(ccwh_boxes, confidences, im) else: outputs = [] if len(outputs): tlbr_boxes = outputs[:, :4] identities = current_counter = outputs[:, -1] ordered_identities = [] for identity in identities: if not total_counter[identity]: total_counter[identity] = max(total_counter) + 1 ordered_identities.append(total_counter[identity]) im = draw_bboxes(im, tlbr_boxes, ordered_identities, binary_masks=[]) if args.out_txt: for i in range(len(ordered_identities)): tlbr = tlbr_boxes[i] line = [ idx + 1, ordered_identities[i], tlbr[0], tlbr[1], tlbr[2] - tlbr[0], tlbr[3] - tlbr[1], 1, 1, 1 ] out_txt.write(",".join(str(item) for item in line) + "\n") end = time.time() im = draw_polys(im, polys) im = cv2.putText(im, "Frame ID: " + str(idx), (20, 20), 0, 5e-3 * 200, (0, 255, 0), 2) time_fps = "Time: {}s, fps: {}".format(round(end - start, 2), round(1 / (end - start), 2)) im = cv2.putText(im, time_fps, (20, 60), 0, 5e-3 * 200, (0, 255, 0), 3) im = cv2.putText(im, 'Groundtruth2' + args.tracker, (20, 100), 0, 5e-3 * 200, (0, 255, 0), 3) im = cv2.putText(im, "Current Hand Counter: " + str(len(current_counter)), (20, 140), 0, 5e-3 * 200, (0, 255, 0), 2) im = cv2.putText(im, "Total Hand Counter: " + str(max(total_counter)), (20, 180), 0, 5e-3 * 200, (0, 255, 0), 2) if args.display: cv2.imshow("out_vid", im) cv2.waitKey(1) '''
def detect(self):
    dt = 0.1
    frame_no = 0.0
    time_prev = 0.0  # self.kitti_timestamps[0].split('\n')[0].split(' ')[1]
    t_prev = np.array([0., 0., 0.])
    mot_tracker = Sort(max_age=3, min_hits=3, iou_threshold=0.5)
    for idx, sequence_list in enumerate(self.matches):
        start = time.time()
        frame_no += 1.0
        if self.dataset == 'tum':
            rgb_name, depth_name, odom_name = sequence_list
            rgb_name = str(rgb_name)
            if len(rgb_name) < 17:
                rgb_name += '0' * (17 - len(rgb_name))
            img_path = os.path.join(self.rgb_path, rgb_name + '.png')
            depth_path = os.path.join(self.depth_path, str(depth_name) + '.png')
            oxt = self.third_list[odom_name]
            t = np.array(oxt[:3]).astype('float32')
            q = np.array(oxt[3:]).astype('float32')
            if os.path.isfile(depth_path):
                depth_im = Image.open(depth_path)
                depth_im = np.asarray(depth_im) / self.depth_factor
            else:
                time_prev = odom_name
                continue
            r = R.from_quat(q).as_dcm()
            t_prev = t
            time_prev = odom_name
            # camera pose as a 4x4 homogeneous transform
            T = np.array([[r[0, 0], r[0, 1], r[0, 2], t[0]],
                          [r[1, 0], r[1, 1], r[1, 2], t[1]],
                          [r[2, 0], r[2, 1], r[2, 2], t[2]],
                          [0.0, 0.0, 0.0, 1.0]])
        elif self.dataset == 'kitti':
            rgb_name = sequence_list
            img_path = os.path.join('./seq/' + self.sequence_name + '/image_02/data/',
                                    rgb_name)
            oxt_path = os.path.join('./seq/' + self.sequence_name + '/oxts/data/',
                                    rgb_name.split('.')[0] + '.txt')
            depth_path = os.path.join('./seq/' + self.sequence_name +
                                      '/proj_depth/groundtruth/image_02/', rgb_name)
            r = np.array(self.kitti_odom[idx - 1].split('\n')[0].split(' ')).reshape(3, 4).astype('float32')
            T = np.vstack((r, [0., 0., 0., 1.]))
            if os.path.isfile(depth_path):
                depth_im = depth_read(depth_path)
            else:
                continue
        if time_prev != 0:
            dt = odom_name - time_prev
        im = np.asarray(Image.open(img_path))
        bbox_xcycwh, cls_conf, cls_ids, dets, masks = self.detectron2.detect(im)
        dets = self.findDepth(dets, cls_ids, masks, T, depth_im)
        if len(dets) == 0:
            outputs = mot_tracker.update(delta=dt)
        else:
            outputs = mot_tracker.update(dets, delta=dt)
        if len(outputs) > 0:
            bbox_xyxy = outputs[:, :4]
            box3d_state = outputs[:, 4:10]
            maskIOUs = outputs[:, 10]
            cls_ids = outputs[:, 11].astype('int')
            identities = outputs[:, 12]
            masks_obj = outputs[:, 13]
            is_static = self.findStaticDyanmic(box3d_state, cls_ids, maskIOUs)
            im = draw_bboxes(im, bbox_xyxy, identities, box3d_state[:, 3:], is_static)
            save_path = './masks/'
            mask = np.zeros_like(depth_im)
            for i in range(len(is_static)):
                if is_static[i] == 0:
                    mask = mask + masks_obj[i]
            mask = mask.astype('bool').astype('int') * 255
            if self.save_mask:
                cv2.imwrite(save_path + rgb_name + '_mask.png', mask)
        end = time.time()
        dt = np.round(end - start, 2)
        print("time: {}: {}s, fps: {}".format(frame_no, end - start, 1 / (end - start)))
        self.display = True
        if self.display:
            cv2.imshow("test", im)
            cv2.waitKey(10)
def main(): args = get_parser().parse_args() if args.display: cv2.namedWindow("out_vid", cv2.WINDOW_NORMAL) cv2.resizeWindow("out_vid", 960, 720) sort = Sort() deepsort = DeepSort(args.deepsort_checkpoint, nms_max_overlap=args.nms_max_overlap, use_cuda=bool(strtobool(args.use_cuda))) assert os.path.isfile( args.input), "Error: path error, input file not found" if args.out_vid: out_vid = cv2.VideoWriter( filename=args.out_vid, fourcc=cv2.VideoWriter_fourcc(*'MJPG'), fps=args.fps, frameSize=(1920, 1440), ) if args.out_txt: out_txt = open(args.out_txt, "w+") total_counter = [0] * 1000 inp_vid = cv2.VideoCapture(args.input) num_frames = int(inp_vid.get(cv2.CAP_PROP_FRAME_COUNT)) predictor = DefaultPredictor(setup_cfg(args)) for frameID in tqdm(range(num_frames)): ret, im = inp_vid.read() start = time.time() dets, masks, region = detectron2(im, args, predictor) if args.region_based: im = region if args.tracker == 'sort': if len(dets): dets = np.array(dets) else: dets = np.empty((0, 5)) outputs = sort.update(dets) outputs = np.array([element.clip(min=0) for element in outputs]).astype(int) else: if len(dets): ccwh_boxes = [] for det in dets: ccwh_boxes.append([(det[0] + det[2]) / 2, (det[1] + det[3]) / 2, det[2] - det[0], det[3] - det[1]]) ccwh_boxes = np.array(ccwh_boxes) confidences = np.ones(len(dets)) outputs, __ = deepsort.update(ccwh_boxes, confidences, im) else: outputs = [] current_counter = [] if len(outputs): tlbr_boxes = outputs[:, :4] identities = current_counter = outputs[:, -1] ordered_identities = [] for identity in identities: if not total_counter[identity]: total_counter[identity] = max(total_counter) + 1 ordered_identities.append(total_counter[identity]) im = draw_bboxes(im, tlbr_boxes, ordered_identities, binary_masks=masks) if args.out_txt: for i in range(len(ordered_identities)): tlbr = tlbr_boxes[i] line = [ frameID + 1, ordered_identities[i], tlbr[0], tlbr[1], tlbr[2] - tlbr[0], tlbr[3] - tlbr[1], 1, 1, 1 ] out_txt.write(",".join(str(item) for item in line) + "\n") end = time.time() im = cv2.putText(im, "Frame ID: " + str(frameID + 1), (20, 30), 0, 5e-3 * 200, (0, 255, 0), 2) time_fps = "Time: {}s, fps: {}".format(round(end - start, 2), round(1 / (end - start), 2)) im = cv2.putText(im, time_fps, (20, 60), 0, 5e-3 * 200, (0, 255, 0), 3) im = cv2.putText( im, os.path.basename(args.config_file) + ' ' + args.tracker, (20, 90), 0, 5e-3 * 200, (0, 255, 0), 3) im = cv2.putText(im, "Current Hand Counter: " + str(len(current_counter)), (20, 120), 0, 5e-3 * 200, (0, 255, 0), 2) im = cv2.putText(im, "Total Hand Counter: " + str(max(total_counter)), (20, 150), 0, 5e-3 * 200, (0, 255, 0), 2) if args.display: cv2.imshow("out_vid", im) cv2.waitKey(1) if args.out_vid: out_vid.write(im) frameID += 1
def detect(self):
    start = time.time()
    if not args.image_input:
        start_second = 0
        end_second = 8
        fps = self.vdo.get(cv2.CAP_PROP_FPS)
        print('fps: ', fps)
        start_frameid = start_second * fps
        end_frameid = end_second * fps
    frame_id = 0
    if self.args.update_tracks:
        shutil.copytree(self.args.detections_dir, self.args.detections_dir + '_tracked')
    while True:
        print(f'FRAME_ID: {frame_id}')
        logging.debug(f'FRAME_ID: {frame_id}')
        new_sequence = False
        if not args.image_input:
            frame_id = int(round(self.vdo.get(1)))
            if frame_id < start_frameid:
                continue
            elif frame_id > end_frameid:
                break
            _, ori_im = self.vdo.read()  # retrieve()
        else:
            if frame_id >= len(self.img_list):
                break
            if frame_id > 1:
                prev_im = ori_im
            ori_im = cv2.imread(self.img_list[frame_id])
            if frame_id > 1:
                # estimate camera motion between consecutive frames; a low
                # ECC correlation marks the start of a new sequence
                im1_gray = cv2.cvtColor(prev_im, cv2.COLOR_RGB2GRAY)
                im2_gray = cv2.cvtColor(ori_im, cv2.COLOR_RGB2GRAY)
                cc, _ = cv2.findTransformECC(im1_gray, im2_gray, warp_matrix,
                                             warp_mode, criteria, None, 1)
                new_sequence = cc < args.ecc_threshold
                logging.debug(f'ECC: {cc}')
            frame_id += 1
        logging.debug(f'NEW_SEQUENCE: {new_sequence}')
        if self.args.save_frames:
            if not args.image_input:
                cv2.imwrite(f'./supervisely/img/img_{frame_id:05}.jpg', ori_im)
            else:
                cv2.imwrite('./supervisely/img/' + self.img_list[frame_id - 1][-13:], ori_im)
        im = ori_im
        predictions = self.predictor(im)
        instances = predictions["instances"]
        if instances.pred_classes.numel() > 0:
            mask = instances.pred_classes == 0
            scores = instances.scores[mask]
            pred_boxes = instances.pred_boxes[mask]
            xcyc = pred_boxes.get_centers()
            wh = (pred_boxes.tensor[:, 2:] - pred_boxes.tensor[:, :2]
                  + torch.ones(pred_boxes.tensor[:, 2:].size()).cuda())
            wh_min, _ = torch.min(wh, 1)
            # if "pred_masks" in instances.keys():
            #     pred_masks = instances["pred_masks"][mask]
            # drop degenerate boxes narrower than 4 px on either side
            bbox_xcycwh = torch.cat((xcyc, wh), 1)[wh_min >= 4].detach().cpu().numpy()
            cls_conf = scores.detach().cpu().numpy()
            if self.args.detections_dir != "":
                # read pre-computed detections from Supervisely-style annotations
                ann_dir = os.path.join(self.args.detections_dir)
                ann = os.path.basename(self.img_list[frame_id - 1]) + ".json"
                ann_path = os.path.join(ann_dir, 'MOT', 'ann', ann)
                with open(ann_path) as f:
                    ann_dict = json.load(f)
                bboxes = []
                for obj in ann_dict['objects']:
                    bbox = obj["points"]["exterior"]
                    bbox = bbox[0] + bbox[1]
                    bbox = [min(bbox[0], bbox[2]), min(bbox[1], bbox[3]),
                            max(bbox[0], bbox[2]), max(bbox[1], bbox[3])]
                    bboxes.append([(bbox[2] + bbox[0]) / 2, (bbox[3] + bbox[1]) / 2,
                                   bbox[2] - bbox[0], bbox[3] - bbox[1]])
                bbox_xcycwh = np.array(bboxes)
                cls_conf = np.ones(bbox_xcycwh.shape[0])
            # bbox_xcycwh[:, 3:] *= 1.2
            outputs, detections = self.deepsort.update(bbox_xcycwh, cls_conf, im,
                                                       new_sequence, frame_id - 1,
                                                       self.img_list[frame_id - 1])
            self.deepsort.export('/content')
            if len(outputs) > 0:
                bbox_xyxy = outputs[:, :4]
                identities = outputs[:, 4]
                match_method = outputs[:, 5]
                number = outputs[:, 6]
                number_bbox = outputs[:, 7:11]
                detection_id = outputs[:, 11]
                min_cost = outputs[:, 12]
                draw_im = draw_bboxes(frame_id, new_sequence, ori_im, bbox_xyxy,
                                      identities, match_method, number,
                                      number_bbox, detection_id, min_cost)
                if self.args.save_txt:
                    for j in range(bbox_xyxy.shape[0]):
                        x1 = bbox_xyxy[j, 0]
                        y1 = bbox_xyxy[j, 1]
                        x2 = bbox_xyxy[j, 2]
                        y2 = bbox_xyxy[j, 3]
                        self.txt.write(f'{frame_id},{identities[j]},{x1},{y1},{x2 - x1},{y2 - y1},1,0,-1,-1\n')
                if self.args.update_tracks:
                    ann_path = os.path.join(self.args.detections_dir + '_tracked',
                                            'MOT', 'ann', ann)
                    print(ann_path)
                    for idx, obj in enumerate(ann_dict['objects']):
                        obj["tags"] = [{"name": "track_id",
                                        "value": detections[idx].track_id}]
                    with open(ann_path, 'w') as f:
                        json.dump(ann_dict, f)
        end = time.time()
        print("time: {}s, fps: {}".format(end - start, 1 / (end - start)))
        if self.args.display:
            cv2.imshow("test", draw_im)
            cv2.waitKey(1)
        if self.args.save_path:
            self.output.write(draw_im)
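# `warp_matrix`, `warp_mode`, and `criteria` are used by the ECC camera-motion
# check above but never defined in this snippet. A typical setup for
# cv2.findTransformECC, shown as an assumed sketch (the motion model and
# termination values are guesses):
import numpy as np
import cv2

warp_mode = cv2.MOTION_EUCLIDEAN
warp_matrix = np.eye(2, 3, dtype=np.float32)
# stop after 50 iterations or once the correlation update falls below 1e-5
criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 50, 1e-5)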
# (notebook-cell fragment: tail of the frame-reading loop inside
#  `if video_capture.isOpened():`)
            break
        start = time.time()
        xmin, ymin, xmax, ymax = 0, 0, width, height
        im = frame[ymin:ymax, xmin:xmax, (2, 1, 0)]
        bbox_xywh, cls_conf, cls_ids = yolo3(im)
        if bbox_xywh is not None:
            mask = cls_ids == 0
            bbox_xywh = bbox_xywh[mask]
            bbox_xywh[:, 3] *= 1.2
            cls_conf = cls_conf[mask]
            outputs = deepsort.update(bbox_xywh, cls_conf, im)
            if len(outputs) > 0:
                bbox_xyxy = outputs[:, :4]
                identities = outputs[:, -1]
                frame = draw_bboxes(frame, bbox_xyxy, identities, offset=(xmin, ymin))
        end = time.time()
        print("time: {}s, fps: {}".format(end - start, 1 / (end - start)))
        video_writer.write(frame)
    video_capture.release()
    video_writer.release()
    # convert AVI to MP4
    !ffmpeg -y -loglevel info -i output.avi output.mp4
else:
    print("can't open the given input video file!")
show_local_mp4_video('output.mp4', width=900, height=700)
def detect_callback(self, rgb_im, depth_im):
    dt = 0.1
    frame_no = 0.0
    time_prev = 0.0
    t_prev = np.array([0., 0., 0.])
    mot_tracker = Sort(max_age=3, min_hits=3, iou_threshold=0.5)
    for idx, sequence_list in enumerate(self.matches):
        start = time.time()
        frame_no += 1.0
        # (the file-based TUM/KITTI loading of the offline detect() variant is
        #  omitted here; the ROS callback receives the images directly)
        # convert the incoming ROS Image messages to numpy arrays
        depth_im = np.asarray(self.bridge.imgmsg_to_cv2(depth_im, "8UC1"))
        print("depth array :\n", depth_im)
        im = np.asarray(self.bridge.imgmsg_to_cv2(rgb_im, "rgb8"))
        T = self.pose
        bbox_xcycwh, cls_conf, cls_ids, dets, masks = self.detectron2.detect(im)
        dets = self.findDepth(dets, cls_ids, masks, T, depth_im)
        if len(dets) == 0:
            outputs = mot_tracker.update(delta=dt)
        else:
            outputs = mot_tracker.update(dets, delta=dt)
        if len(outputs) > 0:
            bbox_xyxy = outputs[:, :4]
            box3d_state = outputs[:, 4:10]
            maskIOUs = outputs[:, 10]
            cls_ids = outputs[:, 11].astype('int')
            identities = outputs[:, 12]
            masks_obj = outputs[:, 13]
            is_static = self.findStaticDyanmic(box3d_state, cls_ids, maskIOUs)
            im = draw_bboxes(im, bbox_xyxy, identities, box3d_state[:, 3:], is_static)
            save_path = './masks/'
            mask = np.zeros_like(depth_im)
            for i in range(len(is_static)):
                if is_static[i] == 0:
                    mask = mask + masks_obj[i]
            mask = mask.astype('bool').astype('int') * 255
            if self.config['SAVE_MASK']:
                # the file-based rgb_name is unavailable here; key on the frame counter
                cv2.imwrite(save_path + str(int(frame_no)) + '_mask.png', mask)
            if self.config['PUBLISH_ROS_TOPIC']:
                try:
                    img = mask
                    img = np.stack((img, ) * 3, -1)
                    img = img.astype(np.uint8)
                    grayed = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                    thresh = cv2.threshold(grayed, 0, 255,
                                           cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
                    mask = thresh
                    self.image_pub.publish(self.bridge.cv2_to_imgmsg(mask, "8UC1"))
                except CvBridgeError as e:
                    print(e)
        end = time.time()
        dt = np.round(end - start, 2)
        print("time: {}: {}s, fps: {}".format(frame_no, end - start, 1 / (end - start)))
        self.display = True
        if self.display:
            cv2.imshow("test", im)
            cv2.waitKey(10)
def detect(self):
    cnt = 0
    update_fg = True
    detect_fg = True
    total_time = 0
    outputs = []
    while self.vdo.isOpened():
        start = time.time()
        _, ori_im = self.vdo.read()
        im = cv2.cvtColor(ori_im, cv2.COLOR_BGR2RGB)
        im = np.array([im])
        if cnt % 5 == 0 or detect_fg:
            # run the detector on every fifth frame (or when tracking was lost)
            if self.command_type == 'face':
                rectangles = self.mtcnn.detectFace(im, True)
                rectangles = rectangles[0]
                if len(rectangles) < 1:
                    continue
                bboxes = rectangles[:, :4]
                bboxes = self.widerbox(bboxes)
                # bbox_xcycwh = self.xyxy2xcyc(bboxes)
                cls_conf = rectangles[:, 4]
            elif self.command_type == 'person':
                bboxes, cls_conf = self.person_detect.test_img_org(ori_im)
                if len(bboxes) == 0:
                    continue
            bbox_xcycwh = self.xywh2xcycwh(bboxes)
            update_fg = True
            box_xcycah = self.xcyc2xcycah(bbox_xcycwh)
            self.moveTrack.track_init(box_xcycah)
            self.moveTrack.track_predict()
            self.moveTrack.track_update(box_xcycah)
            detect_fg = False
        else:
            if len(bbox_xcycwh) > 0:
                # in-between frames: propagate boxes with the motion model only
                start1 = time.time()
                self.moveTrack.track_predict()
                bbox_xcycwh = self.xcycah2xcyc(self.moveTrack.means_track)
                end1 = time.time()
                print('only tracker time consume:', end1 - start1)
                update_fg = False
                detect_fg = False
            else:
                detect_fg = True
        if len(bbox_xcycwh) > 0:
            outputs = self.deepsort.update(bbox_xcycwh, cls_conf, ori_im, update_fg)
        end = time.time()
        consume = end - start
        if len(outputs) > 0:
            bbox_xyxy = outputs[:, :4]
            identities = outputs[:, -1]
            ori_im = draw_bboxes(ori_im, bbox_xyxy, identities)
        print("frame: {} time: {}s, fps: {}".format(cnt, consume, 1 / (end - start)))
        cnt += 1
        cv2.imshow("test", ori_im)
        c = cv2.waitKey(1) & 0xFF
        if c == 27 or c == ord('q'):
            break
        total_time += consume
    self.vdo.release()
    cv2.destroyAllWindows()
    print("video ave fps and total_time: ", cnt / total_time, total_time)
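# The class above leans on a family of box-format converters (xywh2xcycwh,
# xcyc2xcycah, xcycah2xcyc, ...) whose bodies are not shown. Sketches of two
# of them under the usual conventions (tlwh = top-left plus size; xcycah =
# center x/y, aspect ratio, height), to make the formats explicit:
import numpy as np

def xywh2xcycwh(boxes_tlwh):
    """(x_tl, y_tl, w, h) -> (x_center, y_center, w, h)."""
    boxes = np.asarray(boxes_tlwh, dtype=np.float32).copy()
    boxes[:, 0] += boxes[:, 2] / 2.0
    boxes[:, 1] += boxes[:, 3] / 2.0
    return boxes

def xcyc2xcycah(boxes_xcycwh):
    """(x_center, y_center, w, h) -> (x_center, y_center, aspect, h)."""
    boxes = np.asarray(boxes_xcycwh, dtype=np.float32).copy()
    boxes[:, 2] = boxes[:, 2] / boxes[:, 3]  # aspect ratio = w / h
    return boxes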
def detect(self):
    while self.vdo.grab():
        start = time.time()
        _, im = self.vdo.retrieve()
        # im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        print('-' * 46 + ' DEMO started ' + '-' * 46)
        bbox_xcycwh, cls_conf, cls_ids, cls_masks, bbox_xyxy_detectron2 = \
            self.detectron2.detect(im)
        current_counter = []
        if len(bbox_xcycwh):
            mask = cls_ids == 0  # select class person
            bbox_xcycwh = bbox_xcycwh[mask]
            cls_conf = cls_conf[mask]
            binary_masks = cls_masks[mask]
            # draw detections after NMS (white boxes)
            outputs, detections = self.deepsort.update(bbox_xcycwh, cls_conf, im)
            im = draw_detections(detections, im)
            if len(outputs):
                bbox_xyxy = outputs[:, :4]
                identities = current_counter = outputs[:, -1]
                # re-number track ids in first-seen order
                ordered_identities = []
                for identity in identities:
                    if not self.total_counter[identity]:
                        self.total_counter[identity] = max(self.total_counter) + 1
                    ordered_identities.append(self.total_counter[identity])
                im = draw_bboxes(im, bbox_xyxy, ordered_identities, binary_masks)
        end = time.time()
        time_fps = "time: {}s, fps: {}".format(round(end - start, 2),
                                               round(1 / (end - start), 2))
        im = cv2.putText(im, "Total Hand Counter: " + str(max(self.total_counter)),
                         (20, 120), 0, 5e-3 * 200, (0, 255, 0), 2)
        im = cv2.putText(im, "Current Hand Counter: " + str(len(current_counter)),
                         (20, 80), 0, 5e-3 * 200, (0, 255, 0), 2)
        im = cv2.putText(im, time_fps, (20, 40), 0, 5e-3 * 200, (0, 255, 0), 3)
        if self.args.display:
            cv2.imshow("test", im)
            cv2.waitKey(1)
        if self.args.save_path:
            self.output.write(im)
def detect(self):
    # multicore
    pool = mp.Pool(processes=6)  # 6-core
    # xmin, ymin, xmax, ymax = self.area
    jump_flag = 1
    while self.vdo.grab():
        _, ori_im = self.vdo.retrieve()
        im_height, im_width = ori_im.shape[:2]
        x_max = 10
        y_max = 10
        x_grid = int(im_width / x_max)
        y_grid = int(im_height / y_max)
        for i in range(1, x_max + 1):
            cv2.line(ori_im, (x_grid * i, 0), (x_grid * i, im_height), (0, 255, 255), 3)
        for i in range(1, y_max + 1):
            cv2.line(ori_im, (0, y_grid * i), (im_width, y_grid * i), (0, 255, 255), 3)
        for i in range(len(unseen_frame)):
            if unseen_frame[i] > -1:
                unseen_frame[i] += 1
        if jump_flag % 2 == 0:  # process every other frame
            start = time.time()
            im = cv2.cvtColor(ori_im, cv2.COLOR_BGR2RGB)
            im = ori_im
            bbox_xcycwh, cls_conf, cls_ids = self.yolo3(im)
            cv2.circle(ori_im, (3900, 2100), 50, (255, 0, 0), -1)
            if bbox_xcycwh is not None:
                # select class person
                mask = cls_ids == 0
                bbox_xcycwh = bbox_xcycwh[mask]
                bbox_xcycwh[:, 3:] *= 1.2
                cls_conf = cls_conf[mask]
                outputs = self.deepsort.update(bbox_xcycwh, cls_conf, im)
                for output in outputs:
                    if output[4] > len(people_path):
                        # register storage for newly seen track ids
                        for i in range(0, output[4] - len(people_path)):
                            people_path.append([])
                            direction_start.append(0)
                            unseen_frame.append(-1)
                        people_path[output[4] - 1].append(
                            np.array(([(output[0] + output[2]) / 2, output[3]])))
                    coordinate = output[:4]
                    bbox_area = get_bbox_area(coordinate)
                    try:
                        if area_dic[output[-1]] < bbox_area:
                            area_dic[output[-1]] = bbox_area
                            pool.apply_async(subroi, (ori_im, output))
                            print("---------------")
                    except KeyError:
                        area_dic.setdefault(output[-1], bbox_area)
                        pool.apply_async(subroi, (ori_im, output))
                        print("---------------")
                    x = []
                    y = []
                    for i in range(direction_start[output[4] - 1],
                                   len(people_path[output[4] - 1])):
                        x.append(people_path[output[4] - 1][i][0])
                        y.append(people_path[output[4] - 1][i][1])
                    path_x = (output[0] + output[2]) / 2
                    path_y = output[3]
                    if len(x) > 1:
                        a, b, c = pu.cal_simple_linear_regression_coefficients(x, y)
                        dist = abs(a * path_x + b * path_y + c) / math.sqrt(a * a + b * b)
                        if dist > 200 and unseen_frame[output[4] - 1] < 10:
                            continue
                        if dist < distance_threshold:
                            # project the point onto the fitted direction line
                            path_x, path_y = pu.find_projection(a, b, c, path_x, path_y)
                            if len(people_path[output[4] - 1]) > 0:
                                prev_x = people_path[output[4] - 1][-1][0]
                                prev_y = people_path[output[4] - 1][-1][1]
                                velocity = math.sqrt(
                                    (path_x - prev_x) * (path_x - prev_x) +
                                    (path_y - prev_y) * (path_y - prev_y)) * 30 / (
                                        unseen_frame[output[4] - 1] + 1)
                                print("velocity: {}".format(velocity))
                        else:
                            # direction changed: start a new path segment
                            direction_start[output[4] - 1] = len(people_path[output[4] - 1])
                    people_path[output[4] - 1].append(np.array((path_x, path_y)))
                    unseen_frame[output[4] - 1] = 0
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    ori_im = draw_bboxes(ori_im, bbox_xyxy, identities)
                    for id in identities:
                        for i in range(1, len(people_path[id - 1])):
                            cv2.line(ori_im,
                                     (int(people_path[id - 1][i - 1][0]),
                                      int(people_path[id - 1][i - 1][1])),
                                     (int(people_path[id - 1][i][0]),
                                      int(people_path[id - 1][i][1])),
                                     (0, 0, 255), 3)
            end = time.time()
            print("time: {}s, fps: {}".format(end - start, 1 / (end - start)))
            print(area_dic)
        jump_flag += 1
        if self.args.display:
            cv2.imshow("test", ori_im)
            cv2.waitKey(1)
        if self.args.save_path:
            self.output.write(ori_im)
def detect(self):
    results = []
    allDetection = dict()
    idx_frame = 0
    # loop while there is a next frame
    while idx_frame < len(self.imgList):
        start = time.time()
        # Read the next frame
        im = cv2.imread(self.imgList[idx_frame])
        # Detect objects on the image
        bbox_xcycwh, cls_conf, cls_ids = self.detectron2.detect(im)
        # keep only detections whose bottom-center point maps into the world plane
        detection_mask = [(xc, yc + (h / 2)) for xc, yc, w, h in bbox_xcycwh]
        detection_mask = self.myCoordMapper.image2xy(detection_mask)
        detection_mask = [False if x is None else True for x in detection_mask]
        bbox_xcycwh, cls_conf, cls_ids = (bbox_xcycwh[detection_mask],
                                          cls_conf[detection_mask],
                                          cls_ids[detection_mask])
        # TODO: is a None check needed here?
        if bbox_xcycwh is not None:  # and len(bbox_xcycwh) > 0
            # NOTE: this is a double check, since all returned boxes are person
            # objects (asserted in the detect function)
            # select class person
            mask = cls_ids == 0
            cls_conf = cls_conf[mask]
            # NOTE: bbox dilation just in case the bbox is too small; delete
            # this line if using a better pedestrian detector
            # TODO: Uncomment 1.1
            bbox_xcycwh = bbox_xcycwh[mask]
            # bbox_xcycwh[:, 3:] *= 1.1
            idx_frame += 1
            # Draw all detection boxes
            bb_xyxy = [[xc - w / 2, yc - h / 2, xc + w / 2, yc + h / 2]
                       for xc, yc, w, h in bbox_xcycwh]
            bb_xyxy = [x for x, conf in zip(bb_xyxy, cls_conf)
                       if conf > self.deepsort.min_confidence]
            all1 = [None] * len(bb_xyxy)
            im = draw_bboxes(im, bb_xyxy, all1)
            # Do tracking
            outputs, deadtracks = self.deepsort.update(bbox_xcycwh, cls_conf, im)
            print('len outputs:{0}, len deadtracks:{1}'.format(len(outputs), len(deadtracks)))
            # Draw boxes for visualization
            if len(outputs) > 0:
                bbox_xyxy = outputs[:, :4]
                identities = outputs[:, -1]
                im = draw_bboxes(im, bbox_xyxy, identities)
                # Collect rows for the results file
                bbox_tlwh = [self.deepsort._xyxy_to_tlwh(bb) for bb in bbox_xyxy]
                results.append((idx_frame - 1, bbox_tlwh, identities))
            im = draw_frameNum(im, (2514, 330), idx_frame - 1)
            # Draw boxes for dead tracks for debugging
            if len(deadtracks) > 0:
                bbox_xyxy = [x[:4] for x in deadtracks]
                labels = [x[-1] for x in deadtracks]
                im = draw_dead_bboxes(im, bbox_xyxy, labels)
        end = time.time()
        print("time: {}s, fps: {}, frame: {}".format(end - start, 1 / (end - start),
                                                     idx_frame - 1),
              '\n', '-' * 30, '\n')
        if self.args.save_path:
            self.output.write(im)
    # Write all tracked objects to file
    write_results(self.args.result_path, results, 'mot')
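# `write_results(path, results, 'mot')` is assumed to dump the accumulated
# (frame, tlwh_boxes, track_ids) tuples in MOTChallenge text format, matching
# the "%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1" rows written elsewhere in these
# snippets. A sketch under that assumption:
def write_results(filename, results, data_type='mot'):
    assert data_type == 'mot', "only the MOT format is sketched here"
    with open(filename, 'w') as f:
        for frame_id, tlwhs, track_ids in results:
            for (x, y, w, h), track_id in zip(tlwhs, track_ids):
                # MOT format: frame, id, x, y, w, h, conf, -1, -1, -1
                f.write('%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1\n' %
                        (frame_id, track_id, x, y, w, h))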