def tracker(self):
    # initialize DeepSORT from the default config file
    cfg = get_config()
    cfg.merge_from_file("deep_sort_pytorch/configs/deep_sort.yaml")
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=False)
    return deepsort
def build_tracker(self):
    """
    Build the DeepSORT tracker from the default config.
    To change the config, tweak the yaml file here:
    deep_sort_pytorch/configs/deep_sort.yaml
    """
    cfg = get_config()
    cfg.merge_from_file(self.config_deepsort)
    use_cuda = self.device != "cpu" and torch.cuda.is_available()
    self.tracker = DeepSort(cfg.DEEPSORT.REID_CKPT,
                            max_dist=cfg.DEEPSORT.MAX_DIST,
                            min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                            nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                            max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                            max_age=cfg.DEEPSORT.MAX_AGE,
                            n_init=cfg.DEEPSORT.N_INIT,
                            nn_budget=cfg.DEEPSORT.NN_BUDGET,
                            use_cuda=use_cuda)
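# Every detect()/infer() function below drives DeepSORT through the same
# per-frame contract: detections go in as center-format boxes (x_c, y_c, w, h)
# plus confidences, and confirmed tracks come back as corner-format boxes with
# a trailing track id. A minimal sketch of that contract, assuming a `deepsort`
# instance built as above; the dummy boxes and blank frame are illustrative only.
import numpy as np
import torch

xywhs = torch.Tensor([[320.0, 240.0, 60.0, 120.0],   # two fake detections in
                      [100.0, 200.0, 50.0, 110.0]])  # (x_c, y_c, w, h) format
confss = torch.Tensor([[0.91], [0.78]])              # one confidence per box
im0 = np.zeros((480, 640, 3), dtype=np.uint8)        # stand-in BGR frame

outputs = deepsort.update(xywhs, confss, im0)  # rows of [x1, y1, x2, y2, track_id]
if len(outputs) > 0:
    bbox_xyxy = outputs[:, :4]   # corner-format track boxes
    identities = outputs[:, -1]  # track ids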
def main(data_root='', seqs=('',), args=""):
    logger = get_logger()
    logger.setLevel(logging.INFO)
    data_type = 'mot'
    result_root = os.path.join(Path(data_root), "mot_results")
    mkdir_if_missing(result_root)
    cfg = get_config()
    # cfg.merge_from_file(args.config_detection)
    cfg.merge_from_file(args.config_deepsort)

    # run tracking
    accs = []
    for seq in seqs:
        logger.info('start seq: {}'.format(seq))
        result_filename = os.path.join(result_root, 'result.txt')
        # video_path = data_root + "/" + seq + "/video/video.mp4"
        # with VideoTracker(cfg, args, video_path, result_filename) as vdo_trk:
        #     vdo_trk.run()

        # eval
        logger.info('Evaluate seq: {}'.format(seq))
        evaluator = Evaluator(data_root, seq, data_type)
        accs.append(evaluator.eval_file(result_filename))

    # get summary
    metrics = mm.metrics.motchallenge_metrics
    mh = mm.metrics.create()
    summary = Evaluator.get_summary(accs, seqs, metrics)
    strsummary = mm.io.render_summary(summary,
                                      formatters=mh.formatters,
                                      namemap=mm.io.motchallenge_metric_names)
    print(strsummary)
    Evaluator.save_summary(summary, os.path.join(result_root, 'summary_global.xlsx'))
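# The result files that Evaluator.eval_file() consumes (and that the detect()
# functions below append to) follow the MOTChallenge row layout: frame, id,
# left, top, width, height, then four unused fields set to -1. A minimal
# sketch of writing one such row; these snippets use a space-separated variant,
# while strict MOTChallenge files are comma-separated.
def write_mot_row(f, frame_idx, identity, left, top, w, h):
    f.write(('%g ' * 10 + '\n') % (frame_idx, identity, left, top, w, h, -1, -1, -1, -1))

with open('result.txt', 'a') as f:
    write_mot_row(f, frame_idx=0, identity=1, left=10, top=20, w=50, h=120)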
def detect(config):
    sent_videos = set()  # names of videos already written and logged
    TIME_TO_SEND_MSG = 10  # Greenwich Time
    # Russian month names (genitive case) for the daily Telegram message
    months_rus = ('января', 'февраля', 'марта', 'апреля', 'мая', 'июня',
                  'июля', 'августа', 'сентября', 'октября', 'ноября', 'декабря')
    fpeses = []
    fps = 0.0
    fps_imutils = imutils.video.FPS().start()
    left_array = None
    rect_left = None
    token = "xxx"
    bot = telebot.TeleBot(token)

    def send_message(current_date, counter_in, counter_out):
        channel = '-1001399933919'
        # "{date}: {n} entered, {n} left" (message text is in Russian)
        msg_tosend = "{}: зашло {}, вышло {}".format(current_date, counter_in, counter_out)
        bot.send_message(chat_id=channel, text=msg_tosend)

    # camera info
    save_img = True
    imgsz = (416, 416) if ONNX_EXPORT else config["img_size"]  # (320, 192) or (416, 256) or (608, 352) for (height, width)
    out, source, weights, half, view_img = config["output"], config["source"], config["weights"], \
        config["half"], config["view_img"]
    webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(config["config_deepsort"])

    # initial objects of classes
    counter = Counter(counter_in=0, counter_out=0, track_id=0)
    VideoHandler = Writer()
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize device, weights etc.
    device = torch_utils.select_device(device='cpu' if ONNX_EXPORT else config["device"])
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Initialize model
    model = Darknet(config["cfg"], imgsz)

    # Load weights
    attempt_download(weights)
    if weights.endswith('.pt'):  # pytorch format
        model.load_state_dict(torch.load(weights, map_location=device)['model'], strict=False)
    else:  # darknet format
        load_darknet_weights(model, weights)

    # Eval mode
    model.to(device).eval()

    # Half precision
    print(half)
    half = half and device.type != 'cpu'  # half precision only supported on CUDA
    print(half)
    if half:
        model.half()

    if webcam:
        view_img = True
        torch.backends.cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        view_img = True
        torch.backends.cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = load_classes(config["names"])
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img.float()) if device.type != 'cpu' else None  # run once

    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        # the counting zone is the left half of the frame, set up once
        if rect_left is None:
            if webcam:  # batch_size >= 1
                im0 = im0s[0].copy()
            else:
                im0 = im0s
            left_array = [0, 0, im0.shape[1] / 2, im0.shape[0]]
            rect_left = Rectangle(left_array[0], left_array[1], left_array[2], left_array[3])

        flag_anyone_in_door = False
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = torch_utils.time_synchronized()
        pred = model(img, augment=config["augment"])[0]

        # to float
        if half:
            pred = pred.float()

        # Apply NMS
        classes = None if config["classes"] == "None" else config["classes"]
        pred = non_max_suppression(pred, config["conf_thres"], config["iou_thres"],
                                   multi_label=False, classes=classes,
                                   agnostic=config["agnostic_nms"])

        # Process detections
        lost_ids = counter.return_lost_ids()
        for i, det in enumerate(pred):  # detections for image i
            if webcam:  # batch_size >= 1
                im0 = im0s[i].copy()
            else:
                im0 = im0s
            bbox_xywh = []
            confs = []
            if det is not None and len(det):
                # Rescale boxes from imgsz to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    if names[int(c)] not in config["needed_classes"]:
                        continue

                # Write results
                for *xyxy, conf, cls in det:
                    # check if bbox's class is needed
                    if names[int(cls)] not in config["needed_classes"]:
                        continue
                    x_c, y_c, bbox_w, bbox_h = bbox_rel(*xyxy)
                    obj = [x_c, y_c, bbox_w, bbox_h]
                    bbox_xywh.append(obj)
                    confs.append([conf.item()])
                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)])

                detections = torch.Tensor(bbox_xywh)
                confidences = torch.Tensor(confs)
                if len(detections) != 0:
                    outputs_tracked = deepsort.update(detections, confidences, im0)
                    counter.someone_inframe()
                    # draw boxes for visualization
                    if len(outputs_tracked) > 0:
                        bbox_xyxy = outputs_tracked[:, :4]
                        identities = outputs_tracked[:, -1]
                        draw_boxes(im0, bbox_xyxy, identities)
                        counter.update_identities(identities)

                        for bbox_tracked, id_tracked in zip(bbox_xyxy, identities):
                            ratio_initial = find_ratio_ofbboxes(bbox=bbox_tracked,
                                                                rect_compare=rect_left)
                            # the person is inside the door contour for the first time
                            if VideoHandler.counter_frames_indoor == 0:
                                VideoHandler.start_video(id_tracked)
                                flag_anyone_in_door = True
                            elif id_tracked not in VideoHandler.id_inside_door_detected:
                                VideoHandler.continue_opened_video(id=id_tracked, seconds=3)
                                flag_anyone_in_door = True

                            if id_tracked not in counter.people_init or counter.people_init[id_tracked] == 0:
                                counter.obj_initialized(id_tracked)
                                if ratio_initial >= 0.8 and bbox_tracked[3] < left_array[3]:
                                    counter.people_init[id_tracked] = 2
                                elif ratio_initial < 0.8 and bbox_tracked[3] > left_array[3]:
                                    counter.people_init[id_tracked] = 1
                                else:
                                    # res is None, means that object is not in door contour
                                    counter.people_init[id_tracked] = 1
                                counter.frame_age_counter[id_tracked] = 0
                            counter.people_bbox[id_tracked] = bbox_tracked
                            counter.cur_bbox[id_tracked] = bbox_tracked
            else:
                deepsort.increment_ages()

            if counter.need_to_clear():
                counter.clear_all()

            for val in counter.people_init.keys():
                # check bbox also
                cur_c = find_centroid(counter.cur_bbox[val])
                init_c = find_centroid(counter.people_bbox[val])
                vector_person = (cur_c[0] - init_c[0], cur_c[1] - init_c[1])

                if val in lost_ids and counter.people_init[val] != -1:
                    # if vector_person[0] < 0, the current coordinate is smaller than the
                    # initial one, i.e. the person is moving in the exit direction
                    ratio = find_ratio_ofbboxes(bbox=counter.cur_bbox[val], rect_compare=rect_left)
                    if vector_person[0] > 200 and counter.people_init[val] == 2 and ratio < 0.7:
                        counter.get_out()
                        VideoHandler.stop_recording(action_occured="вышел из кабинета")  # "left the office"
                        print('video {}, action: {}, vector {} \n'.format(
                            VideoHandler.video_name, VideoHandler.action_occured, vector_person))
                    elif vector_person[0] < -100 and counter.people_init[val] == 1 and ratio >= 0.7:
                        counter.get_in()
                        VideoHandler.stop_recording(action_occured="зашел в кабинет")  # "entered the office"
                        print('video {}, action: {}, vector {} \n'.format(
                            VideoHandler.video_name, VideoHandler.action_occured, vector_person))
                    counter.people_init[val] = -1
                    lost_ids.remove(val)

            counter.clear_lost_ids()

            ins, outs = counter.show_counter()
            cv2.rectangle(im0, (0, 0), (250, 50), (0, 0, 0), -1, 8)
            cv2.rectangle(im0, (int(left_array[0]), int(left_array[1])),
                          (int(left_array[2]), int(left_array[3])), (23, 158, 21), 3)
            cv2.putText(im0, "in: {}, out: {} ".format(ins, outs), (10, 35), 0,
                        1e-3 * im0.shape[0], (255, 255, 255), 3)

            if VideoHandler.stop_writing(im0):
                # send_new_posts(video_name, action_occured)
                sent_videos.add(VideoHandler.video_name)
                with open('data_files/logs2.txt', 'a', encoding="utf-8-sig") as wr:
                    wr.write('video {}, action: {}, vector {} \n'.format(
                        VideoHandler.video_name, VideoHandler.action_occured, vector_person))
                VideoHandler = Writer()
                VideoHandler.set_fps(fps)
            else:
                VideoHandler.continue_writing(im0, flag_anyone_in_door)

            if view_img:
                cv2.imshow('im0', im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # estimate FPS over the first 30 frames, then freeze it
            delta_time = (torch_utils.time_synchronized() - t1)
            if len(fpeses) < 30:
                fpeses.append(1 / delta_time)
            elif len(fpeses) == 30:
                median_fps = float(np.median(np.array(fpeses)))
                # fps = round(median_fps, 1)
                fps = 20
                print('fps set: ', fps)
                VideoHandler.set_fps(fps)
                counter.set_fps(fps)
                fpeses.append(fps)
                motion_detection = True
            else:
                print('\nflag writing video: ', VideoHandler.flag_writing_video)
                print('flag stop writing: ', VideoHandler.flag_stop_writing)
                print('flag anyone in door: ', flag_anyone_in_door)
                print('counter frames indoor: ', VideoHandler.counter_frames_indoor)
                # fps = 20

        # once a day, send the counters to the Telegram channel and reset them
        gm_time = gmtime()
        if gm_time.tm_hour == TIME_TO_SEND_MSG and not counter.just_inited:
            day = gm_time.tm_mday
            month = months_rus[gm_time.tm_mon - 1]
            year = gm_time.tm_year
            date = "{} {} {}".format(day, month, year)
            in_counted, out_counted = counter.show_counter()
            send_message(current_date=date, counter_in=in_counted, counter_out=out_counted)
            counter = Counter(0, 0, 0)
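# find_ratio_ofbboxes() is not defined in these snippets; the counter logic
# above only needs "what fraction of the tracked box lies inside the
# comparison rectangle". A minimal sketch of one way to compute that, assuming
# both box and rectangle are in (x1, y1, x2, y2) corner format (hypothetical
# helper, not necessarily the original implementation):
def bbox_in_rect_ratio(bbox, rect):
    ix1, iy1 = max(bbox[0], rect[0]), max(bbox[1], rect[1])
    ix2, iy2 = min(bbox[2], rect[2]), min(bbox[3], rect[3])
    inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)                # intersection area
    area = max((bbox[2] - bbox[0]) * (bbox[3] - bbox[1]), 1e-9)  # bbox area
    return inter / area  # 1.0 means the box is fully inside the rectangle

print(bbox_in_rect_ratio([0, 0, 10, 10], [5, 0, 20, 10]))  # 0.5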
def detect(opt, save_img=False):
    global bird_image
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')

    # initialize the ROI frame
    cv2.namedWindow("image")
    cv2.setMouseCallback("image", get_mouse_points)

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = torch.load(weights, map_location=device)['model'].float()  # load to FP32
    model.to(device).eval()
    if half:
        model.half()  # to FP16

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        view_img = True
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names

    # initialize moving average window
    movingAverageUpdater = movingAverage.movingAverage(5)

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once

    save_path = str(Path(out))
    txt_path = str(Path(out)) + '/results.txt'

    d = DynamicUpdate()
    d.on_launch()
    risk_factors = []
    frame_nums = []
    count = 0

    # First frame: collect the ROI and the distance threshold via mouse clicks
    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        if frame_idx == 0:
            while True:
                image = im0s
                cv2.imshow("image", image)
                cv2.waitKey(1)
                if len(mouse_pts) == 7:
                    cv2.destroyWindow("image")
                    break
            four_points = mouse_pts

            # Get perspective; M is the transformation matrix for bird's eye view
            M, Minv = get_camera_perspective(image, four_points[0:4])

            # The last two points from get_mouse_points give the threshold distance between points
            threshold_pts = src = np.float32(np.array([four_points[4:]]))

            # Convert distance to bird's eye view
            warped_threshold_pts = cv2.perspectiveTransform(threshold_pts, M)[0]

            # Get distance in pixels
            threshold_pixel_dist = np.sqrt(
                (warped_threshold_pts[0][0] - warped_threshold_pts[1][0]) ** 2
                + (warped_threshold_pts[0][1] - warped_threshold_pts[1][1]) ** 2)

            # Draw the ROI on the output images
            ROI_pts = np.array([four_points[0], four_points[1],
                                four_points[3], four_points[2]], np.int32)

            # initialize birdeye view video writer
            frame_h, frame_w, _ = image.shape
            bevw = birdeye_video_writer.birdeye_video_writer(frame_h, frame_w, M,
                                                             threshold_pixel_dist)
        else:
            break

    t = time.time()
    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        print("Loop time: ", time.time() - t)
        t = time.time()
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        cv2.polylines(im0s, [ROI_pts], True, (0, 255, 255), thickness=4)

        # Inference
        tOther = time.time()
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
                                   classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()
        print("Non max suppression and inference: ", time.time() - tOther)
        print("Pre detection time: ", time.time() - t)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            s += '%gx%g ' % img.shape[2:]  # print string
            save_path = str(Path(out) / Path(p).name)

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                bbox_xywh = []
                bbox_xyxy = []
                confs = []
                ROI_polygon = Polygon(ROI_pts)

                # Adapt detections to deep sort input format
                for *xyxy, conf, cls in det:
                    img_h, img_w, _ = im0.shape
                    x_c, y_c, bbox_w, bbox_h = bbox_rel(img_w, img_h, *xyxy)
                    obj = [x_c, y_c, bbox_w, bbox_h]
                    confs.append([conf.item()])
                    bbox_xyxy.append(xyxy)
                    bbox_xywh.append(obj)

                xywhs = torch.Tensor(bbox_xywh)
                confss = torch.Tensor(confs)

                # Pass detections to deepsort
                deepsortTime = time.time()
                # outputs = deepsort.update(xywhs, confss, im0)
                print("Deepsort function call: ", (time.time() - deepsortTime))
                outputs = bbox_xyxy

                # draw boxes for visualization
                if len(outputs) > 0:
                    # filter deepsort output
                    outputs_in_ROI, ids_in_ROI = remove_points_outside_ROI(bbox_xyxy, ROI_polygon)
                    center_coords_in_ROI = xywh_to_center_coords(outputs_in_ROI)
                    warped_pts = birdeye_transformer.transform_center_coords_to_birdeye(
                        center_coords_in_ROI, M)
                    clusters = DBSCAN(eps=threshold_pixel_dist, min_samples=1).fit(warped_pts)
                    print(clusters.labels_)
                    draw_boxes(im0, outputs_in_ROI, clusters.labels_)
                    risk_dict = Counter(clusters.labels_)
                    bird_image = bevw.create_birdeye_frame(warped_pts, clusters.labels_, risk_dict)

                    # movingAverageUpdater.updatePoints(warped_pts, ids_in_ROI)
                    #
                    # gettingAvgTime = time.time()
                    # movingAveragePairs = movingAverageUpdater.getCurrentAverage()
                    #
                    # movingAverageIds = [id for id, x_coord, y_coord in movingAveragePairs]
                    # movingAveragePts = [(x_coord, y_coord) for id, x_coord, y_coord in movingAveragePairs]

                    # embed the bird image into the video
                    # otherStuff = time.time()
                    # if (len(movingAveragePairs) > 0):
                    #     movingAvgClusters = DBSCAN(eps=threshold_pixel_dist,
                    #                                min_samples=1).fit(movingAveragePts)
                    #     movingAvgClustersLables = movingAvgClusters.labels_
                    #     risk_dict = Counter(movingAvgClustersLables)
                    #     bird_image = bevw.create_birdeye_frame(movingAveragePts, movingAvgClustersLables, risk_dict)
                    #     bird_image = resize(bird_image, 20)
                    #     bv_height, bv_width, _ = bird_image.shape
                    #     frame_x_center, frame_y_center = frame_w // 2, frame_h // 2
                    #     x_offset = 20
                    #     im0[frame_y_center - bv_height // 2:frame_y_center + bv_height // 2,
                    #         x_offset:bv_width + x_offset] = bird_image
                    # else:
                    #     risk_dict = Counter(clusters.labels_)
                    #     bird_image = bevw.create_birdeye_frame(warped_pts, clusters.labels_, risk_dict)

                    bird_image = resize(bird_image, 20)
                    bv_height, bv_width, _ = bird_image.shape
                    frame_x_center, frame_y_center = frame_w // 2, frame_h // 2
                    x_offset = 20
                    im0[frame_y_center - bv_height // 2:frame_y_center + bv_height // 2,
                        x_offset:bv_width + x_offset] = bird_image
                    # print("Other stuff: ", time.time() - otherStuff)

                    # write the risk graph
                    risk_factors += [compute_frame_rf(risk_dict)]
                    frame_nums += [frame_idx]
                    graphTime = time.time()
                    if frame_idx > 100:
                        count += 1
                        frame_nums.pop(0)
                        risk_factors.pop(0)
                    if frame_idx % 10 == 0:
                        d.on_running(frame_nums, risk_factors, count, count + 100)
                    print("Graph Time: ", time.time() - graphTime)

                # Write MOT compliant results to file
                if save_txt and len(outputs_in_ROI) != 0:
                    for j, output in enumerate(outputs_in_ROI):
                        bbox_left = output[0]
                        bbox_top = output[1]
                        bbox_w = output[2]
                        bbox_h = output[3]
                        identity = output[-1]
                        with open(txt_path, 'a') as f:
                            f.write(('%g ' * 10 + '\n') % (frame_idx, identity, bbox_left,
                                                           bbox_top, bbox_w, bbox_h,
                                                           -1, -1, -1, -1))  # label format

            # Stream results
            if view_img:
                # cv2.imshow("bird_image", bird_image)
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, bird_image)
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path,
                                                     cv2.VideoWriter_fourcc(*opt.fourcc),
                                                     fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
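# The bird's-eye distancing check above reduces to two calls: warp ground-plane
# points with a homography, then cluster the warped points with DBSCAN using
# the warped threshold distance as eps (clusters of size > 1 are too close).
# A self-contained sketch with made-up coordinates:
import cv2
import numpy as np
from sklearn.cluster import DBSCAN

src = np.float32([[100, 400], [500, 400], [520, 200], [80, 200]])  # ROI corners in the image
dst = np.float32([[0, 0], [400, 0], [400, 400], [0, 400]])         # bird's-eye target square
M = cv2.getPerspectiveTransform(src, dst)

pts = np.float32([[[150, 380], [160, 390], [480, 250]]])  # detected foot points
warped = cv2.perspectiveTransform(pts, M)[0]

clusters = DBSCAN(eps=50.0, min_samples=1).fit(warped)  # eps = pixel threshold
print(clusters.labels_)  # points sharing a label are within the threshold distance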
def detect(opt, save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA
    now = datetime.datetime.now().strftime("%Y/%m/%d/%H:%M:%S")  # current time

    # Load model
    model = torch.load(weights, map_location=device)['model'].float()  # load to FP32
    model.to(device).eval()
    if half:
        model.half()  # to FP16

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        view_img = False
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
    save_path = str(Path(out))
    txt_path = str(Path(out)) + '/results.txt'

    url = 'sample_url'
    uid = 'bus1'
    os.system('shutdown -r 06:00')  # schedule a daily reboot at 06:00
    memory = {}
    people_counter = 0
    car_counter = 0
    in_people = 0
    out_people = 0
    time_sum = 0
    now_time = datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S')

    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
                                   classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            s += '%gx%g ' % img.shape[2:]  # print string
            save_path = str(Path(out) / Path(p).name)

            img_center_x = int(im0.shape[1] // 2)
            # line = [(0, img_center_y), (im0.shape[1], img_center_y)]
            line = [(int(img_center_x + 50), 0), (img_center_x + 50, int(im0.shape[0]))]
            line2 = [(int(img_center_x + 170), 0), (img_center_x + 170, int(im0.shape[0]))]
            cv2.line(im0, line[0], line[1], (0, 0, 255), 5)
            cv2.line(im0, line2[0], line2[1], (0, 255, 0), 5)

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
                crop_xyxy = det[:, :4]
                det = det[crop_xyxy[:, 0] < img_center_x + 170]  # drop detections to the right of line2
                if len(det) == 0:
                    pass
                else:
                    # Print results
                    for c in det[:, -1].unique():
                        n = (det[:, -1] == c).sum()  # detections per class
                        s += '%g %ss, ' % (n, names[int(c)])  # add to string

                    bbox_xywh = []
                    confs = []
                    bbox_xyxy = []

                    # Adapt detections to deep sort input format
                    for *xyxy, conf, cls in det:
                        x_c, y_c, bbox_w, bbox_h = bbox_rel(*xyxy)
                        obj = [x_c, y_c, bbox_w, bbox_h, int(cls)]
                        # cv2.circle(im0, (int(x_c), int(y_c)), color=(0, 255, 255), radius=12, thickness=10)
                        bbox_xywh.append(obj)
                        # bbox_xyxy.append(rec)
                        confs.append([conf.item()])

                    xywhs = torch.Tensor(bbox_xywh)
                    confss = torch.Tensor(confs)

                    # Pass detections to deepsort
                    outputs = deepsort.update(xywhs, confss, im0)

                    # deepsort
                    index_id = []
                    previous = memory.copy()
                    memory = {}
                    boxes = []
                    names_ls = []

                    # draw boxes for visualization
                    if len(outputs) > 0:
                        bbox_xyxy = outputs[:, :4]
                        identities = outputs[:, -2]
                        labels = outputs[:, -1]
                        dic = {0: 'person', 2: 'car'}
                        for label in labels:
                            names_ls.append(dic[label])
                        # print('output len', len(outputs))
                        for output in outputs:
                            boxes.append([output[0], output[1], output[2], output[3]])
                            index_id.append('{}-{}'.format(names_ls[-1], output[-2]))
                            memory[index_id[-1]] = boxes[-1]

                        # every 60 seconds of inference time, POST the counters and reset
                        if time_sum >= 60:
                            param = {'In_people': in_people, 'Out_people': out_people, 'uid': uid,
                                     'time': now_time + '~' + datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S')}
                            response = requests.post(url, data=param)
                            response_text = response.text
                            with open('counting.txt', 'a') as f:
                                f.write('{}~{} IN : {}, Out : {} Response: {}\n'.format(
                                    now_time, datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S'),
                                    in_people, out_people, response_text))
                            people_counter, car_counter, in_people, out_people = 0, 0, 0, 0
                            time_sum = 0
                            now_time = datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S')

                        i = int(0)
                        for box in boxes:
                            # extract the bounding box coordinates
                            (x, y) = (int(box[0]), int(box[1]))
                            (w, h) = (int(box[2]), int(box[3]))
                            if index_id[i] in previous:
                                previous_box = previous[index_id[i]]
                                (x2, y2) = (int(previous_box[0]), int(previous_box[1]))
                                (w2, h2) = (int(previous_box[2]), int(previous_box[3]))
                                p0 = (int(x + (w - x) / 2), int(y + (h - y) / 2))
                                p1 = (int(x2 + (w2 - x2) / 2), int(y2 + (h2 - y2) / 2))
                                cv2.line(im0, p0, p1, (0, 255, 0), 3)
                                # current frame obj center point - previous frame obj center point
                                if intersect(p0, p1, line[0], line[1]) and index_id[i].split('-')[0] == 'person':
                                    people_counter += 1
                                    if p0[0] > line[1][0]:
                                        in_people += 1
                                    else:
                                        out_people += 1
                                if intersect(p0, p1, line[0], line[1]) and index_id[i].split('-')[0] == 'car':
                                    car_counter += 1
                            i += 1

                        draw_boxes(im0, bbox_xyxy, identities, labels)

                    # Write MOT compliant results to file
                    if save_txt and len(outputs) != 0:
                        for j, output in enumerate(outputs):
                            bbox_left = output[0]
                            bbox_top = output[1]
                            bbox_w = output[2]
                            bbox_h = output[3]
                            identity = output[-1]
                            with open(txt_path, 'a') as f:
                                f.write(('%g ' * 10 + '\n') % (frame_idx, identity, bbox_left,
                                                               bbox_top, bbox_w, bbox_h,
                                                               -1, -1, -1, -1))  # label format
            else:
                deepsort.increment_ages()

            cv2.putText(im0, 'In : {}, Out : {}'.format(in_people, out_people),
                        (130, 50), cv2.FONT_HERSHEY_COMPLEX, 1.0, (0, 0, 255), 3)
            cv2.putText(im0, 'Person : {}'.format(people_counter),
                        (130, 100), cv2.FONT_HERSHEY_COMPLEX, 1.0, (0, 0, 255), 3)

            # Print time (inference + NMS)
            if time_sum >= 60:
                param = {'In_people': in_people, 'Out_people': out_people, 'uid': uid,
                         'time': now_time + '~' + datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S')}
                response = requests.post(url, data=param)
                response_text = response.text
                with open('counting.txt', 'a') as f:
                    f.write('{}~{} IN : {}, Out : {}, Response: {}\n'.format(
                        now_time, datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S'),
                        in_people, out_people, response_text))
                people_counter, car_counter, in_people, out_people = 0, 0, 0, 0
                time_sum = 0
                now_time = datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S')
            print('%sDone. (%.3fs)' % (s, t2 - t1))
            time_sum += t2 - t1

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    im0 = cv2.resize(im0, (0, 0), fx=0.5, fy=0.5, interpolation=cv2.INTER_LINEAR)
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path,
                                                     cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

    # final flush of the counters
    param = {'In_people': in_people, 'Out_people': out_people, 'uid': uid,
             'time': now_time + '~' + datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S')}
    response = requests.post(url, data=param)
    response_text = response.text
    with open('counting.txt', 'a') as f:
        f.write('{}~{} IN : {}, Out : {}, Response: {}\n'.format(
            now_time, datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S'),
            in_people, out_people, response_text))
    print('Done. (%.3fs)' % (time.time() - t0))
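# intersect() above (does the segment between a track's previous and current
# center point cross the counting line?) is not defined in this snippet. A
# common implementation is the CCW orientation test; a minimal sketch (an
# assumed implementation, not necessarily the original):
def ccw(a, b, c):
    return (c[1] - a[1]) * (b[0] - a[0]) > (b[1] - a[1]) * (c[0] - a[0])

def intersect(p0, p1, q0, q1):
    # True if segment p0-p1 crosses segment q0-q1
    return ccw(p0, q0, q1) != ccw(p1, q0, q1) and ccw(p0, p1, q0) != ccw(p0, p1, q1)

print(intersect((0, 0), (10, 10), (0, 10), (10, 0)))  # True: the segments cross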
def infer(model, data_path, detections_file, resize, max_size, batch_size,
          config_deepsort, mixed_precision=False, is_master=True, world=0,
          original_annotations=None, use_dali=True, is_validation=False,
          verbose=False, save_images=False, output_path='./'):
    'Run inference on images from path'
    # import pdb; pdb.set_trace()
    if os.path.isdir(output_path):
        shutil.rmtree(output_path)
    os.mkdir(output_path)
    print('model', model)
    backend = 'pytorch' if isinstance(model, Model) or isinstance(model, DDP) else 'tensorrt'
    # print("backend", backend)
    stride = model.module.stride if isinstance(model, DDP) else model.stride

    # TensorRT only supports fixed input sizes, so override input size accordingly
    if backend == 'tensorrt':
        max_size = max(model.input_size)

    cfg = get_config()
    cfg.merge_from_file(config_deepsort)
    conf_threshold = cfg.DEEPSORT.MIN_CONFIDENCE

    # Prepare model
    if backend == 'pytorch':
        # If we are doing validation during training,
        # no need to register model with AMP again
        if not is_validation:
            if torch.cuda.is_available():
                model = model.cuda()
            model = amp.initialize(model, None,
                                   opt_level='O2' if mixed_precision else 'O0',
                                   keep_batchnorm_fp32=True,
                                   verbosity=0)
            model.eval()

    if verbose:
        print(' backend: {}'.format(backend))
        print(' device: {} {}'.format(
            world, 'cpu' if not torch.cuda.is_available() else 'gpu' if world == 1 else 'gpus'))
        print(' batch: {}, precision: {}'.format(
            batch_size, 'unknown' if backend == 'tensorrt' else 'mixed' if mixed_precision else 'full'))
    print('Running inference on {}'.format(os.path.basename(data_path)))

    results = []
    profiler = Profiler(['infer', 'fw'])

    def processResult(results, data_iterator):
        p_detections = []
        C = data_iterator.coco
        for d in results:
            id, outputs, ratios = d
            img = C.loadImgs([id])
            filename = img[0]['file_name']
            result = ['', [], []]
            result[0] = os.path.join(path, filename)
            if len(outputs) > 0:
                # import pdb; pdb.set_trace()
                outputs[:, :4] = outputs[:, :4] / ratios
                result[1] = outputs
            A = C.loadAnns(C.getAnnIds([id]))
            # import pdb; pdb.set_trace()
            for a in A:
                x1, y1, w, h = a['bbox']
                a['bbox'] = [x1, y1, x1 + w, y1 + h]
            result[2] = A
            p_detections += [result]
        return p_detections

    path = data_path  # + 'sequences/'
    videoList = os.listdir(path)

    # Prepare dataset
    if verbose:
        print('Preparing dataset...')
    # Create annotations if none were provided
    if not original_annotations:
        return
    else:
        annotations = original_annotations
    data_iterator = DataIterator(path, resize, max_size, batch_size, stride,
                                 world, annotations, training=False)

    detection_results = []
    id_count = 0
    sort_time = 0
    with torch.no_grad():
        for i, (data, ids, ratios) in enumerate(tqdm(data_iterator)):
            video = os.path.dirname(data_iterator.coco.loadImgs(ids.item())[0]['file_name'])
            # a new video sequence starts: open its result file and reset the tracker
            if not os.path.isfile(os.path.join(output_path, video + '.txt')):
                id_count = i
                open(os.path.join(output_path, video + '.txt'), "w+")
                deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                                    max_dist=cfg.DEEPSORT.MAX_DIST,
                                    min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                                    nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                                    max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                                    max_age=cfg.DEEPSORT.MAX_AGE,
                                    n_init=cfg.DEEPSORT.N_INIT,
                                    nn_budget=cfg.DEEPSORT.NN_BUDGET,
                                    use_cuda=True)
                print(id_count)
                if save_images and len(results) > 0:
                    output_anno = processResult(results, data_iterator=data_iterator)
                    print("saving output images...")
                    save_path = os.path.dirname(data_path) + '/outputs/' + video
                    if os.path.isdir(save_path):
                        shutil.rmtree(save_path)
                    os.mkdir(save_path)
                    show_MOT(save_path, output_anno)
                results = []

            # print("data:", data)
            # import pdb; pdb.set_trace()
            profiler.start('fw')
            t1 = time_synchronized()
            scores, boxes, classes = model(data)
            profiler.stop('fw')
            detection_results.append([scores, boxes, classes, ids, ratios])
            # import pdb; pdb.set_trace()
            t2 = time_synchronized()

            im = data[0].permute(1, 2, 0).cpu().numpy()
            # convert corner boxes to DeepSORT's center format
            xywhs = torch.stack([
                torch.stack([x1 + (x2 - x1 + 1) / 2,
                             y1 + (y2 - y1 + 1) / 2,
                             x2 - x1 + 1,
                             y2 - y1 + 1])
                for x1, y1, x2, y2 in boxes[0].round()
            ]).cpu()
            t3 = time_synchronized()
            outputs = deepsort.update(xywhs, scores[0].cpu(), im, mapClasses(classes[0].cpu()))
            # outputs = torch.Tensor(outputs).reshape(1, -1, 5)
            t4 = time_synchronized()
            sort_time += t4 - t3

            if len(outputs) > 0:
                outputs[:, :4] = outputs[:, :4] / ratios[0].item()
            # print(t2 - t1, t3 - t2, t4 - t3)
            results.append([ids[0].item(), outputs, 1])

            # write result to txt
            if len(outputs) != 0:
                for j, output in enumerate(outputs):
                    bbox_left = output[0]
                    bbox_top = output[1]
                    bbox_w = output[2] - output[0]
                    bbox_h = output[3] - output[1]
                    identity = output[-2]
                    cls = output[-1]
                    # if cls == 2:
                    #     continue
                    # import pdb; pdb.set_trace()
                    with open(os.path.join(output_path, video + '.txt'), 'a') as f:
                        f.write(('%g,' * 10 + '\n') % (ids[0].item() - id_count, identity,
                                                       bbox_left, bbox_top, bbox_w, bbox_h,
                                                       1, cls, -1, -1))  # label format

            profiler.bump('infer')
            if verbose and (profiler.totals['infer'] > 60 or i == len(data_iterator) - 1):
                size = len(data_iterator.ids)
                msg = '[{:{len}}/{}]'.format(min((i + 1) * batch_size, size), size, len=len(str(size)))
                msg += ' {:.3f}s/{}-batch'.format(profiler.means['infer'], batch_size)
                msg += ' (fw: {:.3f}s)'.format(profiler.means['fw'])
                msg += ', {:.1f} im/s'.format(batch_size / profiler.means['infer'])
                msg += ', {:.3f} in deepsort'.format(t4 - t3)
                print(msg, flush=True)
                profiler.reset()

    print("Average FPS = {}".format(i / profiler.totals['infer']))
    print("Average tracking time = {}".format(sort_time / i))

    # Gather results from all devices
    if verbose:
        print('Gathering results...')
    detection_results = [torch.cat(r, dim=0) for r in zip(*detection_results)]
    if world > 1:
        for r, result in enumerate(detection_results):
            all_result = [torch.ones_like(result, device=result.device) for _ in range(world)]
            torch.distributed.all_gather(list(all_result), result)
            detection_results[r] = torch.cat(all_result, dim=0)
    # import pdb; pdb.set_trace()

    if is_master:
        # Copy buffers back to host
        detection_results = [r.cpu() for r in detection_results]

        # Collect detections
        detections = []
        processed_ids = set()
        count = [0, 0, 0]
        for scores, boxes, classes, image_id, ratios in zip(*detection_results):
            image_id = image_id.item()
            if image_id in processed_ids:
                continue
            processed_ids.add(image_id)

            keep = (scores > 0).nonzero()
            scores = scores[keep].view(-1)
            boxes = boxes[keep, :].view(-1, 4) / ratios
            # classes = classes[keep].view(-1).int()
            # import pdb; pdb.set_trace()
            classes = mapClasses(classes[keep].view(-1).int())
            # print('classes', classes)

            for score, box, cat in zip(scores, boxes, classes):
                x1, y1, x2, y2 = box.data.tolist()
                cat = cat.item()
                if 'annotations' in data_iterator.coco.dataset:
                    cat = data_iterator.coco.getCatIds()[cat]
                # if cat != 3:
                #     continue
                # print('cat', cat)
                count[cat] += 1
                if cat != 0:
                    detections.append({
                        'image_id': image_id,
                        'score': score.item(),
                        'bbox': [x1, y1, x2 - x1 + 1, y2 - y1 + 1],
                        'category_id': cat,
                        'identity': 1
                    })
        print(count)

        if detections:
            # import pdb; pdb.set_trace()
            # Save detections
            if detections_file and verbose:
                print('Writing {}...'.format(detections_file))
            detections = {'annotations': detections}
            detections['images'] = data_iterator.coco.dataset['images']
            if 'categories' in data_iterator.coco.dataset:
                detections['categories'] = [data_iterator.coco.dataset['categories']]
            if detections_file:
                json.dump(detections, open(detections_file, 'w'), indent=4)

            # Evaluate model on dataset
            if 'annotations' in data_iterator.coco.dataset:
                if verbose:
                    print('Evaluating model...')
                with redirect_stdout(None):
                    coco_pred = data_iterator.coco.loadRes(detections['annotations'])
                    coco_eval = COCOeval(data_iterator.coco, coco_pred, 'bbox')
                    coco_eval.evaluate()
                    coco_eval.accumulate()
                    coco_eval.summarize()
        else:
            print('No detections!')
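# Shape of the detections file dumped by infer() above: a COCO-style dict with
# an "annotations" list (bbox as [x, y, width, height]) plus "images" and
# "categories" copied from the input dataset. Illustrative values only:
import json

detections = {
    'annotations': [
        {'image_id': 1, 'score': 0.92, 'bbox': [34.0, 50.0, 120.0, 240.0],
         'category_id': 1, 'identity': 1},
    ],
    'images': [{'id': 1, 'file_name': 'seq01/000001.jpg'}],
    'categories': [{'id': 1, 'name': 'person'}],
}
json.dump(detections, open('detections.json', 'w'), indent=4)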
def detect(opt, save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = torch.load(weights, map_location=device)['model'].float()  # load to FP32
    model.to(device).eval()
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        view_img = True
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
    save_path = str(Path(out))
    txt_path_raw = str(Path(out)) + '/results_raw.txt'

    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
                                   classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)
            print(pred)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            s += '%gx%g ' % img.shape[2:]  # print string
            save_path = str(Path(out) / Path(p).name)

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                bbox_xywh = []
                confs = []
                clss = []

                # Adapt detections to deep sort input format
                for *xyxy, conf, cls in det:
                    x_c, y_c, bbox_w, bbox_h = bbox_rel(*xyxy)
                    obj = [x_c, y_c, bbox_w, bbox_h]
                    bbox_xywh.append(obj)
                    confs.append([conf.item()])
                    clss.append(cls.item())

                cls_conf = confs
                cls_ids = clss
                # xywhs = torch.Tensor(bbox_xywh)
                # confss = torch.Tensor(confs)
                # cls_ids = clss
                # if len(bbox_xywh) == 0:
                #     continue
                # print("detection cls_ids:", cls_ids)

                # filter cls id for tracking
                # print("cls_ids")
                # print(cls_ids)
                # # select class
                # mask = []
                # lst_move_life = [0, 1, 2]
                # # lst_for_track = []
                # for id in cls_ids:
                #     if id in lst_move_life:
                #         # lst_for_track.append(id)
                #         mask.append(True)
                #     else:
                #         mask.append()
                # # print("mask cls_ids:", mask)
                # # print(bbox_xywh)
                # bbox_xywh = list(compress(bbox_xywh, mask))
                # bbox dilation just in case bbox too small, delete this line if using a better pedestrian detector
                # bbox_xywh[:, 3:] *= 1.2
                # cls_conf = list(compress(cls_conf, mask))
                # print(cls_conf)

                bbox_xywh = torch.Tensor(bbox_xywh)
                cls_conf = torch.Tensor(cls_conf)

                # Pass detections to deepsort
                outputs = deepsort.update(bbox_xywh, cls_conf, im0, cls_ids)

                ''' TODO: add a proper counter here '''
                # counting num and class
                # draw boxes for visualization
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, 4:5]
                    cls_id = outputs[:, -1]
                    # print(outputs[:, -1])  # --> found an issue here
                    # print("track res cls_id:", cls_id)
                    # cls_ids_show = [cls_ids[i] for i in cls_id]
                    draw_boxes(im0, bbox_xyxy, cls_id, identities)

                # Write MOT compliant results to file
                if save_txt and len(outputs) != 0:
                    for j, output in enumerate(outputs):
                        bbox_left = output[0]
                        bbox_top = output[1]
                        bbox_w = output[2]
                        bbox_h = output[3]
                        identity = output[4]
                        classname = output[5]
                        # raw (first-stage) results produced by YOLOv5 + DeepSORT
                        with open(txt_path_raw, 'a') as f:
                            f.write(('%g ' * 10 + '\n') % (frame_idx, identity, bbox_left,
                                                           bbox_top, bbox_w, bbox_h,
                                                           classname, -1, -1, -1))  # label format
            else:
                deepsort.increment_ages()

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                print('saving img!')
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    print('saving video!')
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path,
                                                     cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
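# The commented-out class filter above is unfinished (its else branch appends
# nothing). A working version of the same idea, keeping only the movable
# classes before handing boxes to the tracker; the id list [0, 1, 2] comes
# from the commented code and is otherwise an assumption:
from itertools import compress

lst_move_life = [0, 1, 2]  # class ids to keep for tracking
mask = [cls_id in lst_move_life for cls_id in cls_ids]
bbox_xywh = list(compress(bbox_xywh, mask))
cls_conf = list(compress(cls_conf, mask))
cls_ids = list(compress(cls_ids, mask))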
def frames():
    out, weights1, weights2, imgsz = \
        'result/', 'weights/yolov5x.pt', 'weights/best.pt', 416
    source = 'uploads/Mask.mp4'
    save_txt = True
    txt_path = 'content/outputs.txt'

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file('deep_sort_pytorch/configs/deep_sort.yaml')
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = select_device()
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder

    # Half precision
    # half = False and device.type != 'cpu'
    half = True and device.type != 'cpu'
    print('half = ' + str(half))

    # Load the person detector that feeds DeepSORT
    model = torch.load(weights1, map_location=device)['model'].float()  # load to FP32
    model.to(device).eval()
    if half:
        model.half()  # to FP16

    dataset = LoadImages(source, img_size=imgsz)
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
    # save_path = str(Path(out))
    # txt_path = str(Path(out)) + '/results.txt'

    # First pass: track people and dump MOT-style rows to txt_path
    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=False)[0]

        # Apply NMS
        pred = non_max_suppression(pred, 0.6, 0.3, classes=0, agnostic=False)
        t2 = time_synchronized()

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            p, s, im0 = path, '', im0s
            s += '%gx%g ' % img.shape[2:]  # print string
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                bbox_xywh = []
                confs = []

                # Adapt detections to deep sort input format
                for *xyxy, conf, cls in det:
                    x_c, y_c, bbox_w, bbox_h = bbox_rel(*xyxy)
                    obj = [x_c, y_c, bbox_w, bbox_h]
                    bbox_xywh.append(obj)
                    confs.append([conf.item()])

                xywhs = torch.Tensor(bbox_xywh)
                confss = torch.Tensor(confs)

                # Pass detections to deepsort
                outputs = deepsort.update(xywhs, confss, im0)
                # print(outputs)

                # draw boxes for visualization
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    draw_boxes(im0, bbox_xyxy, identities)
                    # print(det)

                # Write MOT compliant results to file
                if save_txt and len(outputs) != 0:
                    print('inside savetxt')
                    print(f'{s}Done. ({t2 - t1:.3f}s)')
                    for j, output in enumerate(outputs):
                        bbox_left = output[0]
                        bbox_top = output[1]
                        bbox_w = output[2]
                        bbox_h = output[3]
                        identity = output[-1]
                        with open(txt_path, 'a') as f:
                            f.write(('%g ' * 10 + '\n') % (frame_idx, identity, bbox_left,
                                                           bbox_top, bbox_w, bbox_h,
                                                           -1, -1, -1, -1))  # label format
            else:
                deepsort.increment_ages()

    # Load the mask-classification YOLO model for the second pass
    model = attempt_load(weights2, map_location=device)
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    if half:
        model.half()
    # model.to(device).float().eval()

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        # modelc.to(device).float().eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    dataset = LoadImages(source, img_size=imgsz)
    # dataset = LoadStreams(source, img_size=imgsz)
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
    t0 = time.time()

    history = defaultdict(list)
    DSOutput = pd.read_csv('content/outputs.txt', sep=' ', header=None)

    # Second pass: detect masks and match them to the tracked person ids
    for frameNumber, (path, img, im0s, vid_cap) in enumerate(dataset):
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=False)[0]

        # Apply NMS
        pred = non_max_suppression(pred, 0.3, 0.3, classes=None, agnostic=False)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        for i, det in enumerate(pred):  # detections per image
            p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)
            save_path = 'result/Mask.mp4'
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if len(det):
                num_cat = 6
                classes = (det[:, -1].cpu().numpy()).astype(int)
                one_hot_cats = np.eye(num_cat)[classes].reshape(-1, num_cat)
                counts_per_cat = one_hot_cats.sum(axis=0)
                # print("Countspercat ", counts_per_cat)
                score = round(counts_per_cat[[1, 3, 5]].sum() / len(det), 3)
                weighted_counts_per_cat = one_hot_cats.T @ np.asarray(det[:, -2].cpu())
                WeightedCompliance = weighted_counts_per_cat[[1, 3, 5]].sum() / weighted_counts_per_cat.sum()

                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                person_coords = DSOutput[DSOutput.iloc[:, 0] == frameNumber].values.reshape(-1, 11)
                CurrentFrameDetection = -1 * np.zeros(len(det))
                if len(person_coords) != 0:
                    for itemp, mask_coord in enumerate(det):
                        # overlaps = [Overlap(mask_coord[:4], person_coord, img.shape[2], img.shape[3]) for person_coord in person_coords[:, 2:6]]
                        overlaps = [Overlap(mask_coord[:4].cpu(), person_coord, 10000, 10000)
                                    for person_coord in person_coords[:, 2:6]]
                        best_overlap = np.argmax(overlaps)
                        best_person = person_coords[best_overlap, 1]
                        history[best_person].append(mask_coord[-1].cpu().item())
                        CurrentFrameDetection[itemp] = best_person

                # for c in det[:, -1].unique():  # probably error with torch 1.5
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                CurrentFrameDetection = list(reversed(CurrentFrameDetection))
                for mask, (*xyxy, conf, cls) in enumerate(reversed(det)):
                    label = f'{names[int(cls)]} {conf:.2f}'
                    plot_one_box(xyxy, im0, score, label=label, color=colors[int(cls)],
                                 personid=CurrentFrameDetection[mask], line_thickness=3)

            print(f'{s}Done. ({t2 - t1:.3f}s)')

            if vid_path != save_path:  # new video
                vid_path = save_path
                if isinstance(vid_writer, cv2.VideoWriter):
                    vid_writer.release()  # release previous video writer
                fourcc = 'mp4v'  # output video codec
                fps = vid_cap.get(cv2.CAP_PROP_FPS)
                w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
            vid_writer.write(im0)
            yield cv2.imencode('.jpg', im0)[1].tobytes()

    compliance = 0
    total = 0
    txt_result_path = 'result/result.txt'
    for k, v in history.items():
        # mask class ids: 1, 3, 5 = fully worn; 2, 4 = partially worn; 0 = no mask
        good_frames = sum(np.array(v) % 2 == 1)
        bad_frames = sum(np.array(v) % 2 == 0)
        if len(v) > 4:
            total += 1
            if good_frames >= bad_frames:
                compliance += 1
                print('Person {} is compliant'.format(k))
                with open(txt_result_path, 'a') as f:
                    f.write('Person {} is compliant \n'.format(k))
            else:
                print('Person {} is not compliant'.format(k))
                with open(txt_result_path, 'a') as f:
                    f.write('Person {} is not compliant \n'.format(k))

    Overall_Compliance = round(compliance / total, 3)
    with open(txt_result_path, 'a') as f:
        f.write('Overall compliance:' + str(Overall_Compliance))
    print('Overall compliance:', Overall_Compliance)
    print(f'Done. ({time.time() - t0:.3f}s)')
    return Overall_Compliance
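# Overlap() used above to match each mask box to a tracked person box is not
# defined in this snippet; intersection-over-union is the usual choice for such
# matching. A minimal stand-in (assumed, not the original helper; the original's
# two extra size arguments are ignored here):
def iou(box_a, box_b):
    ix1, iy1 = max(box_a[0], box_b[0]), max(box_a[1], box_b[1])
    ix2, iy2 = min(box_a[2], box_b[2]), min(box_a[3], box_b[3])
    inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    return inter / max(area_a + area_b - inter, 1e-9)

print(iou([0, 0, 10, 10], [5, 5, 15, 15]))  # ~0.143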
def detect(config):
    save_img = False
    imgsz = (320, 320) if ONNX_EXPORT else config["img_size"]  # (320, 192) or (416, 256) or (608, 352) for (height, width)
    out, source, weights, half, view_img, save_txt = config["output"], config["source"], config["weights"], \
        config["half"], config["view_img"], config["save_txt"]
    webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(config["config_deepsort"])
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = torch_utils.select_device(device='cpu' if ONNX_EXPORT else config["device"])
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Initialize model
    model = Darknet(config["cfg"], imgsz)

    # Load weights
    attempt_download(weights)
    if weights.endswith('.pt'):  # pytorch format
        model.load_state_dict(torch.load(weights, map_location=device)['model'], strict=False)
    else:  # darknet format
        load_darknet_weights(model, weights)

    # Second-stage classifier (not used yet)
    classify = False
    modelc = 0
    if classify:
        modelc = torch_utils.load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Eval mode
    model.to(device).eval()

    # Fuse Conv2d + BatchNorm2d layers
    # model.fuse()

    # Export mode
    if ONNX_EXPORT:
        # model.fuse()
        img = torch.zeros((1, 3) + imgsz)  # (1, 3, 320, 192)
        f = config["weights"].replace(config["weights"].split('.')[-1], 'onnx')  # *.onnx filename
        torch.onnx.export(model, img, f, verbose=False, opset_version=9,
                          input_names=['images'], output_names=['classes', 'boxes'])

        # Validate exported model
        import onnx
        model = onnx.load(f)  # Load the ONNX model
        onnx.checker.check_model(model)  # Check that the IR is well formed
        print(onnx.helper.printable_graph(model.graph))  # Print a human readable representation of the graph
        return

    # Half precision
    half = half and device.type != 'cpu'  # half precision only supported on CUDA
    if half:
        model.half()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        torch.backends.cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        view_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = load_classes(config["names"])
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img.float()) if device.type != 'cpu' else None  # run once

    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = torch_utils.time_synchronized()
        pred = model(img, augment=config["augment"])[0]
        t2 = torch_utils.time_synchronized()

        # to float
        if half:
            pred = pred.float()

        # Apply NMS
        classes = None if config["classes"] == "None" else config["classes"]
        pred = non_max_suppression(pred, config["conf_thres"], config["iou_thres"],
                                   multi_label=False, classes=classes,
                                   agnostic=config["agnostic_nms"])

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections for image i
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out)) + '/results.txt'
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh

            if det is not None and len(det):
                # Rescale boxes from imgsz to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    if names[int(c)] not in config["needed_classes"]:
                        continue
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %s, ' % (n, names[int(c)])  # add to string

                bbox_xywh = []
                confs = []

                # Write results
                for *xyxy, conf, cls in det:
                    # check if bbox's class is needed
                    if names[int(cls)] not in config["needed_classes"]:
                        continue
                    x_c, y_c, bbox_w, bbox_h = bbox_rel(*xyxy)
                    obj = [x_c, y_c, bbox_w, bbox_h]
                    bbox_xywh.append(obj)
                    confs.append([conf.item()])

                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        with open(save_path[:save_path.rfind('.')] + '.txt', 'a') as file:
                            file.write(('%g ' * 5 + '\n') % (cls, *xywh))  # label format

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)])

                detections = torch.Tensor(bbox_xywh)
                confidences = torch.Tensor(confs)

                # Pass detections to deepsort
                if len(detections) == 0:
                    continue
                outputs = deepsort.update(detections, confidences, im0)

                # draw boxes for visualization
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    draw_boxes(im0, bbox_xyxy, identities)

                # Write MOT compliant results to file
                if save_txt and len(outputs) != 0:
                    for j, output in enumerate(outputs):
                        bbox_left = output[0]
                        bbox_top = output[1]
                        bbox_w = output[2]
                        bbox_h = output[3]
                        identity = output[-1]
                        with open(txt_path, 'a') as f:
                            f.write(('%g ' * 10 + '\n') % (frame_idx, identity, bbox_left,
                                                           bbox_top, bbox_w, bbox_h,
                                                           -1, -1, -1, -1))  # label format
            else:
                deepsort.increment_ages()

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                print('saving img!')
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    print('saving video!')
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path,
                                                     cv2.VideoWriter_fourcc(*config["fourcc"]),
                                                     fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
def detect(config):
    COLOR_AROUND_DOOR = (48, 58, 221)
    COLOR_DOOR = (23, 158, 21)
    COLOR_LINE = (214, 4, 54)
    sent_videos = set()
    video_name = ""
    fpeses = []
    fps = 0

    # door_array = select_object()
    # door_array = [475, 69, 557, 258]
    global flag, vid_writer, lost_ids

    # initial parameters
    door_array = [611, 70, 663, 310]
    around_door_array = [507, 24, 724, 374]
    low_border = 225
    high_border = 342
    # door_c = find_centroid(door_array)
    rect_door = Rectangle(door_array[0], door_array[1], door_array[2], door_array[3])
    rect_around_door = Rectangle(around_door_array[0], around_door_array[1],
                                 around_door_array[2], around_door_array[3])

    # socket
    HOST = "localhost"
    PORT = 8083

    # camera info
    save_img = True
    imgsz = (416, 416) if ONNX_EXPORT else config["img_size"]  # (320, 192) or (416, 256) or (608, 352) for (height, width)
    out, source, weights, half, view_img = config["output"], config["source"], config["weights"], \
        config["half"], config["view_img"]
    webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(config["config_deepsort"])

    # initial objects of classes
    counter = Counter()
    VideoHandler = Writer()
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize device, weights etc.
    device = torch_utils.select_device(device='cpu' if ONNX_EXPORT else config["device"])
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder

    # Initialize colors
    names = load_classes(config["names"])
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    if config["category_num"] <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' % config["category_num"])
    if not os.path.isfile('yolo/%s.trt' % config["model"]):
        raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % config["model"])

    # cap = cv2.VideoCapture(config["source"])
    # if not cap.isOpened():
    #     raise SystemExit('ERROR: failed to open the input video file!')
    # frame_width, frame_height = int(cap.get(3)), int(cap.get(4))

    webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')
    if webcam:
        torch.backends.cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    img = torch.zeros((3, imgsz, imgsz), device=device)  # init img
    cls_dict = get_cls_dict(config["category_num"])
    # vis = BBoxVisualization(cls_dict)
    vis = None
    h, w = get_input_shape(config["model"])
    trt_yolo = TrtYOLO(config["model"], (h, w), config["category_num"], config["letter_box"])

    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        sock.connect((HOST, PORT))
        img_shape = (256, 256)
        for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
            t0 = time.time()
            flag_move = False
            flag_anyone_in_door = False
            ratio_detection = 0

            # Process detections
            lost_ids = counter.return_lost_ids()
            if webcam:  # batch_size >= 1
                p, s, im0 = path[0], '%g: ' % 0, im0s[0].copy()  # TODO mb needed in loop for detection
            else:
                p, s, im0 = path, '', im0s

            preds, confs, clss = perform_detection(frame=im0, trt_yolo=trt_yolo,
                                                   conf_th=config["conf_thres"], vis=vis)
            scaled_pred = []
            scaled_conf = []
            detections = torch.Tensor()
            for i, (det, conf, cls) in enumerate(zip(preds, confs, clss)):
                if det is not None and len(det):
                    if names[int(cls)] not in config["needed_classes"]:
                        continue
                    det = xyxy_to_xywh(det)
                    # det = scale_coords(img_shape, det, im0.shape)
                    scaled_pred.append(det)
                    scaled_conf.append(conf)
            detections = torch.Tensor(scaled_pred)
            confidences = torch.Tensor(scaled_conf)

            # Pass detections to deepsort
            if len(detections) != 0:
                outputs = deepsort.update(detections, confidences, im0)

                # draw boxes for visualization
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    draw_boxes(im0, bbox_xyxy, identities)
                    # print('bbox_xywh ', bbox_xywh, 'id', identities)
                    counter.update_identities(identities)

                    for bbox_tracked, id_tracked in zip(bbox_xyxy, identities):
                        ratio_initial = find_ratio_ofbboxes(bbox=bbox_tracked,
                                                            rect_compare=rect_around_door)
                        ratio_door = find_ratio_ofbboxes(bbox=bbox_tracked,
                                                         rect_compare=rect_door)

                        # the person is inside the door contour for the first time
                        if ratio_initial > 0.2:
                            if VideoHandler.counter_frames_indoor == 0:
                                # flag that recording has started
                                VideoHandler.start_video(id_tracked)
                                flag_anyone_in_door = True
                            elif ratio_initial > 0.2 and id_tracked not in VideoHandler.id_inside_door_detected:
                                VideoHandler.continue_opened_video(id=id_tracked, seconds=3)
                                flag_anyone_in_door = True

                        if id_tracked not in counter.people_init or counter.people_init[id_tracked] == 0:
                            counter.obj_initialized(id_tracked)
                            if ratio_door >= 0.2 and low_border < bbox_tracked[3] < high_border:
                                # was initialized in door, probably going out of office
                                counter.people_init[id_tracked] = 2
                            elif ratio_door < 0.2:
                                # initialized in the corridor, mb going in
                                counter.people_init[id_tracked] = 1
                            # else:
                            #     # res is None, means that object is not in door contour
                            #     counter.people_init[id_tracked] = 1
                            counter.frame_age_counter[id_tracked] = 0
                        counter.people_bbox[id_tracked] = bbox_tracked
                        counter.cur_bbox[id_tracked] = bbox_tracked
            else:
                deepsort.increment_ages()

            if counter.need_to_clear():
                counter.clear_all()

            # Stream results
            vals_to_del = []
            for val in counter.people_init.keys():
                # check bbox also
                cur_c = find_centroid(counter.cur_bbox[val])
find_centroid(counter.cur_bbox[val]) centroid_distance = np.sum( np.array([(door_c[i] - cur_c[i])**2 for i in range(len(door_c))])) ratio = find_ratio_ofbboxes(bbox=counter.cur_bbox[val], rect_compare=rect_door) if val in lost_ids and counter.people_init[val] != -1: # if vector_person < 0 then current coord is less than initialized, it means that man is going # in the exit direction if counter.people_init[val] == 2 \ and ratio < 0.4 and centroid_distance > 5000: print('ratio out: {}\n centroids: {}\n'.format( ratio, centroid_distance)) counter.get_out() counter.people_init[val] = -1 VideoHandler.stop_recording( action_occured="вышел из кабинета") vals_to_del.append(val) elif counter.people_init[val] == 1 \ and ratio >= 0.4 and centroid_distance < 5000: print('ratio in: {}\n centroids: {}\n'.format( ratio, centroid_distance)) counter.get_in() counter.people_init[val] = -1 VideoHandler.stop_recording( action_occured="зашел внутрь") vals_to_del.append(val) lost_ids.remove(val) # TODO maybe delete this condition elif counter.frame_age_counter.get(val, 0) >= counter.max_frame_age_counter \ and counter.people_init[val] == 2: if ratio < 0.2 and centroid_distance > 10000: counter.get_out() print('ratio out max frames: ', ratio) counter.people_init[val] = -1 VideoHandler.stop_recording(action_occured="вышел") vals_to_del.append(val) counter.age_counter[val] = 0 counter.clear_lost_ids() for valtodel in vals_to_del: counter.delete_person_data(track_id=valtodel) ins, outs = counter.show_counter() cv2.rectangle(im0, (0, 0), (250, 50), (0, 0, 0), -1, 8) cv2.rectangle(im0, (int(door_array[0]), int(door_array[1])), (int(door_array[2]), int(door_array[3])), COLOR_DOOR, 3) cv2.rectangle( im0, (int(around_door_array[0]), int(around_door_array[1])), (int(around_door_array[2]), int(around_door_array[3])), COLOR_AROUND_DOOR, 3) cv2.putText(im0, "in: {}, out: {} ".format(ins, outs), (10, 35), 0, 1e-3 * im0.shape[0], (255, 255, 255), 3) cv2.line(im0, (door_array[0], low_border), (680, low_border), COLOR_LINE, 4) cv2.line(im0, (door_array[0], high_border), (680, high_border), COLOR_LINE, 4) if VideoHandler.stop_writing(im0): # send_new_posts(video_name, action_occured) sock.sendall( bytes( VideoHandler.video_name + ":" + VideoHandler.action_occured, "utf-8")) data = sock.recv(100) print('Received', repr(data.decode("utf-8"))) sent_videos.add(VideoHandler.video_name) with open('data_files/logs2.txt', 'a', encoding="utf-8-sig") as wr: wr.write( 'video {}, action: {}, centroid: {}, ratio_init: {}, ratio_door: {}, ratio: {} \n' .format(VideoHandler.video_name, VideoHandler.action_occured, centroid_distance, ratio_initial, ratio_door, ratio)) print('_________________video was sent _________________') VideoHandler = Writer() VideoHandler.set_fps(fps) else: VideoHandler.continue_writing(im0, flag_anyone_in_door) if view_img is True: cv2.imshow('image', im0) cv2.waitKey(1) if cv2.waitKey(1) == ord('q'): # q to quit raise StopIteration delta_time = (time.time() - t0) # t2_ds = time.time() # print('%s Torch:. (%.3fs)' % (s, t2 - t1)) # print('Full pipe. 
            # (%.3fs)' % (t2_ds - t0_ds))
            if len(fpeses) < 15:
                fpeses.append(round(1 / delta_time))
                print(delta_time)
            elif len(fpeses) == 15:
                # fps = round(np.median(np.array(fpeses)))
                median_fps = float(np.median(np.array(fpeses)))
                fps = round(median_fps, 2)
                print('max fps: ', fps)
                fps = 20  # NOTE: hard-coded value overrides the measured median above
                VideoHandler.set_fps(fps)
                counter.set_fps(fps)
                fpeses.append(fps)
                motion_detection = True
            else:
                if VideoHandler.flag_writing_video:
                    print('writing video')  # was: print('\writing video ')
                if VideoHandler.flag_stop_writing:
                    print('stop writing')
                if flag_anyone_in_door:
                    print('anyone in door')
                if VideoHandler.counter_frames_indoor:
                    print('counter frames indoor: {}'.format(
                        VideoHandler.counter_frames_indoor))
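# --- Illustrative sketch: zone-overlap ratio used by the door logic ----------
# The in/out decisions above key on find_ratio_ofbboxes(bbox, rect_compare)
# with thresholds of 0.2 / 0.4. The helper is not defined in this file; the
# thresholds suggest it returns the fraction of the tracked box that lies
# inside a fixed rectangle (intersection over box area). Sketch under that
# assumption; Rectangle is likewise assumed to be a plain two-corner record.
from collections import namedtuple

RectangleSketch = namedtuple('RectangleSketch', 'x1 y1 x2 y2')

def ratio_of_bbox_in_rect(bbox, rect):
    x1, y1, x2, y2 = map(float, bbox[:4])
    ix1, iy1 = max(x1, rect.x1), max(y1, rect.y1)
    ix2, iy2 = min(x2, rect.x2), min(y2, rect.y2)
    inter = max(0.0, ix2 - ix1) * max(0.0, iy2 - iy1)
    return inter / max(1e-9, (x2 - x1) * (y2 - y1))  # 1.0 = box fully inside zone

print(ratio_of_bbox_in_rect([600, 100, 660, 300], RectangleSketch(611, 70, 663, 310)))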
def detect(opt, save_img=False): out, source, weights, view_img, save_txt, imgsz = \ opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size webcam = source == '0' or source.startswith('rtsp') or source.startswith( 'http') or source.endswith('.txt') # initialize deepsort cfg = get_config() cfg.merge_from_file(opt.config_deepsort) deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT, max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE, nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE, max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET, use_cuda=True) # Initialize device = select_device(opt.device) if os.path.exists(out): shutil.rmtree(out) # delete output folder os.makedirs(out) # make new output folder half = device.type != 'cpu' # half precision only supported on CUDA last_time = time.time() # Load model model = torch.load(weights, map_location=device)['model'].float() # load to FP32 model.to(device).eval() if half: model.half() # to FP16 # Set Dataloader vid_path, vid_writer = None, None if webcam: view_img = True cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz) else: view_img = True save_img = True dataset = LoadImages(source, img_size=imgsz) # Get names and colors names = model.module.names if hasattr(model, 'module') else model.names # Run inference t0 = time.time() img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img # run once _ = model(img.half() if half else img) if device.type != 'cpu' else None save_path = str(Path(out)) txt_path = str(Path(out)) + '/results.txt' print('starting predictions...') # static vars time_total_start = 0 curve_goal = None for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset): if curve_goal is not None: # CURVE ACTION # eval curve total_change = np.array([0], dtype='float64') total_time = 10 time_elapsed = time_synchronized() - time_total_start x_start = curve_goal.evaluate(time_elapsed / total_time)[0] for a in range(int(time_elapsed * 32), int((time_elapsed + 0.25) * 32)): if time_elapsed + 0.25 > total_time: break total_change += curve_goal.evaluate( a / (total_time * 32))[0] - x_start # print(total_change) # if total_change? 
> 9999999: # print('broke at', total_change, '[max 999999]') total_change /= (time_elapsed + 0.5) / total_time print((total_change / NORMALIZATION_CONSTANT)[0]) # print('time', time_elapsed/total_time, '\n') rx = max(min(total_change / NORMALIZATION_CONSTANT, [1]), [-1])[0] vals['rx'] = round(rx, 5) vals['ly'] = 1 vals['trot'] = 1 # print(vals['rx']) if time_synchronized() - time_total_start > total_time: curve_goal = None time_total_start = 0 vals['trot'] = 0 vals['ly'] = 0 # sys.exit() else: img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() pred = model(img, augment=opt.augment)[0] # Apply NMS pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms) t2 = time_synchronized() # Process detections for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 p, s, im0 = path[i], '%g: ' % i, im0s[i].copy() else: p, s, im0 = path, '', im0s s += '%gx%g ' % img.shape[2:] # print string save_path = str(Path(out) / Path(p).name) if det is not None and len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += '%g %ss, ' % (n, names[int(c)]) # add to string bbox_xywh = [] confs = [] # Adapt detections to deep sort input format for *xyxy, conf, cls in det: x_c, y_c, bbox_w, bbox_h = bbox_rel(*xyxy) obj = [x_c, y_c, bbox_w, bbox_h] bbox_xywh.append(obj) confs.append([conf.item()]) xywhs = torch.Tensor(bbox_xywh) confss = torch.Tensor(confs) # Pass detections to deepsort outputs = deepsort.update(xywhs, confss, im0) # draw boxes for visualization if len(outputs) > 0: bbox_xyxy = outputs[:, :4] identities = outputs[:, -1] draw_boxes(im0, bbox_xyxy, identities) # Write MOT compliant results to file if save_txt and len(outputs) != 0: bezier_points = np.zeros((2 * len(outputs), 2)) idx = 0 for j, output in enumerate(outputs): bbox_left = output[0] bbox_top = output[1] bbox_w = output[2] bbox_h = output[3] identity = output[-1] with open(txt_path, 'a') as f: f.write(('%g ' * 10 + '\n') % (frame_idx, identity, bbox_left, bbox_top, bbox_w, bbox_h, -1, -1, -1, -1)) # label format # calculating rotation and movement for dog! 
# gotta use vals['...'] # dim CAM_WIDTH x CAM_HEIGHT # rotation calculation middle_x = (bbox_left + bbox_w) / 2 # translation calculation percent_filled_y = (bbox_h - bbox_top) / CAM_HEIGHT percent_filled_y *= 100 # GENERATE TOLERANCE # max_width_tolerance = 0.375 * (bbox_w - bbox_left) left_bound = bbox_left - max_width_tolerance right_bound = bbox_w + max_width_tolerance # GENERATE DISTANCE # distance_rel = math.e**(-percent_filled_y / 30) # print('left, middle, right', left_bound, middle_x, right_bound) # print('dist', distance_rel, 'pct', percent_filled_y) # GENERATE POINTS # if percent_filled_y < 1.: bezier_points[idx][0] = -1 bezier_points[idx][1] = -1 bezier_points[idx + 1][0] = -1 bezier_points[idx + 1][1] = -1 else: if idx > 1: # relative to last box midpoint = bezier_points[idx - 1][ 0] # exit point of last node bezier_points[idx][ 0] = left_bound if middle_x >= midpoint else right_bound bezier_points[idx][ 1] = distance_rel - 0.005 bezier_points[idx + 1][ 0] = left_bound if middle_x >= midpoint else right_bound bezier_points[idx + 1][1] = distance_rel + 0.005 else: # rel to middle bezier_points[idx][ 0] = left_bound if middle_x >= ( CAM_WIDTH / 2) else right_bound bezier_points[idx][ 1] = distance_rel - 0.005 bezier_points[ idx + 1][0] = left_bound if middle_x >= ( CAM_WIDTH / 2) else right_bound bezier_points[idx + 1][1] = distance_rel + 0.005 idx += 2 if cv2.waitKey(1) == ord('f'): points = list() skipped_boxes = list() skip_idx = -1 for a in range(bezier_points.shape[0]): x = bezier_points[a][0] y = bezier_points[a][1] if bezier_points.shape[0] > a + 1 and abs( y - bezier_points[a + 1][1]) < .001: skip_idx = a + 1 if x < 0 or y < 0: continue if bezier_points.shape[0] > a + 3 and ( a != skip_idx ) and abs( x - bezier_points[a + 2][0] ) > CAM_WIDTH * 0.2: # threshold for ignorance # print('skipping', a + 2) skipped_boxes.append(a + 2) skipped_boxes.append(a + 3) for skipped_idx in skipped_boxes: bezier_points[skipped_idx][0] = -1 bezier_points[skipped_idx][1] = -1 for a in range(bezier_points.shape[0]): x = bezier_points[a][0] y = bezier_points[a][1] if x < 0 or y < 0: continue points.append((x, y)) far_pt = 0 if len(points) > 0: far_pt = points[-1][1] + 1 points.append((CAM_WIDTH / 2, 0)) points.append((CAM_WIDTH / 2, far_pt)) points.sort(key=y_coord_sort) nodes_curve_norm = np.swapaxes( np.array(points), 1, 0) nodes_curve = np.asfortranarray(nodes_curve_norm) # print(frame_idx, nodes_curve) print('calculating curve on frame', frame_idx) curve = bezier.Curve(nodes_curve, degree=(nodes_curve.shape[1] - 1)) # DISPLAY DATA # # x = left/right bound [0,720] # y = distance [1, e**-3] for j, output in enumerate(outputs): bbox_left = output[0] bbox_top = output[1] bbox_w = output[2] bbox_h = output[3] percent_filled_y = (bbox_h - bbox_top) / CAM_HEIGHT percent_filled_y *= 100 distance_rel = math.e**(-percent_filled_y / 30) plt.plot([bbox_left, bbox_w], [distance_rel, distance_rel]) plot_bez(curve, frame_idx) # set curve time_total_start = time_synchronized() curve_goal = curve # sys.exit() else: deepsort.increment_ages() # Print time (inference + NMS) # print('%sDone. 
(%.3fs)' % (s, t2 - t1)) # Stream results if view_img: cv2.imshow(p, im0) if cv2.waitKey(1) == ord('q'): # q to quit raise StopIteration # Save results (image with detections) if save_img: print('saving img!') if dataset.mode == 'images': cv2.imwrite(save_path, im0) else: print('saving video!') if vid_path != save_path: # new video vid_path = save_path if isinstance(vid_writer, cv2.VideoWriter): vid_writer.release( ) # release previous video writer fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) vid_writer = cv2.VideoWriter( save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h)) vid_writer.write(im0) if not DEBUG_MODE: if time.time() - last_time > cooldown: # so we dont spam send_commands(vals) last_time = time.time() events = sel.select(timeout=1) if events: for key, mask in events: service_connection(key, mask) if not sel.get_map(): break if save_txt or save_img: print('Results saved to %s' % os.getcwd() + os.sep + out) if platform == 'darwin': # MacOS os.system('open ' + save_path) print('Done. (%.3fs)' % (time.time() - t0))
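# --- Illustrative sketch: fitting and sampling the path with `bezier` --------
# The curve-following logic above stacks control points as columns of a
# Fortran-ordered (2, N) array and fits bezier.Curve(nodes, degree=N-1);
# curve.evaluate(s) with s in [0, 1] returns a (2, 1) array, which is why the
# code indexes evaluate(...)[0] for the x coordinate. Waypoints below are
# made up for demonstration.
import numpy as np
import bezier  # pip install bezier

points = [(360.0, 0.0), (250.0, 0.4), (410.0, 0.8), (360.0, 1.2)]
nodes = np.asfortranarray(np.array(points).T)          # shape (2, N), columns = points
curve = bezier.Curve(nodes, degree=nodes.shape[1] - 1)
for s in np.linspace(0.0, 1.0, 5):
    x, y = curve.evaluate(float(s)).ravel()            # evaluate returns (2, 1)
    print(f's={s:.2f} -> x={x:.1f}, y={y:.3f}')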
def detect(opt, save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = torch.load(weights, map_location=device)['model'].float()  # load to FP32
    model.to(device).eval()
    if half:
        model.half()  # to FP16

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        view_img = view_img  # no-op; keeps the CLI value for video files
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    # run once
    _ = model(img.half() if half else img) if device.type != 'cpu' else None

    # save_path = str(Path(out))
    txt_path = str(Path(out)) + '/results.txt'

    vid = cv2.VideoCapture(source)
    filename = os.path.basename(source).split('.')[0]
    save_path = f"results/{filename}_action.mp4"
    fps = vid.get(cv2.CAP_PROP_FPS)
    w = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*("mp4v")), fps, (w, h))

    # ffmpeg setup: frames are piped in as JPEGs and re-encoded to H.264
    pipe = Popen([
        'ffmpeg', '-loglevel', 'quiet', '-y', '-f', 'image2pipe', '-vcodec',
        'mjpeg', '-framerate', f'{fps}', '-i', '-', '-vcodec', 'libx264',
        '-crf', '28', '-preset', 'veryslow', '-framerate', f'{fps}',
        f'{save_path}'
    ], stdin=PIPE)

    length = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
    pbar = tqdm(total=length, position=0, leave=True)

    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        start = time.time()
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
                                   classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, im0 = path, im0s
            # s += '%gx%g ' % img.shape[2:]  # print string
            # save_path = str(Path(out) / Path(p).name)

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                # for c in det[:, -1].unique():
                #     n = (det[:, -1] == c).sum()  # detections per class
                #     s += '%g %ss, ' % (n, names[int(c)])  # add to string

                bbox_xywh = []
                confs = []

                # Adapt detections to deep sort input format
                for *xyxy, conf, cls in det:
                    x_c, y_c, bbox_w, bbox_h = bbox_rel(*xyxy)
                    obj = [x_c, y_c, bbox_w, bbox_h]
                    bbox_xywh.append(obj)
                    confs.append([conf.item()])

                xywhs = torch.Tensor(bbox_xywh)
                confss = torch.Tensor(confs)

                # Pass detections to deepsort. NOTE: this fork's update() is
                # assumed to return the annotated frame rather than the usual
                # (N, 5) track array, since its result is used as the image below.
                im0 = deepsort.update(xywhs, confss, im0)
                # # draw boxes for visualization
                # if len(outputs) > 0:
                #     bbox_xyxy = outputs[:, :4]
                #     identities = outputs[:, -1]
                #     draw_boxes(im0, bbox_xyxy, identities)
            else:
                deepsort.increment_ages()

            # Print time (inference + NMS)
            runtime_fps = 1 / (time.time() - start)
            # print(f"Runtime FPS: {runtime_fps:.2f}")
            pbar.set_description(f"runtime_fps: {runtime_fps}")
            pbar.update(1)

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections): pipe the frame to ffmpeg
            # vid_writer.write(im0)
            im0 = Image.fromarray(im0[..., ::-1])  # BGR -> RGB
            # print(im0)
            im0.save(pipe.stdin, 'JPEG')

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)

    # vid_writer.release()
    pipe.stdin.close()
    pipe.wait()
    pbar.close()
    print('Done. (%.3fs)' % (time.time() - t0))
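# --- Illustrative sketch: the ffmpeg pipe writer in isolation ----------------
# Instead of cv2.VideoWriter, the variant above streams JPEG-encoded frames
# into an ffmpeg child process that re-encodes them to H.264. A self-contained
# version of the pattern; frame size, fps and the output name are illustrative,
# and ffmpeg is assumed to be on PATH.
from subprocess import Popen, PIPE
import numpy as np
from PIL import Image

fps, w, h, out_path = 25, 640, 360, 'demo_out.mp4'
pipe = Popen(['ffmpeg', '-loglevel', 'quiet', '-y',
              '-f', 'image2pipe', '-vcodec', 'mjpeg', '-framerate', str(fps),
              '-i', '-', '-vcodec', 'libx264', '-crf', '28', out_path],
             stdin=PIPE)
for _ in range(10):
    frame_bgr = np.zeros((h, w, 3), dtype=np.uint8)                 # stand-in for im0
    Image.fromarray(frame_bgr[..., ::-1]).save(pipe.stdin, 'JPEG')  # BGR -> RGB
pipe.stdin.close()
pipe.wait()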
def detect(opt): out, source, yolo_weights, deep_sort_weights, show_vid, save_vid, save_txt, imgsz, evaluate = \ opt.output, opt.source, opt.yolo_weights, opt.deep_sort_weights, opt.show_vid, opt.save_vid, \ opt.save_txt, opt.img_size, opt.evaluate webcam = source == '0' or source.startswith( 'rtsp') or source.startswith('http') or source.endswith('.txt') # initialize deepsort cfg = get_config() cfg.merge_from_file(opt.config_deepsort) attempt_download(deep_sort_weights, repo='mikel-brostrom/Yolov5_DeepSort_Pytorch') deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT, max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE, max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET, use_cuda=True) # Initialize device = select_device(opt.device) # The MOT16 evaluation runs multiple inference streams in parallel, each one writing to # its own .txt file. Hence, in that case, the output folder is not restored if not evaluate: if os.path.exists(out): pass shutil.rmtree(out) # delete output folder os.makedirs(out) # make new output folder half = device.type != 'cpu' # half precision only supported on CUDA # Load model model = attempt_load(yolo_weights, map_location=device) # load FP32 model stride = int(model.stride.max()) # model stride imgsz = check_img_size(imgsz, s=stride) # check img_size names = model.module.names if hasattr(model, 'module') else model.names # get class names if half: model.half() # to FP16 # Set Dataloader vid_path, vid_writer = None, None # Check if environment supports image displays if show_vid: show_vid = check_imshow() if webcam: cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz, stride=stride) else: dataset = LoadImages(source, img_size=imgsz, stride=stride) # Get names and colors names = model.module.names if hasattr(model, 'module') else model.names # Run inference if device.type != 'cpu': model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters()))) # run once t0 = time.time() save_path = str(Path(out)) # extract what is in between the last '/' and last '.' 
txt_file_name = source.split('/')[-1].split('.')[0] txt_path = str(Path(out)) + '/' + txt_file_name + '.txt' for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset): img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_sync() pred = model(img, augment=opt.augment)[0] # Apply NMS pred = non_max_suppression( pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms) t2 = time_sync() # Process detections for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 p, s, im0 = path[i], '%g: ' % i, im0s[i].copy() else: p, s, im0 = path, '', im0s s += '%gx%g ' % img.shape[2:] # print string save_path = str(Path(out) / Path(p).name) annotator = Annotator(im0, line_width=2, pil=not ascii) if det is not None and len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords( img.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string xywhs = xyxy2xywh(det[:, 0:4]) confs = det[:, 4] clss = det[:, 5] # pass detections to deepsort outputs = deepsort.update(xywhs.cpu(), confs.cpu(), clss.cpu(), im0) # draw boxes for visualization if len(outputs) > 0: for j, (output, conf) in enumerate(zip(outputs, confs)): bboxes = output[0:4] id = output[4] cls = output[5] c = int(cls) # integer class label = f'{id} {names[c]} {conf:.2f}' annotator.box_label(bboxes, label, color=colors(c, True)) if save_txt: # to MOT format bbox_left = output[0] bbox_top = output[1] bbox_w = output[2] - output[0] bbox_h = output[3] - output[1] # Write MOT compliant results to file with open(txt_path, 'a') as f: f.write(('%g ' * 10 + '\n') % (frame_idx, id, bbox_left, bbox_top, bbox_w, bbox_h, -1, -1, -1, -1)) # label format else: deepsort.increment_ages() # Print time (inference + NMS) print('%sDone. (%.3fs)' % (s, t2 - t1)) # Stream results im0 = annotator.result() if show_vid: cv2.imshow(p, im0) if cv2.waitKey(1) == ord('q'): # q to quit raise StopIteration # Save results (image with detections) if save_vid: if vid_path != save_path: # new video vid_path = save_path if isinstance(vid_writer, cv2.VideoWriter): vid_writer.release() # release previous video writer if vid_cap: # video fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) else: # stream fps, w, h = 30, im0.shape[1], im0.shape[0] save_path += '.mp4' vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h)) vid_writer.write(im0) if save_txt or save_vid: print('Results saved to %s' % os.getcwd() + os.sep + out) if platform == 'darwin': # MacOS os.system('open ' + save_path) print('Done. (%.3fs)' % (time.time() - t0))
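# --- Illustrative sketch: the MOT-challenge text rows ------------------------
# All save_txt branches in this file emit one row per track per frame:
# frame, id, bb_left, bb_top, bb_width, bb_height, conf, x, y, z, with the
# last four fixed to -1. In this variant the tracker outputs are corner
# format, so width and height are differences, exactly as computed above.
def write_mot_row(f, frame_idx, track_id, x1, y1, x2, y2):
    f.write(('%g ' * 10 + '\n') % (frame_idx, track_id, x1, y1,
                                   x2 - x1, y2 - y1, -1, -1, -1, -1))

with open('results_demo.txt', 'a') as f:
    write_mot_row(f, frame_idx=0, track_id=1, x1=100, y1=50, x2=140, y2=170)
    write_mot_row(f, frame_idx=1, track_id=1, x1=103, y1=51, x2=140, y2=171)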
def detect(opt, save_img=False): out, source, weights, view_img, save_txt, imgsz, GCP_list = \ opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size, opt.GCP_list webcam = source == '0' or source.startswith('rtsp') or source.startswith( 'http') or source.endswith('.txt') # initialize deepsort cfg = get_config() cfg.merge_from_file(opt.config_deepsort) deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT, max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE, nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE, max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET, use_cuda=True) # Initialize device = select_device(opt.device) if os.path.exists(out): shutil.rmtree(out) # delete output folder os.makedirs(out) # make new output folder half = device.type != 'cpu' # half precision only supported on CUDA # Load model model = torch.load(weights, map_location=device)['model'].float() # load to FP32 model.to(device).eval() if half: model.half() # to FP16 # Second-stage classifier classify = False if classify: modelc = load_classifier(name='resnet101', n=2) # initialize modelc.load_state_dict( torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval() # Set Dataloader vid_path, vid_writer = None, None if webcam: view_img = True cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz) else: view_img = True save_img = True dataset = LoadImages(source, img_size=imgsz) # Get names and colors names = model.module.names if hasattr(model, 'module') else model.names colors = [[random.randint(0, 255) for _ in range(3)] for _ in names] # Run inference t0 = time.time() img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img # run once _ = model(img.half() if half else img) if device.type != 'cpu' else None save_path = str(Path(out)) txt_path_raw = str(Path(out)) + '/results_raw.txt' # 속도까지 붙여버린 데이터 따로 생성해서 비교해보자 : 수정수정 txt_path_raw2 = str(Path(out)) + '/results_raw2.txt' # point load with open('./mapdata/point.yaml') as f: data = yaml.load(f.read()) frm_point = data['frm_point'] geo_point = data['geo_point'] Counter_1 = [(488, 589), (486, 859)] Counter_2 = [(3463, 795), (3487, 1093)] Counter_list = [Counter_1, Counter_2] datum_dist = [] counter_dist = [] line_fileName = './mapdata/Busan1_IC_Polyline_to_Vertex.txt' all_line = mapdata_load(line_fileName, frm_point, geo_point) percep_frame = 5 from _collections import deque pts = [deque(maxlen=percep_frame + 1) for _ in range(10000)] ptsSpeed = [deque(maxlen=1) for _ in range(10000)] frame_len = calc_dist(frm_point[1], frm_point[4]) geo_len = calc_dist(geo_point[1], geo_point[4]) # ----------------- fix val start fixcnt = 1 # ----------------- fix val end # ----------------- counter val start memory_index = {} memory_id = {} cnt = np.zeros((len(Counter_list), 4)) # total_counter = 0 # 나중에 총 카운터를 만들어 넣으면 되겠지? 
# count_1_total = 0 # count_1_veh_c0 = 0 # count_1_veh_c1 = 0 # count_1_veh_c2 = 0 # count_2_total = 0 # count_2_veh_c0 = 0 # count_2_veh_c1 = 0 # count_2_veh_c2 = 0 # ----------------- counter val end for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset): img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() pred = model(img, augment=opt.augment)[0] # Apply NMS pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms) # Apply Classifier if classify: pred = apply_classifier(pred, modelc, img, im0s) print(pred) t2 = time_synchronized() # Process detections for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 p, s, im0 = path[i], '%g: ' % i, im0s[i].copy() else: p, s, im0 = path, '', im0s s += '%gx%g ' % img.shape[2:] # print string save_path = str(Path(out) / Path(p).name) if det is not None and len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += '%g %ss, ' % (n, names[int(c)]) # add to string bbox_xywh = [] confs = [] clss = [] # Adapt detections to deep sort input format for *xyxy, conf, cls in det: x_c, y_c, bbox_w, bbox_h = bbox_rel(*xyxy) obj = [x_c, y_c, bbox_w, bbox_h] bbox_xywh.append(obj) confs.append([conf.item()]) clss.append(cls.item()) bbox_xywh = bbox_xywh cls_conf = confs cls_ids = clss # xywhs = torch.Tensor(bbox_xywh) # confss = torch.Tensor(confs) # cls_ids = clss # if len(bbox_xywh) == 0: # continue # print("detection cls_ids:", cls_ids) #filter cls id for tracking # print("cls_ids") # print(cls_ids) # # select class # mask = [] # lst_move_life = [0,1,2] # # lst_for_track = [] # for id in cls_ids: # if id in lst_move_life: # # lst_for_track.append(id) # mask.append(True) # else: # mask.append() # # print("mask cls_ids:", mask) # # print(bbox_xywh) # bbox_xywh = list(compress(bbox_xywh,mask)) # bbox dilation just in case bbox too small, delete this line if using a better pedestrian detector # bbox_xywh[:,3:] *= 1.2 # cls_conf = list(compress(cls_conf,mask)) # print(cls_conf) bbox_xywh = torch.Tensor(bbox_xywh) cls_conf = torch.Tensor(cls_conf) # Pass detections to deepsort outputs = deepsort.update(bbox_xywh, cls_conf, im0, cls_ids) """ # output 형식 [[박스 좌측상단 x, 박스 좌측상단 y, 박스 우측하단 x, 박스 우측하단 y, 차량 id, 클래스 넘버], [박스 좌측상단 x, 박스 좌측상단 y, 박스 우측하단 x, 박스 우측하단 y, 차량 id, 클래스 넘버], [박스 좌측상단 x, 박스 좌측상단 y, 박스 우측하단 x, 박스 우측하단 y, 차량 id, 클래스 넘버], [박스 좌측상단 x, 박스 좌측상단 y, 박스 우측하단 x, 박스 우측하단 y, 차량 id, 클래스 넘버], ...] 
""" # ------------------------------------------------------------------------------------------------------ img fix start t3 = time_synchronized() match_mid_point_list = matcher_BRISK_BF(im0, GCP_list) t4 = time_synchronized() # ---------------------------------------------------------------------------------------------------------------------- line start # 기준점 위치 갱신을 위한 삼변측량의 거리 정의 및 고정 if frame_idx == 0: for pointNum in range(len(frm_point)): for GCP_num in range(len(match_mid_point_list)): datum_dist.append( point_dist(match_mid_point_list[GCP_num], frm_point[pointNum])) datum_dist = np.reshape( datum_dist, (len(frm_point), len(match_mid_point_list))) for Ct_list in Counter_list: for Ctpoint_num in range(len(Ct_list)): for GCP_num in range(len(match_mid_point_list)): counter_dist.append( point_dist(match_mid_point_list[GCP_num], Ct_list[Ctpoint_num])) counter_dist = np.reshape(counter_dist, (len(Counter_list), len(Ct_list), len(match_mid_point_list))) t5 = time_synchronized() pre_P = (0, 0) for line_num, eachline in enumerate(all_line): for newpoint in eachline['frmPoint']: if line_num == 0: im0 = cv2.circle(im0, newpoint, 5, (0, 0, 255), -1) # 차선_실선 if calc_dist(pre_P, newpoint) < 390: im0 = cv2.line(im0, pre_P, newpoint, (0, 0, 255), 2, -1) elif line_num == 1: im0 = cv2.circle(im0, newpoint, 5, (0, 255, 0), -1) # 도로 경계 if calc_dist(pre_P, newpoint) < 420: im0 = cv2.line(im0, pre_P, newpoint, (0, 255, 0), 2, -1) elif line_num == 2: im0 = cv2.circle(im0, newpoint, 5, (255, 0, 0), -1) # 차선_겹선 if calc_dist(pre_P, newpoint) < 350: im0 = cv2.line(im0, pre_P, newpoint, (255, 0, 0), 2, -1) else: im0 = cv2.circle(im0, newpoint, 5, (100, 100, 0), -1) # 차선_점선 if calc_dist(pre_P, newpoint) < 600: im0 = cv2.line(im0, pre_P, newpoint, (100, 100, 0), 2, -1) pre_P = newpoint t6 = time_synchronized() for pointNum in range(len(frm_point)): im0 = cv2.circle(im0, frm_point[pointNum], 10, (0, 0, 0), -1) newPoint = intersectionPoint(match_mid_point_list, datum_dist[pointNum]) frm_point[pointNum] = newPoint t7 = time_synchronized() #---------------------------------------------------------------------------------------------------------------------- line end # ------------------------------------------------------------------------------------------------------ img fix end # ------------------------------------------------------------------------------------------------------ counting num and class start Counter_newpoint = [] for Ct_num in range(len(Counter_list)): Ct_list = Counter_list[Ct_num] for Ctpoint_num in range(len(Ct_list)): Counter_newpoint.append( intersectionPoint( match_mid_point_list, counter_dist[Ct_num][Ctpoint_num])) Counter_newpoint = np.reshape( Counter_newpoint, (len(Counter_list), len(Ct_list), 2)) for CountNum in Counter_newpoint: im0 = cv2.line(im0, tuple(CountNum[0]), tuple(CountNum[1]), (0, 0, 0), 5, -1) boxes = [] indexIDs = [] classIDs = [] previous_index = memory_index.copy() previous_id = memory_id.copy() memory_index = {} memory_id = {} COLORS = np.random.randint(0, 255, size=(200, 3), dtype="uint8") if save_txt and len(outputs) != 0: for j, output in enumerate(outputs): boxes.append( [output[0], output[1], output[2], output[3]]) indexIDs.append(int(output[4])) classIDs.append(int(output[5])) memory_index[indexIDs[-1]] = boxes[ -1] # 인덱스 아이디와 박스를 맞춰줌 memory_id[indexIDs[-1]] = classIDs[ -1] # 인덱스 아이디와 클레스 아이디를 맞춰줌 if len(pts[output[4]]) == 0: pts[output[4]].append(frame_idx) center = (int(((output[0]) + (output[2])) / 2), int(((output[1]) + (output[3])) / 2)) 
pts[output[4]].append(center) if len(pts[output[4]]) == percep_frame + 1: frmMove_len = np.sqrt( pow( pts[output[4]][-1][0] - pts[output[4]][-percep_frame][0], 2) + pow( pts[output[4]][-1][1] - pts[output[4]][-percep_frame][1], 2)) geoMove_Len = geo_len * frmMove_len / frame_len speed = geoMove_Len * vid_cap.get( cv2.CAP_PROP_FPS) * 3.6 / (pts[output[4]][0] - frame_idx) ptsSpeed[output[4]].append(speed) pts[output[4]].clear() if len(boxes) > 0: i = int(0) for box in boxes: # 현 위치와 이전 위치를 비교하여 지나갔는지 체크함 (x, y) = (int(box[0]), int(box[1])) # Output 0 1 (w, h) = (int(box[2]), int(box[3])) # Output 2 3 과 같다. color = compute_color_for_labels(indexIDs[i]) if indexIDs[i] in previous_index: previous_box = previous_index[indexIDs[i]] # print() # print('previous_box : ') # print(previous_box) (x2, y2) = (int(previous_box[0]), int(previous_box[1])) (w2, h2) = (int(previous_box[2]), int(previous_box[3])) p0 = (int(x + (w - x) / 2), int(y + (h - y) / 2) ) # 현재 박스 p1 = (int(x2 + (w2 - x2) / 2), int(y2 + (h2 - y2) / 2)) # 이전 박스 cv2.line( im0, p0, p1, color, 3 ) # 이전 정보와 비교 : 중앙에 점을 찍어 가면서 (이전 데이터와 검지 데이터의 점) # 클레스 구분 previous_class_id = previous_id[ indexIDs[i]] # 어차피 인덱스 같기 때문에 그냥 넣어줘도 됨 개꿀ㅋ # Yolov5와 DeepSort를 통하여 만들어진 첫 결과물(내가 맨든 결과물) # 프레임 수, 인덱스 아이디, 클레스 이름, x좌표, y좌표, w값, h값, 속도값, null, null # with open(txt_path_raw2, 'a') as f: # f.write(('%g ' * 10+ '\n') % (frame_idx, indexIDs[i], previous_class_id, # p0[0], p0[1], box[2], box[3], -1, -1)) # label format for cntr in range(len(Counter_newpoint)): if intersect(p0, p1, Counter_newpoint[cntr][0], Counter_newpoint[cntr] [1]): # 실질적으로 체크함 if previous_class_id == 0: cnt[cntr][1] += 1 elif previous_class_id == 1: cnt[cntr][2] += 1 elif previous_class_id == 2: cnt[cntr][3] += 1 cnt[cntr][0] += 1 i += 1 # 다음 인덱스와 비교하게 만들기 위하여 # draw counter for cntr in range(len(Counter_newpoint)): cv2.putText(im0, 'count_{}_total : {}'.format( cntr + 1, cnt[cntr][0]), (100 + 400 * cntr, 110), cv2.FONT_HERSHEY_DUPLEX, 1.0, (0, 0, 0), 2) # 카운팅 되는거 보이게 cv2.putText(im0, 'count_{}_{} : {}'.format( cntr + 1, names[0], cnt[cntr][1]), (100 + 400 * cntr, 140), cv2.FONT_HERSHEY_DUPLEX, 0.5, (0, 0, 0), 2) # 카운팅 되는거 보이게 cv2.putText(im0, 'count_{}_{} : {}'.format( cntr + 1, names[1], cnt[cntr][2]), (100 + 400 * cntr, 170), cv2.FONT_HERSHEY_DUPLEX, 0.5, (0, 0, 0), 2) # 카운팅 되는거 보이게 cv2.putText(im0, 'count_{}_{} : {}'.format( cntr + 1, names[2], cnt[cntr][3]), (100 + 400 * cntr, 200), cv2.FONT_HERSHEY_DUPLEX, 0.5, (0, 0, 0), 2) # 카운팅 되는거 보이게 t8 = time_synchronized() # ---------------------------------------------------------------------------------------------------------------------- counter end # draw boxes for visualization if len(outputs) > 0: bbox_xyxy = outputs[:, :4] identities = outputs[:, 4:5] cls_id = outputs[:, -1] draw_boxes(im0, bbox_xyxy, cls_id, identities, names, ptsSpeed) t9 = time_synchronized() # Write MOT compliant results to file if save_txt and len(outputs) != 0: for j, output in enumerate(outputs): # 한 라인씩 쓰는 구조 bbox_left = output[0] bbox_top = output[1] bbox_w = output[2] bbox_h = output[3] identity = output[4] classname = output[5] with open( txt_path_raw, 'a' ) as f: # Yolov5와 DeepSort를 통하여 만들어진 첫 결과물(원본결과물) f.write(('%g ' * 6 + '%g' * 1 + '%g ' * 3 + '\n') % (frame_idx, identity, bbox_left, bbox_top, bbox_w, bbox_h, classname, -1, -1, -1)) # label format # else: # deepsort.increment_ages() t10 = time_synchronized() # Print time (inference + NMS + classify) #print('%sDone. 
(%.3fs)' % (s, t2 - t1)) # Stream results if view_img: cv2.imshow(p, im0) if cv2.waitKey(1) == ord('q'): # q to quit raise StopIteration t11 = time_synchronized() # Save results (image with detections) # dataset.mode = 'images' # save_path = './track_result/output/{}.jpg'.format(i) if save_img: print('saving img!') if dataset.mode == 'images': cv2.imwrite(save_path, im0) else: print('saving video!') if vid_path != save_path: # new video vid_path = save_path if isinstance(vid_writer, cv2.VideoWriter): vid_writer.release( ) # release previous video writer fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) vid_writer = cv2.VideoWriter( save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h)) vid_writer.write(im0) t12 = time_synchronized() print('inference + NMS + classify (%.3fs)' % (t2 - t1)) print('Yolo + DeepSORT (%.3fs)' % (t3 - t2)) print('find mid point (%.3fs)' % (t4 - t3)) print('삼변측량을 위한 기준거리 산정 (%.3fs)' % (t5 - t4)) print('draw line (%.3fs)' % (t6 - t5) ) # 현재는 정밀도로지도에 있는 모든 점들을 대상 계산중 -> 추후 화면에 표시될 점만 계산하는 작업 필요 print('GCP 점 계산 (%.3fs)' % (t7 - t6)) print('Count & speed (%.3fs)' % (t8 - t7)) print('각차량별 그리기 (%.3fs)' % (t9 - t8)) print('txt 데이터 저장 (%.3fs)' % (t10 - t9)) print('스크린에 표시하기 (%.3fs)' % (t11 - t10)) print('비디오파일로 저장하기 (%.3fs)' % (t12 - t11)) print('one frame done (%.3fs)' % (t12 - t1)) if save_txt or save_img: print('Results saved to %s' % os.getcwd() + os.sep + out) if platform == 'darwin': # MacOS os.system('open ' + save_path) print('Done. (%.3fs)' % (time.time() - t0))
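# --- Illustrative sketch: the segment-intersection crossing test -------------
# Both this vehicle counter and the person counter at the end of the file
# decide a crossing by checking whether the segment from a track's previous
# center to its current center intersects a fixed counting line (the
# intersect(p0, p1, a, b) calls). The standard implementation is the
# counter-clockwise orientation test; the project's helper is assumed to be
# equivalent to this one.
def ccw(a, b, c):
    return (c[1] - a[1]) * (b[0] - a[0]) > (b[1] - a[1]) * (c[0] - a[0])

def segments_intersect(p0, p1, q0, q1):
    """True if segment p0-p1 crosses segment q0-q1."""
    return ccw(p0, q0, q1) != ccw(p1, q0, q1) and ccw(p0, p1, q0) != ccw(p0, p1, q1)

counter_line = ((488, 589), (486, 859))   # Counter_1 from above
prev_c, cur_c = (450, 700), (520, 705)    # consecutive track centers
if segments_intersect(prev_c, cur_c, *counter_line):
    print('crossed counter 1')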
def detect(config): sent_videos = set() fpeses = [] fps = 0 global flag, vid_writer, lost_ids door_array = [611, 70, 663, 310] around_door_array = [507, 24, 724, 374] low_border = 225 high_border = 342 door_c = find_centroid(door_array) rect_door = Rectangle(door_array[0], door_array[1], door_array[2], door_array[3]) rect_around_door = Rectangle(around_door_array[0], around_door_array[1], around_door_array[2], around_door_array[3]) # socket HOST = "localhost" PORT = 8084 # camera info save_img = True imgsz = (416, 416) if ONNX_EXPORT else config[ "img_size"] # (320, 192) or (416, 256) or (608, 352) for (height, width) out, source, weights, half, view_img = config["output"], config["source"], config["weights"], \ config["half"], config["view_img"] webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt') # initialize deepsort cfg = get_config() cfg.merge_from_file(config["config_deepsort"]) # initial objects of classes counter = Counter() VideoHandler = Writer() deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT, max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE, nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE, max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET, use_cuda=True) # Initialize device, weights etc. device = torch_utils.select_device(device='cpu' if ONNX_EXPORT else config["device"]) if os.path.exists(out): shutil.rmtree(out) # delete output folder os.makedirs(out) # make new output folder half = device.type != 'cpu' # half precision only supported on CUDA # Initialize model model = Darknet(config["cfg"], imgsz) # Load weights attempt_download(weights) if weights.endswith('.pt'): # pytorch format model.load_state_dict(torch.load(weights, map_location=device)['model'], strict=False) else: # darknet format load_darknet_weights(model, weights) # Eval mode model.to(device).eval() # Half precision print(half) half = half and device.type != 'cpu' # half precision only supported on CUDA print(half) if half: model.half() if webcam: view_img = True torch.backends.cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz) else: save_img = True view_img = True dataset = LoadImages(source, img_size=imgsz) # Get names and colors names = load_classes(config["names"]) colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))] img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img _ = model(img.half() if half else img.float()) if device.type != 'cpu' else None # run once with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock: sock.connect((HOST, PORT)) for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset): flag_move = False flag_anyone_in_door = False t0_ds = time.time() img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = torch_utils.time_synchronized() pred = model(img, augment=config["augment"])[0] # to float if half: pred = pred.float() # Apply NMS classes = None if config["classes"] == "None" else config["classes"] pred = non_max_suppression(pred, config["conf_thres"], config["iou_thres"], multi_label=False, classes=classes, agnostic=config["agnostic_nms"]) # Process detections lost_ids = counter.return_lost_ids() for i, det in enumerate(pred): # detections for image i if webcam: # 
batch_size >= 1 p, s, im0 = path[i], '%g: ' % i, im0s[i].copy() else: p, s, im0 = path, '', im0s if len(door_array) != 4 or len(around_door_array) != 4: door_array = select_object(im0) print(door_array) save_path = str(Path(out) / Path(p).name) s += '%gx%g ' % img.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh # lost_ids = counter.return_lost_ids() bbox_xywh = [] confs = [] if det is not None and len(det): # Rescale boxes from imgsz to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): if names[int(c)] not in config["needed_classes"]: continue n = (det[:, -1] == c).sum() # detections per class s += '%g %s, ' % (n, names[int(c)]) # add to string # Write results for *xyxy, conf, cls in det: # check if bbox`s class is needed if names[int(cls)] not in config["needed_classes"]: continue x_c, y_c, bbox_w, bbox_h = bbox_rel(*xyxy) obj = [x_c, y_c, bbox_w, bbox_h] bbox_xywh.append(obj) confs.append([conf.item()]) if save_img or view_img: # Add bbox to image label = '%s %.2f' % (names[int(cls)], conf) plot_one_box(xyxy, im0, label=label, color=colors[int(cls)]) detections = torch.Tensor(bbox_xywh) confidences = torch.Tensor(confs) # Pass detections to deepsort # if len(detections) == 0: # continue if len(detections) != 0: outputs_tracked = deepsort.update(detections, confidences, im0) counter.someone_inframe() # draw boxes for visualization if len(outputs_tracked) > 0: bbox_xyxy = outputs_tracked[:, :4] identities = outputs_tracked[:, -1] draw_boxes(im0, bbox_xyxy, identities) counter.update_identities(identities) for bbox_tracked, id_tracked in zip(bbox_xyxy, identities): ratio_initial = find_ratio_ofbboxes(bbox=bbox_tracked, rect_compare=rect_around_door) ratio_door = find_ratio_ofbboxes(bbox=bbox_tracked, rect_compare=rect_door) # чел первый раз в контуре двери if ratio_initial > 0.2: if VideoHandler.counter_frames_indoor == 0: # флаг о начале записи VideoHandler.start_video(id_tracked) flag_anyone_in_door = True elif ratio_initial > 0.2 and id_tracked not in VideoHandler.id_inside_door_detected: VideoHandler.continue_opened_video(id=id_tracked, seconds=3) flag_anyone_in_door = True # elif ratio_detection > 0.6 and counter.people_init.get(id_tracked) == 1: # VideoHandler.continue_opened_video(id=id_tracked, seconds=0.005) if id_tracked not in counter.people_init or counter.people_init[id_tracked] == 0: counter.obj_initialized(id_tracked) if ratio_door >= 0.2 and low_border < bbox_tracked[3] < high_border : # was initialized in door, probably going out of office counter.people_init[id_tracked] = 2 elif ratio_door < 0.4: # initialized in the corridor, mb going in counter.people_init[id_tracked] = 1 else: # res is None, means that object is not in door contour counter.people_init[id_tracked] = 1 counter.frame_age_counter[id_tracked] = 0 counter.people_bbox[id_tracked] = bbox_tracked counter.cur_bbox[id_tracked] = bbox_tracked else: deepsort.increment_ages() if counter.need_to_clear(): counter.clear_all() # Print time (inference + NMS) t2 = torch_utils.time_synchronized() # Stream results vals_to_del = [] for val in counter.people_init.keys(): # check bbox also cur_c = find_centroid(counter.cur_bbox[val]) centroid_distance = np.sum(np.array([(door_c[i] - cur_c[i]) ** 2 for i in range(len(door_c))])) # init_c = find_centroid(counter.people_bbox[val]) # vector_person = (cur_c[0] - init_c[0], # cur_c[1] - init_c[1]) ratio = find_ratio_ofbboxes(bbox=counter.cur_bbox[val], 
rect_compare=rect_door) if val in lost_ids and counter.people_init[val] != -1: # if vector_person < 0 then current coord is less than initialized, it means that man is going # in the exit direction if counter.people_init[val] == 2 \ and ratio < 0.4 and centroid_distance > 5000: # vector_person[1] > 50 and print('ratio out: {}\n centroids: {}\n'.format(ratio, centroid_distance)) counter.get_out() counter.people_init[val] = -1 VideoHandler.stop_recording(action_occured="вышел из кабинета") vals_to_del.append(val) elif counter.people_init[val] == 1 \ and ratio >= 0.4 and centroid_distance < 1000: # vector_person[1] < -50 and print('ratio in: {}\n centroids: {}\n'.format(ratio, centroid_distance)) counter.get_in() counter.people_init[val] = -1 VideoHandler.stop_recording(action_occured="зашел внутрь") vals_to_del.append(val) lost_ids.remove(val) # TODO maybe delete this condition elif counter.frame_age_counter.get(val, 0) >= counter.max_frame_age_counter \ and counter.people_init[val] == 2: if ratio < 0.2 and centroid_distance > 10000: # vector_person[1] > 50 and counter.get_out() print('ratio out max frames: ', ratio) counter.people_init[val] = -1 VideoHandler.stop_recording(action_occured="вышел") vals_to_del.append(val) counter.age_counter[val] = 0 counter.clear_lost_ids() for valtodel in vals_to_del: counter.delete_person_data(track_id=valtodel) ins, outs = counter.show_counter() cv2.rectangle(im0, (0, 0), (250, 50), (0, 0, 0), -1, 8) cv2.rectangle(im0, (int(door_array[0]), int(door_array[1])), (int(door_array[2]), int(door_array[3])), (23, 158, 21), 3) cv2.rectangle(im0, (int(around_door_array[0]), int(around_door_array[1])), (int(around_door_array[2]), int(around_door_array[3])), (48, 58, 221), 3) cv2.putText(im0, "in: {}, out: {} ".format(ins, outs), (10, 35), 0, 1e-3 * im0.shape[0], (255, 255, 255), 3) cv2.line(im0, (door_array[0], low_border), (880, low_border), (214, 4, 54), 4) if VideoHandler.stop_writing(im0): # send_new_posts(video_name, action_occured) sock.sendall(bytes(VideoHandler.video_name + ":" + VideoHandler.action_occured, "utf-8")) data = sock.recv(100) print('Received', repr(data.decode("utf-8"))) sent_videos.add(VideoHandler.video_name) with open('data_files/logs2.txt', 'a', encoding="utf-8-sig") as wr: wr.write( 'video {}, action: {}, centroid {} \n'.format(VideoHandler.video_name, VideoHandler.action_occured, centroid_distance)) VideoHandler = Writer() VideoHandler.set_fps(fps) else: VideoHandler.continue_writing(im0, flag_anyone_in_door) if view_img: cv2.imshow(p, im0) if cv2.waitKey(1) == ord('q'): # q to quit raise StopIteration delta_time = (torch_utils.time_synchronized() - t1) # t2_ds = time.time() # print('%s Torch:. (%.3fs)' % (s, t2 - t1)) # print('Full pipe. (%.3fs)' % (t2_ds - t0_ds)) if len(fpeses) < 30: fpeses.append(1 / delta_time) elif len(fpeses) == 30: # fps = round(np.median(np.array(fpeses))) median_fps = float(np.median(np.array(fpeses))) fps = round(median_fps, 2) # fps = 20 print('fps set: ', fps) VideoHandler.set_fps(fps) counter.set_fps(fps) fpeses.append(fps) motion_detection = True else: print('\nflag writing video: ', VideoHandler.flag_writing_video) print('flag stop writing: ', VideoHandler.flag_stop_writing) print('flag anyone in door: ', flag_anyone_in_door) print('counter frames indoor: ', VideoHandler.counter_frames_indoor)
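# --- Illustrative sketch: the notification socket's other end ----------------
# When a clip is finalized, the code above sends "video_name:action" as UTF-8
# bytes over a long-lived TCP connection and reads back up to 100 bytes as an
# acknowledgement. The server is not part of this file, so the framing (one
# message per recv) and the ack payload below are assumptions.
import socket

with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as srv:
    srv.bind(('localhost', 8084))
    srv.listen(1)
    conn, _ = srv.accept()
    with conn:
        while True:
            data = conn.recv(1024)
            if not data:
                break
            video_name, _, action = data.decode('utf-8').partition(':')
            print('received video', video_name, 'action:', action)
            conn.sendall(b'ok')  # client reads this with sock.recv(100)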
def detect(opt, save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    names = model.module.names if hasattr(
        model, 'module') else model.names  # get class names
    if half:
        model.half()  # to FP16

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        view_img = True
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    # run once
    _ = model(img.half() if half else img) if device.type != 'cpu' else None

    save_path = str(Path(out))
    txt_path = str(Path(out)) + '/results.txt'

    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
                                   classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            s += '%gx%g ' % img.shape[2:]  # print string
            save_path = str(Path(out) / Path(p).name)

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                bbox_xywh = []
                confs = []

                # Adapt detections to deep sort input format
                for *xyxy, conf, cls in det:
                    x_c, y_c, bbox_w, bbox_h = bbox_rel(*xyxy)
                    obj = [x_c, y_c, bbox_w, bbox_h]
                    bbox_xywh.append(obj)
                    confs.append([conf.item()])

                xywhs = torch.Tensor(bbox_xywh)
                confss = torch.Tensor(confs)

                # Pass detections to deepsort
                outputs = deepsort.update(xywhs, confss, im0)

                # draw boxes for visualization
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    draw_boxes(im0, bbox_xyxy, identities)

                # Write MOT compliant results to file
                if save_txt and len(outputs) != 0:
                    for j, output in enumerate(outputs):
                        bbox_left = output[0]
                        bbox_top = output[1]
                        bbox_w = output[2]
                        bbox_h = output[3]
                        identity = output[-1]
                        with open(txt_path, 'a') as f:
                            f.write(('%g ' * 10 + '\n') % (frame_idx, identity,
                                                           bbox_left, bbox_top,
                                                           bbox_w, bbox_h,
                                                           -1, -1, -1, -1))  # label format
            else:
                deepsort.increment_ages()

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                print('saving img!')
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    print('saving video!')
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path,
                                                     cv2.VideoWriter_fourcc(*opt.fourcc),
                                                     fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
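# --- Illustrative sketch: the per-frame tracking contract --------------------
# Stripped of I/O, every detect() variant above follows the same per-frame
# contract: convert detections, call deepsort.update(), and call
# deepsort.increment_ages() on empty frames so stale tracks eventually die.
# The (N, 5) [x1, y1, x2, y2, track_id] return shape is how most variants
# here consume the output; forks that differ (e.g. the ffmpeg variant) are
# noted inline.
import torch

def track_frame(deepsort, dets_xywh, confs, frame_bgr):
    if len(dets_xywh):
        # confirmed tracks come back as rows of [x1, y1, x2, y2, track_id]
        return deepsort.update(torch.Tensor(dets_xywh),
                               torch.Tensor(confs), frame_bgr)
    deepsort.increment_ages()  # no detections: age existing tracks instead
    return []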
def detect(opt, save_img=False):
    ct = CentroidTracker()
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA
    now = datetime.datetime.now().strftime("%Y/%m/%d/%H:%M:%S")  # current time

    # Load model
    model = torch.load(weights, map_location=device)['model'].float()  # load to FP32
    model.to(device).eval()
    # =============================================================================
    filepath_mask = 'D:/Internship Crime Detection/YOLOv5 person detection/AjnaTask/Mytracker/yolov5/weights/mask.pt'
    model_mask = torch.load(filepath_mask, map_location=device)['model'].float()
    model_mask.to(device).eval()
    if half:
        model_mask.half()
    names_m = model_mask.module.names if hasattr(model_mask, 'module') else model_mask.names
    # =============================================================================
    if half:
        model.half()  # to FP16

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        view_img = False
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once

    save_path = str(Path(out))
    txt_path = str(Path(out)) + '/results.txt'
    memory = {}
    people_counter = 0
    in_people = 0
    out_people = 0
    people_mask = 0
    people_none = 0
    time_sum = 0
    # now_time = datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S')
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]
        # =============================================================================
        pred_mask = model_mask(img)[0]
        # =============================================================================

        # Apply NMS
        pred = non_max_suppression(
            pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        # =============================================================================
        pred_mask = non_max_suppression(pred_mask, 0.4, 0.5, classes=[0, 1, 2], agnostic=None)
        if pred_mask is None:
            continue
        classification = torch.cat(pred_mask)[:, -1]
        if len(classification) == 0:
            print("----", None)
            continue
        index = int(classification[0])
        mask_class = names_m[index]
        print("MASK CLASS>>>>>>> \n", mask_class)
        # =============================================================================

        # Create the haar cascade
        # cascPath = "D:/Internship Crime Detection/YOLOv5 person detection/AjnaTask/Mytracker/haarcascade_frontalface_alt2.xml"
        # faceCascade = cv2.CascadeClassifier(cascPath)
        t2 = time_synchronized()

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s
            s += '%gx%g ' % img.shape[2:]  # print string
            save_path = str(Path(out) / Path(p).name)

            img_center_y = int(im0.shape[0] // 2)
            # line = [(int(im0.shape[1] * 0.258), int(img_center_y * 1.3)),
            #         (int(im0.shape[1] * 0.55), int(img_center_y * 1.3))]
            # line = [(990, 672), (1072, 24)]
            line = [(1272, 892), (1800, 203)]  # [(330, 468), (704, 468)]
            print("LINE>>>>>>>>>", line, "------------")
            cv2.line(im0, line[0], line[1], (0, 0, 255), 5)

            # =============================================================================
            # gray = cv2.cvtColor(im0, cv2.COLOR_BGR2GRAY)
            # # Detect faces in the image
            # faces = faceCascade.detectMultiScale(
            #     gray,
            #     scaleFactor=1.1,
            #     minNeighbors=5,
            #     minSize=(30, 30)
            # )
            # # Draw a rectangle around the faces
            # for (x, y, w, h) in faces:
            #     cv2.rectangle(im0, (x, y), (x + w, y + h), (0, 255, 0), 2)
            #     text_x = x
            #     text_y = y + h
            #     cv2.putText(im0, mask_class, (text_x, text_y), cv2.FONT_HERSHEY_COMPLEX_SMALL,
            #                 1, (0, 0, 255), thickness=1, lineType=2)
            # =============================================================================

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                bbox_xywh = []
                confs = []
                bbox_xyxy = []
                rects = []  # xyxy boxes for the centroid tracker

                # Adapt detections to deep sort input format
                for *xyxy, conf, cls in det:
                    x_c, y_c, bbox_w, bbox_h = bbox_rel(*xyxy)
                    # label = f'{names[int(cls)]}'
                    xyxy_list = torch.tensor(xyxy).view(1, 4).view(-1).tolist()
                    plot_one_box(xyxy, im0, label='person', color=colors[int(cls)], line_thickness=3)
                    rects.append(xyxy_list)
                    obj = [x_c, y_c, bbox_w, bbox_h, int(cls)]
                    # cv2.circle(im0, (int(x_c), int(y_c)), color=(0, 255, 255), radius=12, thickness=10)
                    bbox_xywh.append(obj)
                    # bbox_xyxy.append(rec)
                    confs.append([conf.item()])

                xywhs = torch.Tensor(bbox_xywh)
                confss = torch.Tensor(confs)

                # Pass detections to the centroid tracker (deepsort kept as an alternative)
                outputs = ct.update(rects)  # xyxy
                # outputs = deepsort.update(xywhs, confss, im0)  # deepsort

                index_id = []
                previous = memory.copy()
                memory = {}
                boxes = []
                names_ls = []

                # draw boxes for visualization
                if len(outputs) > 0:
                    # print('output len', len(outputs))
                    for id_, centroid in outputs.items():
                        # boxes.append([output[0], output[1], output[2], output[3]])
                        # index_id.append('{}-{}'.format(names_ls[-1], output[-2]))
                        index_id.append(id_)
                        boxes.append(centroid)
                        memory[index_id[-1]] = boxes[-1]

                i = 0
                print(">>>>>>>", boxes)
                for box in boxes:
                    # extract the bounding box coordinates
                    # (x, y) = (int(box[0]), int(box[1]))
                    # (w, h) = (int(box[2]), int(box[3]))
                    x = int(box[0])
                    y = int(box[1])
                    if index_id[i] in previous:
                        previous_box = previous[index_id[i]]
                        (x2, y2) = (int(previous_box[0]), int(previous_box[1]))
                        # (w2, h2) = (int(previous_box[2]), int(previous_box[3]))
                        p0 = (x, y)
                        p1 = (x2, y2)
                        cv2.line(im0, p0, p1, (0, 255, 0), 3)
                        # segment from current frame centre point to previous frame centre point
                        if intersect(p0, p1, line[0], line[1]):
                            people_counter += 1
                            print('==============================')
                            print(p0, "---------------------------", p0[1])
                            print('==============================')
                            print(line[1][1], '------------------', line[0][0],
                                  '-----------------', line[1][0], '-------------', line[0][1])
                            # if p0[1] <= line[1][1]:
                            #     in_people += 1
                            # else:
                            #     # if mask_class == 'mask':
                            #     #     print("COUNTING MASK..", mask_class)
                            #     #     people_mask += 1
                            #     # if mask_class == 'none':
                            #     #     people_none += 1
                            #     out_people += 1
                            if p0[1] >= line[1][1]:
                                in_people += 1
                                if mask_class == 'mask':
                                    people_mask += 1
                                else:
                                    people_none += 1
                            else:
                                out_people += 1
                    i += 1

                # Write MOT compliant results to file
                if save_txt and len(outputs) != 0:
                    for j, output in enumerate(outputs):
                        bbox_left = output[0]
                        bbox_top = output[1]
                        bbox_w = output[2]
                        bbox_h = output[3]
                        identity = output[-1]
                        with open(txt_path, 'a') as f:
                            f.write(('%g ' * 10 + '\n') % (frame_idx, identity, bbox_left,
                                                           bbox_top, bbox_w, bbox_h, -1, -1, -1, -1))  # label format
            else:
                deepsort.increment_ages()

            cv2.putText(im0, 'Person [down][up] : [{}][{}]'.format(out_people, in_people),
                        (130, 50), cv2.FONT_HERSHEY_COMPLEX, 1.0, (0, 0, 255), 3)
            cv2.putText(im0, 'Person [mask][no_mask] : [{}][{}]'.format(people_mask, people_none),
                        (130, 100), cv2.FONT_HERSHEY_COMPLEX, 1.0, (0, 0, 255), 3)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))
            time_sum += t2 - t1

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    # im0 = cv2.resize(im0, (0, 0), fx=0.5, fy=0.5, interpolation=cv2.INTER_LINEAR)
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
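# The counting logic above calls an intersect() helper that is not shown in
# this snippet. A common implementation in counting scripts of this kind (a
# reasonable assumption for what is used here) is the counter-clockwise
# segment-intersection test: the track's displacement segment p0->p1 crosses
# the counting line iff the endpoints of each segment lie on opposite sides of
# the other segment.
def ccw(a, b, c):
    # True when the triple (a, b, c) turns counter-clockwise
    return (c[1] - a[1]) * (b[0] - a[0]) > (b[1] - a[1]) * (c[0] - a[0])

def intersect(p0, p1, q0, q1):
    # Segments p0-p1 and q0-q1 intersect iff the orientations differ pairwise
    return ccw(p0, q0, q1) != ccw(p1, q0, q1) and ccw(p0, p1, q0) != ccw(p0, p1, q1)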
def detect(save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    if not os.path.exists(opt.smooth_txt):
        os.makedirs(opt.smooth_txt)
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # # Second-stage classifier
    # classify = False
    # if classify:
    #     modelc = load_classifier(name='resnet101', n=2)  # initialize
    #     modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
    #     modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[np.random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once

    # dataset contains all the frames (or images) of the video
    crds_crop = np.empty((0, 4))  # contains coordinates of a single bbox with the highest conf
    np_nan = np.empty([1, 4])  # for tracking
    np_nan[:] = np.nan  # for tracking
    frame_no = 0

    for path, img, im0s, vid_cap in dataset:
        # im0s, img - initial, resized and padded (img)
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
                                   classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # # Apply Classifier
        # if classify:
        #     pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s
            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            # gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh

            if det is not None and len(det):  # only when obj is in the frame
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # these will be used for the deepsort input
                xywhs = xyxy2xywh(det[:, :4].cpu())
                confs = det[:, 4].cpu()

                # Pass detections to deepsort
                outputs = deepsort.update(xywhs, confs, im0)  # this is numpy array

                ###########################################################
                # FOR NOW, WE WILL ONLY BE KEEPING THE MOST CONFIDENT VALUE
                ###########################################################
                max_conf_id = confs.argmax()
                # keeping the coordinates row with max conf (det now only keeps one row and four columns)
                det = det[max_conf_id, :].reshape(1, 6)
                to_append = xyxy2xywh(det[:, :4].cpu().numpy().reshape(1, 4).astype(int))
                if len(crds_crop) == 0:
                    crds_crop = np.append(crds_crop, to_append).reshape(-1, 4)
                else:
                    crds_crop = np.append(crds_crop, to_append, axis=0)

                # draw boxes for visualization
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    # draw_boxes(im0, bbox_xyxy, identities)  # no tracking boxes for now

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in det:
                    if save_txt:  # Write to file
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * 5 + '\n') % (*xyxy, conf))  # label format
                    if save_img or view_img:  # Add bbox to image
                        label = '%s' % (names[int(cls)])
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=2)
            else:
                deepsort.increment_ages()
                if len(crds_crop) == 0:
                    crds_crop = np.append(crds_crop, np_nan).reshape(-1, 4)
                else:
                    crds_crop = np.append(crds_crop, np_nan, axis=0)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

        frame_no += 1

    ##############################################################################
    # this part should be temporary as online filtering will be implemented
    crds_crop = linear_interp(crds_crop)
    max_side_bbox = crds_crop[:, 2:].max(axis=1) * 1.2  # 20% relaxation
    # making sure that the window size does not exceed frame size
    max_side_bbox = np.where(max_side_bbox < min(w, h), max_side_bbox, min(w, h))
    crds_crop = np.c_[crds_crop, max_side_bbox]
    crds_crop = smoothing(crds_crop, fps)
    np.savetxt(os.path.join(opt.smooth_txt, os.path.basename(path)[:-4] + '_savgol_' + '.txt'),
               crds_crop, delimiter=' ')

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))
        if platform == 'darwin' and not opt.update:  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
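# linear_interp() and smoothing() are project helpers not shown in this
# snippet. Given the '_savgol_' suffix in the saved filename, smoothing() is
# presumably a Savitzky-Golay filter over the per-frame box coordinates; a
# minimal sketch of both under that assumption (NaN rows from detection gaps
# filled column-wise by linear interpolation, window tied to the video fps):
import numpy as np
from scipy.signal import savgol_filter

def linear_interp(coords):
    # Fill NaN rows (frames with no detection) column-wise
    coords = coords.copy()
    idx = np.arange(len(coords))
    for c in range(coords.shape[1]):
        col = coords[:, c]
        nans = np.isnan(col)
        if nans.any() and (~nans).any():
            col[nans] = np.interp(idx[nans], idx[~nans], col[~nans])
    return coords

def smoothing(coords, fps, polyorder=2):
    # Savitzky-Golay window of roughly one second, forced odd and within bounds
    window = max(polyorder + 1, int(fps) | 1)
    window = min(window, len(coords) - (1 - len(coords) % 2))
    return savgol_filter(coords, window_length=window, polyorder=polyorder, axis=0)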
def detect(config):
    sent_videos = set()
    video_name = ""
    fpeses = []
    fps = 0

    # door_array = select_object()
    # door_array = [475, 69, 557, 258]
    global flag, vid_writer, lost_ids

    # initial parameters
    # door_array = [528, 21, 581, 315]
    # door_array = [596, 76, 650, 295]  # 18 stream
    door_array = [611, 70, 663, 310]
    # around_door_array = [572, 79, 694, 306]
    # around_door_array = [470, 34, 722, 391]
    around_door_array = [507, 24, 724, 374]
    low_border = 225
    door_c = find_centroid(door_array)  # needed below for centroid_distance
    rect_door = Rectangle(door_array[0], door_array[1], door_array[2], door_array[3])
    rect_around_door = Rectangle(around_door_array[0], around_door_array[1],
                                 around_door_array[2], around_door_array[3])

    # socket
    HOST = "localhost"
    PORT = 8084

    # camera info
    save_img = True
    imgsz = (416, 416) if ONNX_EXPORT else config["img_size"]  # (320, 192) or (416, 256) or (608, 352) for (height, width)
    out, source, weights, half, view_img = config["output"], config["source"], config["weights"], \
                                           config["half"], config["view_img"]
    webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(config["config_deepsort"])

    # initial objects of classes
    counter = Counter(counter_in=0, counter_out=0, track_id=0)
    VideoHandler = Writer()
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize device, weights etc.
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder

    # Initialize colors
    names = load_classes(config["names"])
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    if config["category_num"] <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' % config["category_num"])
    if not os.path.isfile('yolo/%s.trt' % config["model"]):
        raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % config["model"])

    cap = cv2.VideoCapture(config["source"])
    if not cap.isOpened():
        raise SystemExit('ERROR: failed to open the input video file!')
    frame_width, frame_height = int(cap.get(3)), int(cap.get(4))

    cls_dict = get_cls_dict(config["category_num"])
    vis = BBoxVisualization(cls_dict)
    h, w = get_input_shape(config["model"])
    trt_yolo = TrtYOLO(config["model"], (h, w), config["category_num"], config["letter_box"])

    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        sock.connect((HOST, PORT))
        img_shape = (288, 288)
        # for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        while True:
            ret, im0 = cap.read()
            if not ret:
                break
            preds, confs, clss = perform_detection(frame=im0, trt_yolo=trt_yolo,
                                                   conf_th=config["conf_thres"], vis=vis)
            flag_move = False
            flag_anyone_in_door = False
            t0 = time.time()
            ratio_detection = 0

            # Process detections
            lost_ids = counter.return_lost_ids()
            for i, (det, conf, cls) in enumerate(zip(preds, confs, clss)):
                if det is not None and len(det):
                    # Rescale boxes from imgsz to im0 size
                    # det = scale_coords(img_shape, det, im0.shape).round()
                    if names[int(cls)] not in config["needed_classes"]:
                        continue
                    # bbox_xywh = []
                    # confs = []
                    # Write results
                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(det, im0, label=label, color=colors[int(cls)])

            detections = torch.Tensor(preds)
            confidences = torch.Tensor(confs)

            # Pass detections to deepsort
            if len(detections) == 0:
                continue
            outputs = deepsort.update(detections, confidences, im0)
            print('detections ', detections)
            print('outputs ', outputs)

            # draw boxes for visualization
            if len(outputs) > 0:
                bbox_xyxy = outputs[:, :4]
                identities = outputs[:, -1]
                draw_boxes(im0, bbox_xyxy, identities)
                print('bbox_xyxy ', bbox_xyxy)
                counter.update_identities(identities)

                for bbox_tracked, id_tracked in zip(bbox_xyxy, identities):
                    rect_detection = Rectangle(bbox_tracked[0], bbox_tracked[1],
                                               bbox_tracked[2], bbox_tracked[3])
                    inter_detection = rect_detection & rect_around_door
                    if inter_detection:
                        inter_square_detection = rect_square(*inter_detection)
                        cur_square_detection = rect_square(*rect_detection)
                        try:
                            ratio_detection = inter_square_detection / cur_square_detection
                        except ZeroDivisionError:
                            ratio_detection = 0

                    # person appears inside the door contour for the first time
                    if ratio_detection > 0.2:
                        if VideoHandler.counter_frames_indoor == 0:
                            # flag that recording has started
                            VideoHandler.start_video(id_tracked)
                        flag_anyone_in_door = True
                    elif ratio_detection > 0.2 and id_tracked not in VideoHandler.id_inside_door_detected:
                        VideoHandler.continue_opened_video(id=id_tracked, seconds=3)
                        flag_anyone_in_door = True
                    # elif ratio_detection > 0.6 and counter.people_init.get(id_tracked) == 1:
                    #     VideoHandler.continue_opened_video(id=id_tracked, seconds=0.005)

                    if id_tracked not in counter.people_init or counter.people_init[id_tracked] == 0:
                        counter.obj_initialized(id_tracked)
                        rect_head = Rectangle(bbox_tracked[0], bbox_tracked[1],
                                              bbox_tracked[2], bbox_tracked[3])
                        intersection = rect_head & rect_door
                        if intersection:
                            intersection_square = rect_square(*intersection)
                            head_square = rect_square(*rect_head)
                            rat = intersection_square / head_square
                            if rat >= 0.4 and bbox_tracked[3] > low_border:
                                # was initialized in door, probably going out of office
                                counter.people_init[id_tracked] = 2
                            elif rat < 0.4:
                                # initialized in the corridor, maybe going in
                                counter.people_init[id_tracked] = 1
                        else:
                            # no intersection means the object is not in the door contour
                            counter.people_init[id_tracked] = 1
                        counter.frame_age_counter[id_tracked] = 0
                    counter.people_bbox[id_tracked] = bbox_tracked
                    counter.cur_bbox[id_tracked] = bbox_tracked
            else:
                deepsort.increment_ages()

            # Print time (inference + NMS)
            t2 = torch_utils.time_synchronized()

            # Stream results
            vals_to_del = []
            for val in counter.people_init.keys():
                # check bbox also
                inter = 0
                cur_square = 0
                ratio = 0
                cur_c = find_centroid(counter.cur_bbox[val])
                centroid_distance = np.sum(np.array([(door_c[i] - cur_c[i]) ** 2 for i in range(len(door_c))]))
                # init_c = find_centroid(counter.people_bbox[val])
                # vector_person = (cur_c[0] - init_c[0], cur_c[1] - init_c[1])
                rect_cur = Rectangle(counter.cur_bbox[val][0], counter.cur_bbox[val][1],
                                     counter.cur_bbox[val][2], counter.cur_bbox[val][3])
                inter = rect_cur & rect_door

                if val in lost_ids and counter.people_init[val] != -1:
                    if inter:
                        inter_square = rect_square(*inter)
                        cur_square = rect_square(*rect_cur)
                        try:
                            ratio = inter_square / cur_square
                        except ZeroDivisionError:
                            ratio = 0
                    # if vector_person < 0 then the current coord is less than the initialized one,
                    # which means the person is moving in the exit direction
                    if counter.people_init[val] == 2 and ratio < 0.4 and centroid_distance > 5000:
                        print('ratio out: {}\n centroids: {}\n'.format(ratio, centroid_distance))
                        counter.get_out()
                        counter.people_init[val] = -1
                        VideoHandler.stop_recording(action_occured="вышел из кабинета")  # "left the office"
                        vals_to_del.append(val)
                    elif counter.people_init[val] == 1 and ratio >= 0.4 and centroid_distance < 1000:
                        print('ratio in: {}\n centroids: {}\n'.format(ratio, centroid_distance))
                        counter.get_in()
                        counter.people_init[val] = -1
                        VideoHandler.stop_recording(action_occured="зашел внутрь")  # "went inside"
                        vals_to_del.append(val)
                    lost_ids.remove(val)

                # TODO maybe delete this condition
                elif counter.frame_age_counter.get(val, 0) >= counter.max_frame_age_counter \
                        and counter.people_init[val] == 2:
                    if inter:
                        inter_square = rect_square(*inter)
                        cur_square = rect_square(*rect_cur)
                        try:
                            ratio = inter_square / cur_square
                        except ZeroDivisionError:
                            ratio = 0
                    if ratio < 0.2 and centroid_distance > 10000:
                        counter.get_out()
                        print('ratio out max frames: ', ratio)
                        counter.people_init[val] = -1
                        VideoHandler.stop_recording(action_occured="вышел")  # "went out"
                        vals_to_del.append(val)
                    counter.age_counter[val] = 0

            counter.clear_lost_ids()
            for valtodel in vals_to_del:
                counter.delete_person_data(track_id=valtodel)

            ins, outs = counter.show_counter()
            cv2.rectangle(im0, (0, 0), (250, 50), (0, 0, 0), -1, 8)
            cv2.rectangle(im0, (int(door_array[0]), int(door_array[1])),
                          (int(door_array[2]), int(door_array[3])), (23, 158, 21), 3)
            cv2.rectangle(im0, (int(around_door_array[0]), int(around_door_array[1])),
                          (int(around_door_array[2]), int(around_door_array[3])), (48, 58, 221), 3)
            cv2.putText(im0, "in: {}, out: {} ".format(ins, outs), (10, 35), 0,
                        1e-3 * im0.shape[0], (255, 255, 255), 3)
            cv2.line(im0, (door_array[0], low_border), (880, low_border), (214, 4, 54), 4)

            if VideoHandler.stop_writing(im0):
                # send_new_posts(video_name, action_occured)
                sock.sendall(bytes(VideoHandler.video_name + ":" + VideoHandler.action_occured, "utf-8"))
                data = sock.recv(100)
                print('Received', repr(data.decode("utf-8")))
                sent_videos.add(VideoHandler.video_name)
                with open('../data_files/logs2.txt', 'a', encoding="utf-8-sig") as wr:
                    wr.write('video {}, man {}, centroid {} '.format(
                        VideoHandler.video_name, VideoHandler.action_occured, centroid_distance))
                VideoHandler = Writer()
                VideoHandler.set_fps(fps)
            else:
                VideoHandler.continue_writing(im0, flag_anyone_in_door)

            if view_img:
                cv2.imshow('image', im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            delta_time = (time.time() - t0)
            # t2_ds = time.time()
            # print('%s Torch:. (%.3fs)' % (s, t2 - t1))
            # print('Full pipe. (%.3fs)' % (t2_ds - t0_ds))
            if len(fpeses) < 30:
                fpeses.append(round(1 / delta_time))
            elif len(fpeses) == 30:
                # fps = round(np.median(np.array(fpeses)))
                fps = np.median(np.array(fpeses))
                # fps = 3
                print('fps set: ', fps)
                VideoHandler.set_fps(fps)
                counter.set_fps(fps)
                fpeses.append(fps)
                motion_detection = True
            else:
                print('\nflag writing video: ', VideoHandler.flag_writing_video)
                print('flag stop writing: ', VideoHandler.flag_stop_writing)
                print('flag anyone in door: ', flag_anyone_in_door)
                print('counter frames indoor: ', VideoHandler.counter_frames_indoor)
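# Rectangle, rect_square and find_centroid are small geometry helpers assumed
# by the ratio logic above but not shown in this snippet. A plausible sketch:
# Rectangle supports & for the intersection box (None when the boxes are
# disjoint, so it is usable in a truth test), rect_square is the box area, and
# find_centroid returns the box centre.
from collections import namedtuple

class Rectangle(namedtuple('Rectangle', 'xmin ymin xmax ymax')):
    def __and__(self, other):
        # Intersection box, or None when the rectangles do not overlap
        xmin = max(self.xmin, other.xmin)
        ymin = max(self.ymin, other.ymin)
        xmax = min(self.xmax, other.xmax)
        ymax = min(self.ymax, other.ymax)
        if xmin < xmax and ymin < ymax:
            return Rectangle(xmin, ymin, xmax, ymax)
        return None

def rect_square(xmin, ymin, xmax, ymax):
    return (xmax - xmin) * (ymax - ymin)

def find_centroid(box):
    return ((box[0] + box[2]) / 2, (box[1] + box[3]) / 2)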
def detect(save_img=False):
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(('rtsp://', 'rtmp://', 'http://'))

    ##### Initialize DeepSORT
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Directories
    save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once

    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
                                   classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f'{n} {names[int(c)]}s, '  # add to string

                ###### Add DeepSORT
                bbox_xywh = []
                confs = []
                # Adapt detections to deep sort input format
                for *xyxy, conf, cls in det:
                    x_c, y_c, bbox_w, bbox_h = bbox_rel(*xyxy)
                    obj = [x_c, y_c, bbox_w, bbox_h]
                    bbox_xywh.append(obj)
                    confs.append([conf.item()])
                xywhs = torch.Tensor(bbox_xywh)
                confss = torch.Tensor(confs)

                # Pass detections to deepsort
                outputs = deepsort.update(xywhs, confss, im0)

                # draw boxes for visualization
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    draw_boxes(im0, bbox_xyxy, identities)
                ######

                # # Write results
                # for *xyxy, conf, cls in reversed(det):
                #     if save_txt:  # Write to file
                #         xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                #         line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh)  # label format
                #         with open(txt_path + '.txt', 'a') as f:
                #             f.write(('%g ' * len(line)).rstrip() % line + '\n')
                #     if save_img or view_img:  # Add bbox to image
                #         label = f'{names[int(cls)]} {conf:.2f}'
                #         plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)
            else:
                deepsort.increment_ages()

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    print(f'Done. ({time.time() - t0:.3f}s)')
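# bbox_rel() converts a YOLO (x1, y1, x2, y2) box into the centre-based
# (x_c, y_c, w, h) layout that deepsort.update() expects. It is called in
# several of the functions above but never defined in this snippet; a minimal
# sketch of the usual implementation, assuming xyxy holds scalars or 0-dim
# tensors:
def bbox_rel(*xyxy):
    x1, y1, x2, y2 = (float(v) for v in xyxy)
    bbox_w = abs(x2 - x1)
    bbox_h = abs(y2 - y1)
    x_c = min(x1, x2) + bbox_w / 2  # box centre x
    y_c = min(y1, y2) + bbox_h / 2  # box centre y
    return x_c, y_c, bbox_w, bbox_h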
def __init__(self):
    self.count = 0
    self.root = tk.Tk()
    self.out = "inference/output"
    self.source = "inference/images"
    self.weights = "yolov5/weights/yolov5s.pt"
    self.view_img = False
    self.save_txt = False
    self.imgsz = 1088
    self.iou_thres = 0.5
    self.classes = [0, 1, 2, 3, 5, 7]
    self.conf_thres = 0.4
    self.fourcc = "mp4v"
    self.config_deepsort = "deep_sort_pytorch/configs/deep_sort.yaml"
    self.device = ""
    self.agnostic_nms = False
    self.augment = False
    self.two_w, self.three_w, self.four_w, self.truck, self.bus, self.total = (
        None, None, None, None, None, None,
    )
    self.fps = None

    font = ("Arial", 25)
    self.root.resizable(0, 0)
    self.panel = tk.Frame(self.root)
    self.panel.pack(side="top", padx=10, pady=10)
    self.canvas = tk.Label(self.panel, text="loading...", anchor="center", font=font, fg="blue")
    self.canvas.pack(side="left", padx=10, pady=10)
    self.counting_result = tk.Frame(self.root)
    self.counting_result.pack(side="bottom", padx=10, pady=10)
    self.Quit_btn = tk.Button(
        self.counting_result,
        text="Quit",
        font=("Arial", 12),
        command=self.onClose,
        bg="red",
        fg="white",
        width=6,
    )
    self.Quit_btn.grid(row=2, column=5)

    # set a callback to handle when the window is closed
    self.root.wm_title("Traffic")
    self.root.wm_protocol("WM_DELETE_WINDOW", self.onClose)

    # Open camera source
    # self.vid = oneCameraCapture.cameraCapture()
    self.vs = cv2.VideoCapture("traffic3.mp4")
    self.webcam = (self.source == "0" or self.source.startswith("rtsp")
                   or self.source.startswith("http") or self.source.endswith(".txt"))

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(self.config_deepsort)
    self.deepsort = DeepSort(
        cfg.DEEPSORT.REID_CKPT,
        max_dist=cfg.DEEPSORT.MAX_DIST,
        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
        max_age=cfg.DEEPSORT.MAX_AGE,
        n_init=cfg.DEEPSORT.N_INIT,
        nn_budget=cfg.DEEPSORT.NN_BUDGET,
        use_cuda=True,
    )

    # Initialize
    self.device = select_device(self.device)
    if os.path.exists(self.out):
        shutil.rmtree(self.out)  # delete output folder
    os.makedirs(self.out)  # make new output folder
    self.half = self.device.type != "cpu"  # half precision only supported on CUDA

    # Load model
    self.model = torch.hub.load("ultralytics/yolov5", "yolov5s", pretrained=True)
    self.source = "traffic3.mp4"

    self.save_path = str(Path(self.out))
    self.txt_path = str(Path(self.out)) + "/results.txt"

    self.points = [[5, 100], [400, 100], [730, 300], [5, 300]]
    self.pts = np.array(self.points, np.int32)
    self.pts_arr = self.pts.reshape((-1, 1, 2))
    self.isClosed = True
    self.delay = 100
    self.update()
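# The constructor above ends by calling self.update(), which in this Tkinter
# pattern must re-schedule itself through the event loop. The real method is
# not shown here; a minimal sketch of the expected shape (the body is an
# illustrative assumption, with detection/tracking and label updates elided):
def update(self):
    ret, frame = self.vs.read()
    if ret:
        # run detection + tracking on `frame`, then draw the counting polygon
        cv2.polylines(frame, [self.pts_arr], self.isClosed, (0, 255, 0), 2)
        self.count += 1
    # re-arm the callback so the GUI stays responsive between frames
    self.root.after(self.delay, self.update)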
def frames():
    logger = Logger()
    print('Initialization done...')
    camera = cv2.VideoCapture(Camera.video_source)
    if not camera.isOpened():
        raise RuntimeError('Could not start camera.')

    out, weights, imgsz = '.inference/output', 'weights/yolov5s.pt', 640
    source = "0"
    # print(source)
    # print(type(source))
    webcam = source.isnumeric()
    # print('webcam: {0}'.format(webcam))

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file('deep_sort_pytorch/configs/deep_sort.yaml')
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    device = torch_utils.select_device()
    # print(weights)
    # print(os.getcwd())
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder

    # Load model
    # google_utils.attempt_download(weights)
    # model = torch.load(weights, map_location=device)['model']
    model = attempt_load(weights, map_location=device)  # load FP32 model
    model.to(device).eval()

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Half precision
    half = False and device.type != 'cpu'
    # print('half = ' + str(half))
    if half:
        model.half()

    # Set Dataloader
    vid_path, vid_writer = None, None
    # if webcam:
    #     view_img = True
    #     cudnn.benchmark = True  # set True to speed up constant image size inference
    #     dataset = LoadStreams(source, img_size=imgsz)
    # else:
    #     save_img = True
    #     # set view_img = True here to display the video while detecting
    #     view_img = True
    #     dataset = LoadImages(source, img_size=imgsz)
    # dataset = LoadImages(source, img_size=imgsz)
    dataset = LoadStreams(source, img_size=imgsz)
    # print('dataset: {0}'.format(dataset))

    names = model.names if hasattr(model, 'names') else model.modules.names
    # print('----')
    # print(names)
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once

    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        # print('path: {0}'.format(path))
        # print('im0s: {0}'.format(im0s))
        # print('type(im0s): {0}'.format(type(im0s)))
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = torch_utils.time_synchronized()
        pred = model(img, augment=False)[0]

        # Apply NMS
        pred = non_max_suppression(pred, 0.4, 0.5, fast=True, classes=None, agnostic=False)
        t2 = torch_utils.time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        for i, det in enumerate(pred):  # detections per image
            # p, s, im0 = path, '', im0s
            p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            # save_path = str(Path(out) / Path(p).name)
            s += '%gx%g ' % img.shape[2:]  # print string
            # gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # for c in det[:, -1].unique():  # probably error with torch 1.5
                for c in det[:, -1].detach().unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %s, ' % (n, names[int(c)])  # add to string

                # --- linjie
                bbox_xywh = []
                confs = []
                clses = []
                # for *xyxy, conf, cls in det:
                #     label = '%s %.2f' % (names[int(cls)], conf)
                #     plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    # -- linjie deepsort
                    x_c, y_c, bbox_w, bbox_h = Camera.bbox_rel(*xyxy)
                    obj = [x_c, y_c, bbox_w, bbox_h]
                    bbox_xywh.append(obj)
                    confs.append([conf.item()])
                    clses.append([cls.item()])
                    label = '%s %.2f' % (names[int(cls)], conf)
                    print('label for this detection: {0}'.format(label))
                    # plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)
                    # check whether the label is a person --linjie
                    if label is not None:
                        if (label.split())[0] == 'person':
                            logger.info('process {0}: person detected'.format(os.getpid()))
                            # print('label is person')
                            # distancing(people_coords, im0, dist_thres_lim=(200, 250))
                            # people_coords.append(xyxy)
                            # plot_one_box(xyxy, im0, line_thickness=3)
                            plot_dots_on_people(xyxy, im0)

                # --- linjie deepsort
                xywhs = torch.Tensor(bbox_xywh)
                confss = torch.Tensor(confs)
                clses = torch.Tensor(clses)
                outputs = deepsort.update(xywhs, confss, clses, im0)

                # draw boxes for visualization
                if len(outputs) > 0:
                    bbox_tlwh = []
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, 4]
                    clses = outputs[:, 5]
                    scores = outputs[:, 6]
                    stays = outputs[:, 7]
                    Camera.draw_boxes(im0, bbox_xyxy, [names[i] for i in clses], scores, identities)

            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            # if view_img:
            #     cv2.imshow(p, im0)
            #     if cv2.waitKey(1) == ord('q'):  # q to quit
            #         raise StopIteration

            yield cv2.imencode('.jpg', im0)[1].tobytes()
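# frames() yields encoded JPEG bytes per processed frame, which is the shape an
# MJPEG HTTP endpoint expects. A hedged usage sketch with Flask; the route
# name, boundary string, and reaching the generator through Camera.frames are
# assumptions, not part of this snippet:
from flask import Flask, Response

app = Flask(__name__)

def gen():
    # wrap each JPEG in a multipart boundary for multipart/x-mixed-replace
    for jpg in Camera.frames():  # assumes frames() is exposed on the Camera class
        yield b'--frame\r\nContent-Type: image/jpeg\r\n\r\n' + jpg + b'\r\n'

@app.route('/video_feed')
def video_feed():
    return Response(gen(), mimetype='multipart/x-mixed-replace; boundary=frame')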