def run_deep_sort(self, frame, out_scores, out_boxes):
    """Run one DeepSORT step on the detections of a single frame.

    Parameters
    ----------
    frame :
        Image handed to ``self.pre_process`` to build the detection crops.
    out_scores :
        Per-detection confidence scores, aligned with ``out_boxes``.
    out_boxes :
        Sequence or array of bounding boxes, one per detection.

    Returns
    -------
    ``self.tracker.tracks`` after a predict-only step when there are no
    detections; otherwise the tuple ``(self.tracker, dets)`` where ``dets``
    is the list of ``Detection`` objects passed to the tracker.
    """
    # ``out_boxes == []`` is element-wise (or an error) for NumPy arrays, so
    # an empty array was never recognised as "no detections". A length check
    # works for lists and arrays alike.
    if out_boxes is None or len(out_boxes) == 0:
        self.tracker.predict()
        print('No detections')
        # NOTE(review): this branch returns the track list while the normal
        # path returns (tracker, dets); kept as-is for existing callers.
        return self.tracker.tracks

    detections = np.array(out_boxes)

    # Crop and preprocess every box, then embed the crops on the GPU.
    processed_crops = self.pre_process(frame, detections).cuda()
    features = self.encoder.forward_once(processed_crops)
    features = features.detach().cpu().numpy()

    # A single detection may come back as a 1-D vector; restore the batch axis.
    if len(features.shape) == 1:
        features = np.expand_dims(features, 0)

    dets = [
        Detection(bbox, score, feature)
        for bbox, score, feature in zip(detections, out_scores, features)
    ]

    self.tracker.predict()
    self.tracker.update(dets)
    return self.tracker, dets
def create_detections(detection_mat, frame_idx, min_height=0):
    """Create detections for given frame index from the raw detection matrix.

    Parameters
    ----------
    detection_mat : ndarray
        Matrix of detections. The first 10 columns of the detection matrix are
        in the standard MOTChallenge detection format. In the remaining columns
        store the feature vector associated with each detection.
    frame_idx : int
        The frame index.
    min_height : Optional[int]
        A minimum detection bounding box height. Detections that are smaller
        than this value are disregarded.

    Returns
    -------
    List[tracker.Detection]
        Returns detection responses at given frame index.

    """
    # ``np.int`` was only an alias of the builtin and was removed in
    # NumPy 1.24; the builtin ``int`` gives the same dtype.
    frame_indices = detection_mat[:, 0].astype(int)
    mask = frame_indices == frame_idx

    detection_list = []
    for row in detection_mat[mask]:
        # Columns 2-5: bounding box, column 6: confidence, 10+: feature vector.
        bbox, confidence, feature = row[2:6], row[6], row[10:]
        if bbox[3] < min_height:
            continue
        detection_list.append(Detection(bbox, confidence, feature))
    return detection_list
def run_deep_sort(self, frame, out_scores, out_boxes, out_classes):
    """Run one DeepSORT step with class-aware detections.

    Parameters
    ----------
    frame :
        Image passed to ``self.encoder.extract_features``.
    out_scores :
        Per-detection confidence scores.
    out_boxes :
        Raw boxes; converted with ``self.format_yolo_output`` before use.
    out_classes :
        Per-detection class names, stored on each ``Detection``.

    Returns
    -------
    ``self.tracker.tracks`` after the predict (and, when there are
    detections, update) step.
    """
    out_boxes = self.format_yolo_output(out_boxes)

    # A length check works for both lists and NumPy arrays; the previous
    # ``== []`` comparison did not recognise an empty array.
    if out_boxes is None or len(out_boxes) == 0:
        self.tracker.predict()
        return self.tracker.tracks

    boxes = np.array(out_boxes)
    features = self.encoder.extract_features(frame, boxes)

    detections = [
        Detection(bbox, score, feature, classname)
        for bbox, score, feature, classname in zip(
            boxes, out_scores, features, out_classes)
    ]

    # Non-maximum suppression on the tlwh boxes with overlap threshold 0.8.
    outboxes = np.array([d.tlwh for d in detections])
    outscores = np.array([d.confidence for d in detections])
    indices = prep.non_max_suppression(outboxes, 0.8, outscores)
    detections = [detections[i] for i in indices]

    self.tracker.predict()
    self.tracker.update(detections)
    return self.tracker.tracks
def sort(boxResults, imgcv):
    """Track the given box results with DeepSORT and draw track ids on the image.

    ``boxResults`` is an iterable of dicts with ``topleft``/``bottomright``
    (each holding ``x`` and ``y``), ``label`` and ``confidence`` keys.
    ``imgcv`` is either an image array or something ``cv2.imread`` can load.
    Uses the ``encoder`` and ``tracker`` objects from the enclosing scope.

    Returns the image with a rectangle and id drawn for every confirmed,
    recently updated track.
    """
    overlap_limit = 0.1
    if type(imgcv) is not np.ndarray:
        imgcv = cv2.imread(imgcv)
    h, w, _ = imgcv.shape
    thick = int((h + w) // 300)

    tlwh_boxes = []
    confidences = []
    for boxresult in boxResults:
        print(boxresult)
        corner_tl = boxresult['topleft']
        top, left = corner_tl['y'], corner_tl['x']
        corner_br = boxresult['bottomright']
        bot, right = corner_br['y'], corner_br['x']
        _label = boxresult['label']  # read for parity; not used below
        confidence = boxresult['confidence']

        # DeepSORT expects (x, y, width, height).
        tlwh = np.array([left, top, right - left, bot - top]).astype(np.float64)
        tlwh_boxes.append(tlwh)
        confidences.append(confidence)
        print(tlwh)
        print(imgcv.shape)

    tlwh_boxes = np.array(tlwh_boxes)
    confidences = np.array(confidences)
    features = encoder(imgcv, tlwh_boxes.copy())

    candidates = []
    for bbox, score, feature in zip(tlwh_boxes, confidences, features):
        candidates.append(Detection(bbox, score, feature))

    # Run non-maxima suppression.
    nms_boxes = np.array([d.tlwh for d in candidates])
    nms_scores = np.array([d.confidence for d in candidates])
    keep = prep.non_max_suppression(nms_boxes, overlap_limit, nms_scores)
    kept = [candidates[i] for i in keep]

    tracker.predict()
    tracker.update(kept)

    white = (255, 255, 255)
    for track in tracker.tracks:
        if not track.is_confirmed() or track.time_since_update > 1:
            continue
        bbox = track.to_tlbr()
        id_num = str(track.track_id)
        x1, y1, x2, y2 = [int(bbox[i]) for i in range(4)]
        cv2.rectangle(imgcv, (x1, y1), (x2, y2), white, thick // 3)
        cv2.putText(imgcv, id_num, (x1, y1 - 12), 0, 1e-3 * h, white,
                    thick // 6)
    return imgcv
def callback(self, msg):
    """Handle one incoming message: decode the image, track people, display.

    ``msg.data`` holds the encoded image bytes and ``msg.bounding_boxes`` the
    detections. Only detections whose ``Class`` is ``'person'`` are tracked.
    The tracker is updated on every third frame, and the FPS estimate is
    refreshed on every tenth frame.
    """
    # np.fromstring's binary mode is deprecated in NumPy; frombuffer is the
    # documented replacement and avoids copying the payload.
    self.img = cv2.imdecode(np.frombuffer(msg.data, np.uint8), 1)

    lstDetections = msg.bounding_boxes
    lstDetsDeepSort = []
    if lstDetections:
        for det in lstDetections:
            if det.Class == 'person':
                # Deep Sort Bounding Boxes
                # Use the format TOP LEFT WIDTH HEIGHT (tlwh)
                dsWidth = det.xmax - det.xmin
                dsHeight = det.ymax - det.ymin
                lstDetsDeepSort.append(
                    [det.xmin, det.ymin, dsWidth, dsHeight])

    self.detectionAndId = []

    #region DEEPSORT
    if self.frameCount % 3 == 0:
        features = self.encoder(self.img, lstDetsDeepSort)
        # Create DeepSort detections (fixed confidence of 1.0 for each box).
        trackedObjects = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(lstDetsDeepSort, features)
        ]
        self.tracker.predict()
        self.tracker.update(trackedObjects)
    #endregion

    self.displayDetections()

    # NOTE(review): this message is filled in but not published within this
    # method — confirm whether a publish call is expected here.
    msgDetectionAndID = DetectionAndID()
    msgDetectionAndID.header = msg.header
    msgDetectionAndID.detections = self.detectionAndId

    #region DISPLAY
    if self.frameCount % 10 == 0:
        # Average over the last 10 frames.
        timer = time.time() - self.timePrev
        self.timePrev = time.time()
        self.intFPS = int(10 / timer)

    # Print FPS
    cv2.putText(self.img, 'FPS: {}'.format(self.intFPS),
                (self.img.shape[1] - 100, 25), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                (255, 0, 0), 2)
    cv2.imshow('yact: people_tracker.py', self.img)
    cv2.waitKey(3)
    self.frameCount += 1
def create_detections(detection_mat, min_height=0, frame_idx=1):
    """Create detections for given frame index from the raw detection matrix.

    Parameters
    ----------
    detection_mat : ndarray
        Detection matrix; column 0 is the frame index, columns 2-5 the
        bounding box, column 6 the confidence and columns 10+ the feature.
    min_height : Optional[int]
        Rows whose fourth box component (``row[5]``) is below this value are
        skipped.
    frame_idx : int
        Frame index to select rows for.

    Returns
    -------
    list
        One ``Detection`` per selected row.
    """
    # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is equivalent.
    frame_indices = detection_mat[:, 0].astype(int)
    mask = frame_indices == frame_idx

    detection_list = []
    for row in detection_mat[mask]:
        bbox, confidence, feature = row[2:6], row[6], row[10:]
        if bbox[3] < min_height:
            continue
        detection_list.append(Detection(bbox, confidence, feature))
    return detection_list
def return_tracking_id(self, bboxes, frame):
    """Update the tracker with one frame's boxes and return the tracked boxes.

    Parameters
    ----------
    bboxes :
        Container whose first element is an iterable of 7-item rows
        ``(left, top, right, bottom, confidence, _, _)``.
    frame :
        Image array, or a path that ``cv2.imread`` can load.

    Returns
    -------
    A list of ``(id, x1, y1, x2, y2, confidence)`` tuples for confirmed,
    recently updated tracks. When ``bboxes[0]`` is empty, ``frame`` itself is
    returned unchanged and the tracker is not touched.
    """
    nms_max_overlap = 0.1

    # The original unpacked ``frame.shape`` here, which raised for path
    # inputs and only fed unused locals; the loaded image is what matters.
    if isinstance(frame, np.ndarray):
        imgcv = frame
    else:
        imgcv = cv2.imread(frame)

    detections = []
    scores = []
    for b in bboxes[0]:
        left, top, right, bot, confidence, _, _ = b
        # DeepSORT expects (x, y, width, height).
        detections.append(
            np.array([left, top, right - left, bot - top]).astype(np.float64))
        scores.append(confidence)

    detections = np.array(detections)
    if detections.shape[0] == 0:
        # NOTE(review): returns the frame rather than an empty list; kept
        # for backward compatibility with existing callers.
        return frame

    scores = np.array(scores)
    features = self.encoder(imgcv, detections.copy())
    detections = [
        Detection(bbox, score, feature)
        for bbox, score, feature in zip(detections, scores, features)
    ]

    # Run non-maxima suppression.
    boxes = np.array([d.tlwh for d in detections])
    scores = np.array([d.confidence for d in detections])
    indices = prep.non_max_suppression(boxes, nms_max_overlap, scores)
    detections = [detections[i] for i in indices]

    self.tracker.predict()
    self.tracker.update(detections)

    new_bboxes = []
    for track in self.tracker.tracks:
        if not track.is_confirmed() or track.time_since_update > 1:
            continue
        bbox = track.to_tlbr()
        id_num = str(track.track_id)
        new_bboxes.append(
            (id_num, int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]),
             track.confidence))
    print(new_bboxes)
    return new_bboxes
def run_deep_sort(self, frame, out_boxes, out_scores, classIDs, scales, ids,
                  features):
    """Run one DeepSORT step using precomputed appearance features.

    Parameters
    ----------
    frame :
        Image array; only ``frame.shape[0:2]`` is used, to scale the boxes.
    out_boxes :
        2-D array of boxes whose columns 0-1 and 2-3 are multiplied by the
        frame's (width, height). It is scaled IN PLACE.
    out_scores, features :
        Per-detection confidences and feature vectors.
    classIDs, scales, ids :
        Per-detection metadata, filtered with the same NMS indices as the
        detections.

    Returns
    -------
    ``self.tracker.tracks`` after a predict-only step when there are no
    detections; otherwise ``(self.tracker, (dets, classIDs, scales, ids))``.
    """
    # ``out_boxes`` is an array here, so ``== []`` was never a valid
    # emptiness test; use the length instead.
    if out_boxes is None or len(out_boxes) == 0:
        self.tracker.predict()
        print('No detections')
        # NOTE(review): returns the track list, unlike the tuple below.
        return self.tracker.tracks

    # frame.shape is (height, width, ...); flip to (width, height).
    wh = np.flip(frame.shape[0:2])
    out_boxes[:, 0:2] = (out_boxes[:, 0:2] * wh).astype(float)
    out_boxes[:, 2:4] = (out_boxes[:, 2:4] * wh).astype(float)

    # Give format boxes
    out_boxes = self.format_boxes(out_boxes)

    # Create a detection object to parse in deepsort
    dets = [
        Detection(bbox, score, feature)
        for bbox, score, feature in zip(out_boxes, out_scores, features)
    ]

    outboxes = np.array([d.tlwh for d in dets])
    outscores = np.array([d.confidence for d in dets])

    # Non max suppression including confidence
    # works best using pixel coordinates, 0.3<=overlap<=0.5
    indices = non_max_suppression(outboxes, 0.4, outscores)

    dets = [dets[i] for i in indices]
    classIDs = [classIDs[i] for i in indices]
    scales = [scales[i] for i in indices]
    ids = [ids[i] for i in indices]
    detections_class = (dets, classIDs, scales, ids)

    # DeepSort cycle
    self.tracker.predict()
    self.tracker.update(dets)
    return self.tracker, detections_class
def run_deep_sort(self, frame, out_scores, out_boxes):
    """Advance the DeepSORT tracker by one frame.

    Crops every box from ``frame``, applies ``self.gaussian_mask``, embeds
    the crops with ``self.encoder.forward_once``, runs non-maximum
    suppression (overlap 0.8) and updates the tracker.

    Returns ``(self.tracker, dets)``; ``dets`` is an empty list when there
    is nothing to track, in which case only ``predict`` is run.
    """

    def _predict_only():
        # Shared "nothing detected" path: age the tracks and report.
        self.tracker.predict()
        print('No detections')
        return self.tracker, []

    if len(out_boxes) == 0:
        return _predict_only()

    box_array = np.array(out_boxes)
    print('size ', box_array.shape, frame.shape)
    if box_array.shape[0] == 0:
        return _predict_only()

    crops = self.pre_process(frame, box_array).cuda()
    crops = self.gaussian_mask * crops
    embeddings = self.encoder.forward_once(crops).detach().cpu().numpy()

    # A lone detection can yield a flat vector; put the batch axis back.
    if embeddings.ndim == 1:
        embeddings = np.expand_dims(embeddings, 0)

    candidates = []
    for bbox, score, feature in zip(box_array, out_scores, embeddings):
        candidates.append(Detection(bbox, score, feature))

    tlwh = np.array([c.tlwh for c in candidates])
    confidence = np.array([c.confidence for c in candidates])
    keep = prep.non_max_suppression(tlwh, 0.8, confidence)
    dets = [candidates[i] for i in keep]

    self.tracker.predict()
    self.tracker.update(dets)
    return self.tracker, dets
def run_deep_sort(self, frame, out_scores, out_boxes, classes=None):
    """Run one DeepSORT step, optionally tagging detections with a class.

    Parameters
    ----------
    frame :
        Image handed to ``self.pre_process`` to build the detection crops.
    out_scores :
        Per-detection confidence scores.
    out_boxes :
        Sequence or array of bounding boxes.
    classes :
        Optional per-detection class labels. When omitted, or when its length
        does not match ``out_boxes``, every detection gets ``'noclass'``.

    Returns
    -------
    ``(self.tracker, dets)``; ``dets`` is empty when there are no detections.
    """
    # Length check instead of ``== []`` so empty NumPy arrays are handled too.
    if out_boxes is None or len(out_boxes) == 0:
        self.tracker.predict()
        return self.tracker, []

    # IN CASE no classes are provided, use default value
    if classes is None or len(classes) != len(out_boxes):
        classes = ['noclass'] * len(out_boxes)

    detections = np.array(out_boxes)

    processed_crops = self.pre_process(frame, detections).cuda()
    processed_crops = self.gaussian_mask * processed_crops

    features = self.encoder.forward_once(processed_crops)
    features = features.detach().cpu().numpy()

    # A single detection may come back as a 1-D vector; restore the batch axis.
    if len(features.shape) == 1:
        features = np.expand_dims(features, 0)

    dets = [
        Detection(bbox, score, feature, class_)
        for bbox, score, feature, class_ in zip(
            detections, out_scores, features, classes)
    ]

    outboxes = np.array([d.tlwh for d in dets])
    outscores = np.array([d.confidence for d in dets])
    indices = prep.non_max_suppression(outboxes, 0.8, outscores)
    dets = [dets[i] for i in indices]

    self.tracker.predict()
    self.tracker.update(dets)
    return self.tracker, dets
def run_deep_sort(self, frame, out_scores, out_boxes):
    """Run one DeepSORT step on YOLO-style boxes and return the tracks.

    Parameters
    ----------
    frame :
        Image handed to ``self.pre_process`` to build the detection crops.
    out_scores :
        Per-detection confidence scores.
    out_boxes :
        Raw boxes; converted with ``self.format_yolo_output`` before use.

    Returns
    -------
    ``self.tracker.tracks`` after the predict (and, when there are
    detections, update) step.
    """
    # Length check instead of ``== []`` so empty NumPy arrays are handled too.
    if out_boxes is None or len(out_boxes) == 0:
        self.tracker.predict()
        return self.tracker.tracks

    detections = np.array(self.format_yolo_output(out_boxes))

    processed_crops = self.pre_process(frame, detections).cuda()
    processed_crops = self.gaussian_mask * processed_crops

    features = self.encoder.forward_once(processed_crops)
    features = features.detach().cpu().numpy()

    # A single detection may come back as a 1-D vector; restore the batch axis.
    if len(features.shape) == 1:
        features = np.expand_dims(features, 0)

    dets = [
        Detection(bbox, score, feature)
        for bbox, score, feature in zip(detections, out_scores, features)
    ]

    outboxes = np.array([d.tlwh for d in dets])
    outscores = np.array([d.confidence for d in dets])
    indices = prep.non_max_suppression(outboxes, 0.8, outscores)
    dets = [dets[i] for i in indices]

    self.tracker.predict()
    self.tracker.update(dets)
    return self.tracker.tracks
def update(self, subject):
    """React to a detector update: track its boxes and notify listeners.

    When ``subject.END`` is set, the end flag is propagated to ``self.END``
    and listeners are notified without any tracking. Otherwise the
    subject's ``rois``/``scores`` (and ``masks`` when ``self.applyMask`` is
    set) are encoded and fed to the tracker, and the boxes and ids of
    confirmed, recently updated tracks are stored on
    ``self.objectBoundingBoxes`` / ``self.objectIds`` before notifying.
    """
    if subject.END:
        self.END = True
        self.notify()
        return

    # update tracker with detection from detector
    rois = subject.rois
    scores = subject.scores
    encoder_args = [subject.image, np.array(rois)]
    if self.applyMask:
        encoder_args.append(subject.masks)
    embeddings = self.encoder(*encoder_args)

    detections = []
    for bbox, confidence, feature in zip(rois, scores, embeddings):
        detections.append(Detection(bbox, confidence, feature))

    self.tracker.predict()
    self.tracker.update(detections)

    # extract bounding boxes and Ids
    self.image = subject.image
    self.objectBoundingBoxes = []
    self.objectIds = []
    for track in self.tracker.tracks:
        if track.is_confirmed() and track.time_since_update <= 1:
            self.objectBoundingBoxes.append(track.to_tlbr())
            self.objectIds.append(str(track.track_id))

    # notify deep sort event listeners
    self.notify()
def postprocess(self, net_out, im, frame_id=0, csv_file=None, csv=None,
                mask=None, encoder=None, tracker=None, save=False):
    """Draw detections or tracked objects on a frame and run traffic analytics.

    Without ``FLAGS.track`` the boxes found in ``net_out`` are drawn (or
    collected for JSON). With tracking enabled, boxes labelled
    ``FLAGS.trackObj`` are fed to the DeepSORT or SORT ``tracker`` and, per
    track, the optional car counting, speed estimation, direction detection
    and traffic-signal violation modules are run, CSV rows are written and
    the frame is annotated. Tracks that violated the speed threshold and have
    now left the frame get an alert line and a video assembled from their
    saved frames.

    Parameters
    ----------
    net_out : network output passed to ``self.findboxes``.
    im : image array, or a path loadable with ``cv2.imread``.
    frame_id : index of the current frame.
    csv_file, csv : open file and its CSV writer, used when ``FLAGS.csv``.
    mask : background-subtraction mask, used when ``FLAGS.BK_MOG``.
    encoder : DeepSORT feature encoder.
    tracker : DeepSORT or SORT tracker instance.
    save : when false, the annotated image is returned instead of written.

    Returns
    -------
    The annotated image when ``save`` is false; otherwise ``None`` after
    writing the JSON or image file under ``FLAGS.imgdir``/out.
    """
    # Declared once, up front. The original declared these mid-function,
    # after ``direction_detection_result`` had already been assigned, which
    # Python 3 rejects as a SyntaxError.
    global speed_estimation_result
    global direction_detection_result
    global traffic_signal_violation_result

    ids_in_current_frame = []
    ids_in_current_frame_set = set([])
    videoName = os.path.basename(self.FLAGS.demo)
    videoName = videoName[:-4]
    # Same value the original recomputed inside some branches only, which
    # left ``saveAs`` unbound on the paths that skipped those branches.
    saveAs = videoName
    video_name = os.path.splitext(os.path.basename(self.FLAGS.demo))[0]

    boxes = self.findboxes(net_out)

    # meta
    meta = self.meta
    nms_max_overlap = 0.1
    threshold = meta['thresh']
    colors = meta['colors']
    labels = meta['labels']
    if type(im) is not np.ndarray:
        imgcv = cv2.imread(im)
    else:
        imgcv = im
    h, w, _ = imgcv.shape
    thick = int((h + w) // 300)
    resultsForJSON = []

    if not self.FLAGS.track:
        for b in boxes:
            boxResults = self.process_box(b, h, w, threshold)
            if boxResults is None:
                continue
            left, right, top, bot, mess, max_indx, confidence = boxResults
            if self.FLAGS.json:
                resultsForJSON.append({
                    "label": mess,
                    "confidence": float('%.2f' % confidence),
                    "topleft": {
                        "x": left,
                        "y": top
                    },
                    "bottomright": {
                        "x": right,
                        "y": bot
                    }
                })
                continue
            if self.FLAGS.display:
                cv2.rectangle(imgcv, (left, top), (right, bot),
                              colors[max_indx], thick)
                cv2.putText(imgcv, mess, (left, top - 12), 0, 1e-3 * h,
                            colors[max_indx], thick // 6)
    else:
        if not ds:
            print(
                "ERROR : deep sort or sort submodules not found for tracking please run :"
            )
            print("\tgit submodule update --init --recursive")
            print("ENDING")
            exit(1)

        detections = []
        scores = []
        for b in boxes:
            boxResults = self.process_box(b, h, w, threshold)
            if boxResults is None:
                continue
            left, right, top, bot, mess, max_indx, confidence = boxResults
            if self.FLAGS.trackObj != mess:
                continue
            if self.FLAGS.tracker == "deep_sort":
                # DeepSORT takes (x, y, width, height).
                detections.append(
                    np.array([left, top, right - left,
                              bot - top]).astype(np.float64))
                scores.append(confidence)
            elif self.FLAGS.tracker == "sort":
                # SORT takes (x1, y1, x2, y2).
                detections.append(
                    np.array([left, top, right, bot]).astype(np.float64))

        if len(detections) < 5 and self.FLAGS.BK_MOG:
            detections = detections + extract_boxes(mask)
        detections = np.array(detections)

        if self.FLAGS.tracker == "deep_sort":
            scores = np.array(scores)
            features = encoder(imgcv, detections.copy())
            detections = [
                Detection(bbox, score, feature)
                for bbox, score, feature in zip(detections, scores, features)
            ]
            # Run non-maxima suppression.
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            indices = prep.non_max_suppression(boxes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]
            tracker.predict()
            tracker.update(detections)
            trackers = tracker.tracks
        elif self.FLAGS.tracker == "sort":
            trackers = tracker.update(detections)

        for track in trackers:
            if self.FLAGS.tracker == "deep_sort":
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                id_num = str(track.track_id)
            elif self.FLAGS.tracker == "sort":
                bbox = [
                    int(track[0]),
                    int(track[1]),
                    int(track[2]),
                    int(track[3])
                ]
                id_num = str(int(track[4]))

            # ---- Entry point to all application functions (per track) ----

            # Car counting, if chosen by the user.
            if self.FLAGS.counting_cars:
                count, line = countingCars.count(int(frame_id), int(id_num),
                                                 bbox, h)
                count_path = ("data/" + saveAs + "/output" +
                              "/car_count_{}.txt".format(saveAs))
                # Context manager so the handle is closed (and the count
                # flushed) for every track; the original never closed it.
                with open(count_path, 'w') as f:
                    f.write('Cars_Counted: {}'.format(count))

            # Speed estimation, if chosen by the user.
            if self.FLAGS.speed_estimation:
                speed_estimation_result = speed_estimation.get_speed(
                    video_name, np.copy(imgcv), int(frame_id), int(id_num),
                    [int(bbox[0]),
                     int(bbox[1]),
                     int(bbox[2]),
                     int(bbox[3])])

            # Direction detection, if chosen by the user.
            if self.FLAGS.direction_detection:
                direction_detection_result = direction_detection.get_direction(
                    video_name, np.copy(imgcv), int(frame_id), int(id_num),
                    [int(bbox[0]),
                     int(bbox[1]),
                     int(bbox[2]),
                     int(bbox[3])])

            # Traffic signal violation detection, if chosen by the user.
            if self.FLAGS.traffic_signal_violation_detection:
                traffic_signal_violation_result = \
                    traffic_signal_violation_detection.detect_red_violation(
                        video_name, imgcv, int(frame_id), int(id_num),
                        [int(bbox[0]), int(bbox[1]), int(bbox[2]),
                         int(bbox[3])],
                        direction_detection_result)

            if self.FLAGS.csv:
                csv.writerow([
                    frame_id, id_num,
                    int(bbox[0]),
                    int(bbox[1]),
                    int(bbox[2]) - int(bbox[0]),
                    int(bbox[3]) - int(bbox[1])
                ])
                csv_file.flush()

            # If the display flag is enabled, annotate the car.
            if self.FLAGS.display:
                car_violated = False
                cv2.putText(imgcv, id_num, (int(bbox[0]), int(bbox[1]) - 12),
                            0, int(1e-3 * h), (255, 255, 255),
                            int(thick // 6))

                if self.FLAGS.counting_cars:
                    text_size = cv2.getTextSize(str(count), 0, 2, 2)
                    pt1 = 90, 180
                    pt2 = pt1[0] + 10 + text_size[0][0], pt1[
                        1] + 10 + text_size[0][1]
                    center = pt1[0] + 5, pt1[1] + 5 + text_size[0][1]
                    cv2.line(imgcv, (0, line), (1920, line), (243, 150, 33), 5)
                    cv2.rectangle(imgcv, pt1, pt2, (243, 150, 33), -1)
                    cv2.putText(imgcv, str(count), center, 0, 2,
                                (34, 87, 255), 2)

                # Legend entries for the enabled violation detectors.
                if self.FLAGS.speed_estimation:
                    cv2.rectangle(imgcv, (10, 10), (30, 30), (30, 44, 243), -1)
                    cv2.putText(imgcv, "speed violation", (40, 30), 0,
                                int(1e-3 * h), (30, 44, 243), int(thick // 4))
                if self.FLAGS.direction_detection:
                    cv2.rectangle(imgcv, (10, 40), (30, 60), (249, 0, 213), -1)
                    cv2.putText(imgcv, "direction violation", (40, 60), 0,
                                int(1e-3 * h), (249, 0, 213), int(thick // 4))
                if self.FLAGS.traffic_signal_violation_detection:
                    cv2.rectangle(imgcv, (10, 70), (30, 90), (0, 255, 255), -1)
                    cv2.putText(imgcv, "traffic signal violation", (40, 90),
                                0, int(1e-3 * h), (0, 255, 255),
                                int(thick // 4))

                if speed_estimation_result > speed_estimation.speed_threshold:
                    cv2.putText(imgcv, str(speed_estimation_result),
                                (int(bbox[0] + 50), int(bbox[1] - 12)),
                                cv2.FONT_HERSHEY_SIMPLEX, int(1e-3 * h),
                                (0, 255, 255), int(thick // 6))
                    cv2.rectangle(imgcv, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), (30, 44, 243),
                                  thick // 3)
                    self.__speed_estimation_result = speed_estimation_result

                    # Save this annotated frame under the offending car's
                    # folder and record its name for later video assembly.
                    path = ("data/" + saveAs + "/output/hscd/videos/" +
                            str(id_num))
                    if not os.path.exists(path):
                        os.makedirs(path)
                    vCars = open(
                        path + "/" + "car_id_{}.txt".format(id_num), 'a')
                    vCars.write('frame_{}.jpg\n'.format(frame_id))
                    vCars.close()
                    image_name_string = "frame_" + str(frame_id) + ".jpg"
                    cv2.imwrite(os.path.join(path, image_name_string), imgcv)

                    ids_in_current_frame_set.add(id_num)
                    active_ids_set.add(id_num)
                    ids_in_current_frame.append(int(id_num))
                    active_ids.append(int(id_num))
                    car_violated = True
                elif speed_estimation.speed_threshold > speed_estimation_result > 0:
                    cv2.putText(imgcv, id_num,
                                (int(bbox[0]), int(bbox[1]) - 12), 0,
                                int(1e-3 * h), (255, 255, 255),
                                int(thick // 6))
                    cv2.rectangle(imgcv, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])),
                                  (255, 255, 255), thick // 3)
                    car_violated = False

                if direction_detection_result == 1:
                    cv2.rectangle(imgcv,
                                  (int(bbox[0]) + 10, int(bbox[1]) + 10),
                                  (int(bbox[2]) - 10, int(bbox[3]) - 10),
                                  (249, 0, 213), 4)
                    car_violated = True

                if traffic_signal_violation_result:
                    cv2.rectangle(imgcv,
                                  (int(bbox[0]) - 10, int(bbox[1]) - 10),
                                  (int(bbox[2]) + 10, int(bbox[3]) + 10),
                                  (0, 255, 255), 4)
                    car_violated = True

                if not car_violated:
                    cv2.rectangle(imgcv, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])),
                                  (255, 255, 255), thick // 3)

        # NOTE(review): the source's indentation was lost; this alert pass is
        # placed inside the tracking branch — confirm against the original.
        # Ids that violated earlier but are absent from this frame.
        inactive_id_set = active_ids_set - ids_in_current_frame_set
        print(frame_id, inactive_id_set)

        alerts_path = "data/" + videoName + "/output/hscd/alerts/" + "alert.txt"
        with open(alerts_path, 'a') as alertsFile:
            for object_id in inactive_id_set:
                alertsFile.write(
                    'Speed Violation detected on {}, for car tagged as {}, at time: {}. Detected Speed: {}. For details, check URL: {}'
                    .format(
                        videoName, object_id, "10 sec",
                        self.__speed_estimation_result,
                        'data/' + videoName + '/output/videos/' + object_id +
                        '/' + object_id + '.mp4\n'))

                path = "data/" + videoName + "/output/hscd/videos/" + str(
                    object_id) + "/"
                img_path = os.path.join(self.FLAGS.output_dir, path)
                frame_path = path + "car_id_{}.txt".format(object_id)
                # NOTE(review): hard-coded, machine-specific output location.
                op_file = "/home/anuj/Desktop/test_{}".format(object_id)
                fps = 15
                out = cv2.VideoWriter(op_file + ".avi",
                                      cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
                                      fps, (w, h))
                # Stitch the saved frames of this car into one video.
                with open(frame_path) as fp:
                    for line in fp:
                        line = line.strip()
                        v_image = cv2.imread(img_path + line)
                        out.write(v_image)
                out.release()
                active_ids_set.remove(object_id)

    if not save:
        return imgcv

    outfolder = os.path.join(self.FLAGS.imgdir, 'out')
    img_name = os.path.join(outfolder, os.path.basename(im))
    if self.FLAGS.json:
        textJSON = json.dumps(resultsForJSON)
        textFile = os.path.splitext(img_name)[0] + ".json"
        with open(textFile, 'w') as f:
            f.write(textJSON)
        return

    cv2.imwrite(img_name, imgcv)
    print("CAME HERE 2")
def postprocess(self, net_out, im, video_id, frame_id=0, csv_file=None,
                csv=None, mask=None, encoder=None, tracker=None,
                previous_frame=None, disable_facial=False):
    """Annotate a frame with detections/tracks, faces and speech actions.

    Without ``FLAGS.track`` the boxes found in ``net_out`` are drawn (or
    collected for JSON). With tracking enabled, boxes whose label is in
    ``FLAGS.trackObj`` are passed to the DeepSORT or SORT ``tracker``; each
    track is recorded in the module-level ``tracked_objects`` dict (the
    previous frame's dict is kept in ``old_tracked_objects``), optionally
    named from recognised faces, and drawn. Speech actions are then derived
    with ``object_detection_speech`` and optionally uploaded.

    Parameters
    ----------
    net_out : network output passed to ``self.findboxes``.
    im : image array, or a path loadable with ``cv2.imread``.
    video_id : identifier included in the speech/upload payloads.
    frame_id : index of the current frame (written to the CSV).
    csv_file, csv : open file and its CSV writer, used when ``FLAGS.csv``.
    mask : background-subtraction mask, used when ``FLAGS.BK_MOG``.
    encoder : DeepSORT feature encoder.
    tracker : DeepSORT or SORT tracker instance, or ``None``.
    previous_frame : unused in this method.
    disable_facial : skip face recognition for this frame when true.

    Returns
    -------
    ``(imgcv, speech_actions)``; ``speech_actions`` is ``None`` when not
    tracking or when nothing was detected.
    """
    global old_tracked_objects
    global tracked_objects

    boxes = self.findboxes(net_out)

    # meta
    meta = self.meta
    nms_max_overlap = 0.1
    threshold = meta["thresh"]
    colors = meta["colors"]

    # The original only bound ``imgcv`` when ``im`` was already an array, so
    # a path input failed with NameError on the next line.
    if type(im) is not np.ndarray:
        im = cv2.imread(im)
    imgcv = im
    h, w, _ = imgcv.shape
    thick = int((h + w) // 300)
    resultsForJSON = []

    # Face Detection Code
    faces = []
    labels = []
    if self.FLAGS.face_recognition and not disable_facial:
        frame_grayscale = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
        faces = self.detect_face(frame_grayscale)
        if len(faces) > 0:
            labels, confs = self.recognize_face(imgcv, faces)
            self.put_label_on_face(im, faces, labels, confs)

    speech_actions = None
    if not self.FLAGS.track:
        for b in boxes:
            boxResults = self.process_box(b, h, w, threshold)
            if boxResults is None:
                continue
            left, right, top, bot, label, max_indx, confidence = boxResults
            if self.FLAGS.json:
                resultsForJSON.append({
                    "label": label,
                    "confidence": float("%.2f" % confidence),
                    "topleft": {
                        "x": left,
                        "y": top
                    },
                    "bottomright": {
                        "x": right,
                        "y": bot
                    }
                })
                continue
            if self.FLAGS.display:
                cv2.rectangle(imgcv, (left, top), (right, bot),
                              colors[max_indx], thick)
                cv2.putText(imgcv, label, (left, top - 12), 0, 1e-3 * h,
                            colors[max_indx], thick // 3)
    else:
        detections = []
        scores = []
        # Keep last frame's objects so names and speech can be diffed.
        old_tracked_objects = tracked_objects
        tracked_objects = {}

        for b in boxes:
            boxResults = self.process_box(b, h, w, threshold)
            if boxResults is None:
                continue
            left, right, top, bot, label, max_indx, confidence = boxResults
            if label not in self.FLAGS.trackObj:
                continue
            if self.FLAGS.tracker == "deep_sort":
                # DeepSORT takes (x, y, width, height).
                detections.append(
                    np.array([left, top, right - left,
                              bot - top]).astype(np.float64))
                scores.append(confidence)
            elif self.FLAGS.tracker == "sort":
                # This SORT variant also carries confidence and label.
                detections.append(
                    np.array([left, top, right, bot, confidence, label],
                             dtype=object))

        if len(detections) < 3 and self.FLAGS.BK_MOG:
            detections = detections + extract_boxes(self, mask)
        detections = np.array(detections)
        if detections.shape[0] == 0:
            return imgcv, None

        # Default so an unrecognised FLAGS.tracker value cannot leave the
        # loop below without a ``trackers`` binding.
        trackers = []
        if self.FLAGS.tracker == "deep_sort" and tracker is not None:
            scores = np.array(scores)
            features = encoder(imgcv, detections.copy())
            detections = [
                Detection(bbox, score, feature)
                for bbox, score, feature in zip(detections, scores, features)
            ]
            # Run non-maxima suppression.
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            indices = prep.non_max_suppression(boxes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]
            tracker.predict()
            tracker.update(detections)
            trackers = tracker.tracks
        elif self.FLAGS.tracker == "sort" and tracker is not None:
            trackers = tracker.update(detections)

        if tracker is not None:
            for track in trackers:
                label = ""
                bbox = []
                if self.FLAGS.tracker == "deep_sort":
                    if not track.is_confirmed() or track.time_since_update > 1:
                        continue
                    bbox = track.to_tlbr()
                    id_num = str(track.track_id)
                elif self.FLAGS.tracker == "sort":
                    bbox = [
                        int(track[0]),
                        int(track[1]),
                        int(track[2]),
                        int(track[3])
                    ]
                    id_num = str(track[-1])
                    label = track[5]

                if self.FLAGS.csv:
                    csv.writerow([
                        frame_id, id_num,
                        int(bbox[0]),
                        int(bbox[1]),
                        int(bbox[2]) - int(bbox[0]),
                        int(bbox[3]) - int(bbox[1])
                    ])
                    csv_file.flush()

                object_key = label + " " + id_num
                tracked_objects[object_key] = {
                    "bbox": {
                        "topleft": {
                            "x": bbox[0],
                            "y": bbox[1]
                        },
                        "bottomright": {
                            "x": bbox[2],
                            "y": bbox[3]
                        }
                    }
                }

                previous = old_tracked_objects.get(object_key) \
                    if object_key in old_tracked_objects else None
                if previous is not None and "person_name" in previous:
                    # Carry a previously recognised name forward.
                    tracked_objects[object_key]["person_name"] = previous[
                        "person_name"]
                    label = tracked_objects[object_key]["person_name"]
                elif label == "person":
                    person_name_label = self.get_label_for_person(
                        faces, labels, bbox)
                    if person_name_label is not None:
                        tracked_objects[object_key][
                            "person_name"] = person_name_label
                        label = person_name_label

                if self.FLAGS.display or self.FLAGS.saveVideo:
                    cv2.rectangle(imgcv, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])),
                                  (255, 255, 255), thick // 3)
                    cv2.putText(imgcv, label,
                                (int(bbox[0]), int(bbox[1]) - 12), 0,
                                1e-3 * h, (255, 255, 255), thick // 6)

        # NOTE(review): the source's indentation was lost; the speech/upload
        # step is placed at the tracking-branch level — confirm.
        speech_actions = object_detection_speech(self.FLAGS.speech,
                                                 tracked_objects,
                                                 old_tracked_objects, h, w)
        if len(speech_actions) > 0:
            speech_actions = {
                "isStart": False,
                "isEnd": False,
                "timestamp": int(round(timer())),
                "video_id": video_id,
                "actions": speech_actions
            }

        if self.FLAGS.upload and len(speech_actions) > 0:
            # NOTE(review): ``speech_actions`` is already the wrapped payload
            # here, so "actions" nests it a second time — kept as in the
            # original; verify what the receiver expects.
            socketio_json = {
                "isStart": False,
                "isEnd": False,
                "timestamp": int(round(timer())),
                "video_id": video_id,
                "actions": speech_actions
            }
            with SocketIO(
                    "http://ec2-18-191-1-128.us-east-2.compute.amazonaws.com",
                    80, LoggingNamespace) as socketIO:
                socketIO.emit("video_data_point", socketio_json)

    # Sidewalk Detection
    # NOTE(review): nesting level reconstructed; the result is not used.
    if self.FLAGS.sidewalk_detection:
        _, command = run_sidewalk_detection(im)

    return imgcv, speech_actions
def postprocess(self,
                net_out,
                im,
                frame_id=0,
                csv_file=None,
                csv=None,
                mask=None,
                encoder=None,
                tracker=None,
                save=False):
    """Takes net output, draws detections or tracked trajectories on the frame.

    Without ``self.FLAGS.track`` every box accepted by ``self.process_box``
    is either collected as a dict (``FLAGS.json``; the list is not emitted by
    this variant) or drawn on the image.  With tracking enabled, accepted
    boxes whose label is in ``FLAGS.trackObj`` are fed to the deep_sort or
    sort tracker; each reported track is optionally logged to CSV and drawn
    together with the trail of its recent centre points.

    Parameters
    ----------
    net_out
        Raw network output, passed to ``self.findboxes``.
    im : str or ndarray
        Image path (loaded with ``cv2.imread``) or an already decoded image.
    frame_id : int
        Frame number written to the CSV log.
    csv_file, csv
        Open file and the writer bound to it; used only when ``FLAGS.csv``.
    mask
        Passed to ``extract_boxes`` when ``FLAGS.BK_MOG`` is set and fewer
        than three detections were found.
    encoder
        Callable ``encoder(image, boxes)`` returning one feature per box
        (deep_sort only).
    tracker
        Tracker instance matching ``FLAGS.tracker``.
    save : bool
        Accepted for signature compatibility; not used by this variant.

    Returns
    -------
    ndarray
        The image, annotated in place.
    """
    # Module-level trajectory state shared across frames.  The global named
    # ``dict`` shadows the builtin inside this function.
    global person_count
    global dict

    boxes = self.findboxes(net_out)

    # meta
    meta = self.meta
    nms_max_overlap = 0.1
    threshold = meta['thresh']
    colors = meta['colors']

    imgcv = im if isinstance(im, np.ndarray) else cv2.imread(im)
    h, w, _ = imgcv.shape
    thick = int((h + w) // 300)
    resultsForJSON = []

    if not self.FLAGS.track:
        for b in boxes:
            boxResults = self.process_box(b, h, w, threshold)
            if boxResults is None:
                continue
            left, right, top, bot, mess, max_indx, confidence = boxResults
            if self.FLAGS.json:
                resultsForJSON.append({
                    "label": mess,
                    "confidence": float('%.2f' % confidence),
                    "topleft": {
                        "x": left,
                        "y": top
                    },
                    "bottomright": {
                        "x": right,
                        "y": bot
                    }
                })
                continue
            if self.FLAGS.display or self.FLAGS.saveVideo:
                cv2.rectangle(imgcv, (left, top), (right, bot),
                              colors[max_indx], thick)
                cv2.putText(imgcv, mess, (left, top - 12), 0, 1e-3 * h,
                            colors[max_indx], thick // 3)
        return imgcv

    if not ds:
        print(
            "ERROR : deep sort or sort submodules not found for tracking please run :"
        )
        print("\tgit submodule update --init --recursive")
        print("ENDING")
        exit(1)

    detections = []
    scores = []
    for b in boxes:
        boxResults = self.process_box(b, h, w, threshold)
        if boxResults is None:
            continue
        left, right, top, bot, mess, max_indx, confidence = boxResults
        if mess not in self.FLAGS.trackObj:
            continue
        if self.FLAGS.tracker == "deep_sort":
            # deep_sort consumes (top-left x, top-left y, width, height).
            detections.append(
                np.array([left, top, right - left,
                          bot - top]).astype(np.float64))
            scores.append(confidence)
        elif self.FLAGS.tracker == "sort":
            # sort consumes the two corner points.
            detections.append(
                np.array([left, top, right, bot]).astype(np.float64))

    if len(detections) < 3 and self.FLAGS.BK_MOG:
        detections = detections + extract_boxes(self, mask)
    detections = np.array(detections)
    if detections.shape[0] == 0:
        return imgcv

    if self.FLAGS.tracker == "deep_sort":
        scores = np.array(scores)
        features = encoder(imgcv, detections.copy())
        detections = [
            Detection(bbox, score, feature)
            for bbox, score, feature in zip(detections, scores, features)
        ]
        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = prep.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]
        tracker.predict()
        tracker.update(detections)
        trackers = tracker.tracks
    elif self.FLAGS.tracker == "sort":
        trackers = tracker.update(detections)

    # Palette cycled by person id.  Original author's labels for the entries:
    # Black, LightPink, Crimson, Purple, Blue, Cyan, SeaGreen, Yellow,
    # DarkOrange, Gray.
    list_color = [(0, 0, 0), (255, 182, 193), (128, 0, 128), (255, 0, 255),
                  (0, 0, 255), (0, 255, 255), (46, 139, 87), (255, 255, 0),
                  (255, 140, 0), (128, 128, 128)]

    for track in trackers:
        if self.FLAGS.tracker == "deep_sort":
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            id_num = str(track.track_id)
        elif self.FLAGS.tracker == "sort":
            bbox = [
                int(track[0]),
                int(track[1]),
                int(track[2]),
                int(track[3])
            ]
            id_num = str(int(track[4]))

        if self.FLAGS.csv:
            csv.writerow([
                frame_id, id_num,
                int(bbox[0]),
                int(bbox[1]),
                int(bbox[2]) - int(bbox[0]),
                int(bbox[3]) - int(bbox[1])
            ])
            csv_file.flush()

        if self.FLAGS.display or self.FLAGS.saveVideo:
            id_person = int(update_csv(int(id_num)))
            colour = list_color[id_person % len(list_color)]
            cv2.rectangle(imgcv, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), colour, thick // 3)

            # init: the first drawn track pre-allocates a trail and a frame
            # counter for every id below 99999.
            if len(dict) == 0:
                for i in range(0, 99999):
                    dict[i] = []
                    person_count.append(0)

            # Floor division keeps the centre an integer pixel coordinate on
            # Python 3 as well; "/" would yield floats there.
            center_x = int(bbox[0]) + (int(bbox[2]) - int(bbox[0])) // 2
            center_y = int(bbox[1]) + (int(bbox[3]) - int(bbox[1])) // 2
            dict[id_person].append((center_x, center_y))

            # Draw every stored centre and join consecutive ones.
            trail = dict[id_person]
            for i, point in enumerate(trail):
                cv2.circle(imgcv, point, 1, colour, thick // 5)
                if i > 0:
                    cv2.line(imgcv, trail[i - 1], point, colour, 5)

            person_count[id_person] = person_count[id_person] + 1
            # Forget the trail every 200 drawn frames (original note:
            # "frame num = 200 10s").
            if person_count[id_person] % 200 == 0:
                person_count[id_person] = 0
                dict[id_person] = []

            # show person id
            cv2.putText(imgcv, str(id_person),
                        (int(bbox[0]), int(bbox[1]) - 12), 0, 1e-3 * h,
                        colour, thick // 3)

            # NOTE(review): the source's indentation was lost; the counter
            # overlay is assumed to belong to this per-track display block -
            # confirm it was not meant to run once per frame.
            # count the person
            mycount = update_csv(0)
            # show to UI
            cv2.putText(imgcv, 'gong count: ' + str(mycount), (10, 70), 0,
                        1e-3 * h, (0, 0, 255), thick // 6)
    return imgcv
def vis_detections_video(im,
                         class_name,
                         dets,
                         csv_file,
                         csv,
                         frame_id,
                         thresh=0.5,
                         tracker=None):
    """Track the detections of one frame and draw the confirmed tracks.

    Parameters
    ----------
    im : ndarray
        Frame to annotate (modified in place).
    class_name
        Unused; kept for signature compatibility.
    dets : ndarray
        One row per detection; the first four columns are handed to
        ``process_box`` and the last column is the score.
    csv_file, csv
        Open file and the writer bound to it; one row is written per drawn
        track and the file is flushed after each row.
    frame_id : int
        Frame number written to the CSV log.
    thresh : float
        Minimum score for a detection to be considered.
    tracker : optional
        Tracker to update.  When omitted a new ``Tracker`` is created for
        this call, as before.  NOTE(review): a per-call tracker carries no
        state between frames - pass a long-lived instance if tracks are
        expected to persist; confirm against the caller.

    Returns
    -------
    ndarray
        ``im`` itself.
    """
    nms_max_overlap = 0.6
    h, w, _ = im.shape
    thick = int((h + w) // 300)

    inds = np.where(dets[:, -1] >= thresh)[0]
    if len(inds) == 0:
        if tracker is not None:
            # Let a caller-owned tracker advance its prediction even when the
            # frame has no usable detection.
            tracker.predict()
        return im

    if tracker is None:
        metric = nn_matching.NearestNeighborDistanceMetric("cosine", 0.2, 100)
        tracker = Tracker(metric)

    scores = [dets[i, -1] for i in inds]
    detections = []
    kept_scores = []
    for i in inds:
        bbox = dets[i, :4]
        boxResults = process_box(bbox, scores, h, w, thresh)
        if boxResults is None:
            continue
        left, right, top, bot, mess, max_indx, confidence = boxResults
        detections.append(
            np.array([left, top, right - left, bot - top]).astype(np.float64))
        # Remember the score of exactly this box.  Previously scores were
        # taken positionally from ``scores``, which misaligned them with the
        # boxes whenever an earlier box had been rejected.
        kept_scores.append(dets[i, -1])
        # NOTE(review): kept so process_box keeps receiving the same growing
        # list as before; its use of that argument is not visible here.
        scores.append(confidence)

    detections = np.array(detections)
    features = deep_sort_encode(im, detections.copy())
    detections = [
        Detection(bbox, score, feature)
        for bbox, score, feature in zip(detections, kept_scores, features)
    ]

    # Run non-maxima suppression.
    boxes = np.array([d.tlwh for d in detections])
    nms_scores = np.array([d.confidence for d in detections])
    indices = prep.non_max_suppression(boxes, nms_max_overlap, nms_scores)
    detections = [detections[i] for i in indices]

    tracker.predict()
    tracker.update(detections)

    for track in tracker.tracks:
        if not track.is_confirmed() or track.time_since_update > 1:
            continue
        bbox = track.to_tlbr()
        id_num = str(track.track_id)
        csv.writerow([
            frame_id, id_num,
            int(bbox[0]),
            int(bbox[1]),
            int(bbox[2]) - int(bbox[0]),
            int(bbox[3]) - int(bbox[1])
        ])
        csv_file.flush()
        cv2.rectangle(im, (int(bbox[0]), int(bbox[1])),
                      (int(bbox[2]), int(bbox[3])), (0, 255, 255), thick // 3)
        cv2.putText(im, id_num, (int(bbox[0]), int(bbox[1]) - 12), 0,
                    1e-3 * h, (255, 255, 255), thick // 6)
    return im
def detect_n_track_video(detect_net,
                         tracker_obj,
                         team_classifier_obj,
                         vidname,
                         detection_threshold=0.8,
                         scale=1.0,
                         skip=0,
                         team_classify=False,
                         fps=24,
                         reuse_occluded_features=False,
                         dump_crops=False):
    """Detect and track objects in a video and write an annotated copy.

    Each processed frame is run through ``image_fprop``, the boxes are
    encoded with ``tracker_obj.encoder``, filtered by non-maxima suppression
    and fed to ``tracker_obj.tracker``.  Confirmed tracks are cropped with
    ``write_crops``, drawn on the frame, shown in a window and appended to
    ``<video stem>_out.avi``.  Pressing ``q`` stops processing.

    Parameters
    ----------
    detect_net
        Detector handle passed to ``image_fprop``.
    tracker_obj
        Object exposing ``tracker``, ``encoder`` and ``nms_max_overlap``.
    team_classifier_obj
        Used only by the team-classification path, which is currently
        disabled by an ``assert``.
    vidname : str
        Path of the input video.
    detection_threshold : float
        Unused; kept for signature compatibility.
    scale : float
        Resize factor applied to every frame before detection.
    skip : int
        Frame-skip setting.  NOTE(review): the guard only decides whether an
        iteration reads and processes a frame; it never discards frames from
        the stream - confirm the intended behaviour.
    team_classify : bool
        Enabling it trips the ``assert`` guarding the unfinished feature.
    fps : int
        Output frame rate; a falsy value falls back to the source's rate.
    reuse_occluded_features : bool
        Enabling it trips the ``assert`` guarding the unfinished feature.
    dump_crops : bool
        Unused; kept for signature compatibility.
    """
    frame_num = 0
    cap = cv2.VideoCapture(vidname)
    # Numeric capture property ids: 3 = frame width, 4 = frame height,
    # 5 = frames per second.
    W = int(cap.get(3))
    H = int(cap.get(4))
    video_size = [int(W), int(H)]
    vid_fps = fps or int(round(cap.get(5)))
    print(video_size)
    print("Video Size :", video_size)
    print("FPS : ", vid_fps)

    tracker = tracker_obj.tracker
    # NOTE(review): splitting on the first '.' truncates paths such as
    # "./clip.avi"; left unchanged because output names depend on it.
    out_folder = vidname.split('.')[0]
    out_filename = out_folder.split('/')[-1] + '_out.avi'
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    out = cv2.VideoWriter(out_filename, fourcc, vid_fps,
                          (int(W * scale), int(H * scale)))

    prev_ID_centroid_dict = {}
    overall_centroid_dict = {}
    overall_ID_bbox_dict = {}
    overall_ID_feature_dict = {}
    occluded_boxes_retained = []
    occluded_features_retained = []

    try:
        while True:
            # NOTE(review): the source's indentation was lost; the whole
            # per-frame pipeline is assumed to sit under this guard.
            if frame_num % (skip + 1) == 0:
                ret, image = cap.read()
                if not ret:
                    break

                sc_width, sc_height = [int(W * scale), int(H * scale)]
                if scale != 1:
                    print(sc_width, sc_height)
                    image = cv2.resize(image, (sc_width, sc_height),
                                       interpolation=cv2.INTER_AREA)

                detections = image_fprop(detect_net, image)
                num_detections = int(detections['num_detections'])
                print("Num detections:", detections['num_detections'])
                # Scale the detector's (y1, x1, y2, x2) boxes to pixels.
                tf_boxes = detections['detection_boxes'][
                    0:num_detections] * np.array(
                        [sc_height, sc_width, sc_height, sc_width])
                # The de-duplication result is computed but not applied.
                unique_indices = filter_repeated_boxes(tf_boxes, thresh=20)
                # Reorder columns to (x1, y1, x2, y2).
                orig_boxes = tf_boxes[:, [1, 0, 3, 2]]
                deepsort_boxes = get_xywh(orig_boxes)
                features = tracker_obj.encoder(image, deepsort_boxes)

                if len(occluded_boxes_retained
                       ) > 0 and reuse_occluded_features == True:
                    # Feature reuse for occluded tracks is deliberately
                    # disabled by this always-failing assert.
                    assert (1 == 2)
                    orig_boxes = list(orig_boxes) + occluded_boxes_retained
                    deepsort_boxes = list(deepsort_boxes) + get_xywh(
                        occluded_boxes_retained)
                    features = list(features) + occluded_features_retained

                # Every detection is given a score of 1.0.
                tr_detections = [
                    Detection(bbox, 1.0, feature)
                    for bbox, feature in zip(deepsort_boxes, features)
                ]

                # Run non-maxima suppression.
                boxes = np.array([d.tlwh for d in tr_detections])
                scores = np.array([d.confidence for d in tr_detections])
                indices = preprocessing.non_max_suppression(
                    boxes, tracker_obj.nms_max_overlap, scores)
                tr_detections = [tr_detections[i] for i in indices]
                print("++++++++++++++++++++++++++++++++++", orig_boxes)

                det_features = []
                det_centroids = []
                for det in tr_detections:
                    bbox = det.to_tlbr()
                    det_centroids.append(get_centroid(bbox))
                    det_features.append(det.feature)
                    print("@@@@@@@@", bbox, det.feature.max(),
                          det.feature.min())
                    if team_classify == True:
                        # Team classification is deliberately disabled by
                        # this always-failing assert.
                        assert (1 == 2)
                        if team_classifier_obj.trained == False:
                            team_classifier_obj.scheduleKmeansTrain(
                                image, bbox)
                        else:
                            team_id = team_classifier_obj.predict(image, bbox)
                            print("============== TEAM ID", team_id)

                # Call the tracker
                tracker.predict()
                tracker.update(tr_detections)

                ID_centroid_dict = {}
                prev_ID_centroid_dict = copy_dict(overall_centroid_dict,
                                                  prev_ID_centroid_dict)
                trackd_centroids = []
                trackd_IDs = []
                # Crops are taken from an unannotated copy of the frame.
                image_orig = np.copy(image)

                for track in tracker.tracks:
                    if not track.is_confirmed(
                    ) or track.time_since_update > 1:
                        continue
                    trackd_bbox = track.to_tlbr()
                    trackd_ID = track.track_id
                    # Centroid of this track's own box.  The previous code
                    # used the leftover detection variable ``bbox`` here, so
                    # every track received the last detection's centroid.
                    trackd_centroid = get_centroid(trackd_bbox).astype('int')
                    trackd_IDs.append(trackd_ID)
                    trackd_centroids.append(trackd_centroid)
                    ID_centroid_dict[trackd_ID] = trackd_centroid
                    ID_features_map = getId_features_map(
                        det_centroids, det_features, [trackd_bbox],
                        [trackd_ID])
                    overall_centroid_dict[trackd_ID] = trackd_centroid
                    overall_ID_bbox_dict[trackd_ID] = trackd_bbox
                    if len(ID_features_map) > 0:
                        overall_ID_feature_dict[trackd_ID] = ID_features_map[
                            trackd_ID]
                    print("track_id = ", trackd_ID, trackd_bbox,
                          len(track.features))
                    write_crops(out_folder,
                                frame_num,
                                image_orig,
                                trackd_ID,
                                trackd_bbox,
                                x_offset=40,
                                y_offset=50)
                    cv2.rectangle(image,
                                  (int(trackd_bbox[0]), int(trackd_bbox[1])),
                                  (int(trackd_bbox[2]), int(trackd_bbox[3])),
                                  (255, 255, 255), 2)
                    cv2.putText(image, str(trackd_ID),
                                (int(trackd_bbox[0]), int(trackd_bbox[1])), 0,
                                5e-3 * 200, (0, 255, 0), 2)
                    print(frame_num, track.track_id)

                print("==============================", frame_num)
                if (frame_num > 1) and (len(trackd_IDs) > 0):
                    ID_props_dict = getId_props(
                        list(overall_ID_bbox_dict.keys()),
                        trackd_IDs,
                        list(overall_centroid_dict.values()),
                        trackd_centroids,
                        imsize=[sc_width, sc_height],
                        boundary_offset=10)
                    # Rebuild the retained-occlusion lists for this frame.
                    occluded_boxes_retained = []
                    occluded_features_retained = []
                    for ID in ID_props_dict.keys():
                        if ID_props_dict[ID] == 'occluded':
                            print("occluded ID : ", ID)
                            if ID in overall_ID_feature_dict.keys():
                                occluded_features_retained.append(
                                    overall_ID_feature_dict[ID])
                                occluded_boxes_retained.append(
                                    overall_ID_bbox_dict[ID])
                        if ID_props_dict[ID] == 'out_of_frame':
                            print('out_of_frame')

                pretty_print_dicts(prev_ID_centroid_dict, ID_centroid_dict)
                cv2.imshow("Frame", image)
                out.write(image)
                key = cv2.waitKey(1) & 0xFF
                # if the `q` key was pressed, break from the loop
                if key == ord("q"):
                    break

            print("-------------- Frame:", frame_num)
            frame_num += 1

        print("-------------- Frame:", frame_num)
        print(overall_centroid_dict)
    finally:
        # Release the capture, the writer and the window even when a frame
        # fails part-way through.
        cap.release()
        out.release()
        cv2.destroyAllWindows()
def postprocess(self,
                net_out,
                im,
                frame_id=0,
                csv_file=None,
                csv=None,
                mask=None,
                encoder=None,
                tracker=None,
                save=False):
    """Takes net output, draw net_out, save to disk.

    In detection mode (``FLAGS.track`` off) accepted boxes are collected for
    JSON output or drawn.  In tracking mode boxes labelled
    ``FLAGS.trackObj`` are passed to the deep_sort or sort tracker, and each
    reported track is optionally logged to CSV and drawn.

    Parameters
    ----------
    net_out
        Raw network output, passed to ``self.findboxes``.
    im : str or ndarray
        Image path (read with ``cv2.imread``) or a decoded image.
    frame_id : int
        Frame number written to the CSV log.
    csv_file, csv
        Open file and the writer bound to it; used only when ``FLAGS.csv``.
    mask
        Passed to ``extract_boxes`` when ``FLAGS.BK_MOG`` is set and fewer
        than five detections were found.
    encoder
        Callable ``encoder(image, boxes)`` returning one feature per box
        (deep_sort only).
    tracker
        Tracker instance matching ``FLAGS.tracker``.
    save : bool
        When false the annotated image is returned.  When true the result is
        written under ``<FLAGS.imgdir>/out`` (JSON text if ``FLAGS.json``,
        otherwise the image) and nothing is returned.
    """
    candidates = self.findboxes(net_out)

    # meta
    meta = self.meta
    nms_max_overlap = 0.1
    threshold = meta['thresh']
    colors = meta['colors']
    labels = meta['labels']  # not used below; the lookup itself is kept

    imgcv = im if type(im) is np.ndarray else cv2.imread(im)
    h, w, _ = imgcv.shape
    thick = int((h + w) // 300)
    resultsForJSON = []

    if self.FLAGS.track:
        if not ds:
            print(
                "ERROR : deep sort or sort submodules not found for tracking please run :"
            )
            print("\tgit submodule update --init --recursive")
            print("ENDING")
            exit(1)

        detections, scores = [], []
        for candidate in candidates:
            parsed = self.process_box(candidate, h, w, threshold)
            if parsed is None:
                continue
            left, right, top, bot, mess, max_indx, confidence = parsed
            if self.FLAGS.trackObj != mess:
                continue
            if self.FLAGS.tracker == "deep_sort":
                # (x, y, width, height) for deep_sort
                tlwh = np.array([left, top, right - left, bot - top])
                detections.append(tlwh.astype(np.float64))
                scores.append(confidence)
            elif self.FLAGS.tracker == "sort":
                # corner points for sort
                tlbr = np.array([left, top, right, bot])
                detections.append(tlbr.astype(np.float64))

        if len(detections) < 5 and self.FLAGS.BK_MOG:
            detections = detections + extract_boxes(mask)
        detections = np.array(detections)

        if self.FLAGS.tracker == "deep_sort":
            scores = np.array(scores)
            features = encoder(imgcv, detections.copy())
            detections = [
                Detection(tlwh, score, feature)
                for tlwh, score, feature in zip(detections, scores, features)
            ]
            # Run non-maxima suppression.
            nms_boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            keep = prep.non_max_suppression(nms_boxes, nms_max_overlap,
                                            scores)
            detections = [detections[k] for k in keep]
            tracker.predict()
            tracker.update(detections)
            trackers = tracker.tracks
        elif self.FLAGS.tracker == "sort":
            trackers = tracker.update(detections)

        for track in trackers:
            if self.FLAGS.tracker == "deep_sort":
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                id_num = str(track.track_id)
            elif self.FLAGS.tracker == "sort":
                bbox = [int(track[k]) for k in range(4)]
                id_num = str(int(track[4]))

            if self.FLAGS.csv:
                x0, y0 = int(bbox[0]), int(bbox[1])
                csv.writerow([
                    frame_id, id_num, x0, y0,
                    int(bbox[2]) - x0,
                    int(bbox[3]) - y0
                ])
                csv_file.flush()

            if self.FLAGS.display:
                top_left = (int(bbox[0]), int(bbox[1]))
                bottom_right = (int(bbox[2]), int(bbox[3]))
                cv2.rectangle(imgcv, top_left, bottom_right, (255, 255, 255),
                              thick // 3)
                cv2.putText(imgcv, id_num, (top_left[0], top_left[1] - 12), 0,
                            1e-3 * h, (255, 255, 255), thick // 6)
    else:
        for candidate in candidates:
            parsed = self.process_box(candidate, h, w, threshold)
            if parsed is None:
                continue
            left, right, top, bot, mess, max_indx, confidence = parsed
            if self.FLAGS.json:
                # JSON mode records the box and skips drawing.
                entry = {
                    "label": mess,
                    "confidence": float('%.2f' % confidence),
                    "topleft": {
                        "x": left,
                        "y": top
                    },
                    "bottomright": {
                        "x": right,
                        "y": bot
                    }
                }
                resultsForJSON.append(entry)
            elif self.FLAGS.display:
                cv2.rectangle(imgcv, (left, top), (right, bot),
                              colors[max_indx], thick)
                cv2.putText(imgcv, mess, (left, top - 12), 0, 1e-3 * h,
                            colors[max_indx], thick // 3)

    if not save:
        return imgcv

    outfolder = os.path.join(self.FLAGS.imgdir, 'out')
    img_name = os.path.join(outfolder, os.path.basename(im))
    if self.FLAGS.json:
        textFile = os.path.splitext(img_name)[0] + ".json"
        with open(textFile, 'w') as f:
            f.write(json.dumps(resultsForJSON))
        return

    cv2.imwrite(img_name, imgcv)
def postprocess(self,
                net_out,
                im,
                frame_id=0,
                csv_file=None,
                csv=None,
                mask=None,
                encoder=None,
                tracker=None,
                save=False):
    """Takes net output, draw net_out, run the traffic modules, save to disk.

    In detection mode (``FLAGS.track`` off) accepted boxes are collected for
    JSON output or drawn.  In tracking mode boxes labelled
    ``FLAGS.trackObj`` are tracked with deep_sort or sort, and every reported
    track is passed through the modules enabled in ``FLAGS`` (car counting,
    speed estimation, direction detection, traffic-signal violation).  When
    ``FLAGS.display`` is set the frame is annotated and, for each violation,
    the frame and a JSON record are stored under
    ``<path_to_output>/<module>/<start_time>_videos/<track id>/`` where
    module is ``hscd``, ``cmwd`` or ``tsv``.  After the tracks are handled,
    ids that were active but did not violate in this frame get their stored
    frames assembled into an ``.avi`` and an entry in the alert-poller file.

    Parameters
    ----------
    net_out
        Raw network output, passed to ``self.findboxes``.
    im : str or ndarray
        Image path (read with ``cv2.imread``) or a decoded image.
    frame_id : int
        Frame number used in logs and stored frame names.
    csv_file, csv
        Open file and the writer bound to it; used only when ``FLAGS.csv``.
    mask
        Passed to ``extract_boxes`` when ``FLAGS.BK_MOG`` is set and fewer
        than five detections were found.
    encoder
        Callable ``encoder(image, boxes)`` returning one feature per box
        (deep_sort only).
    tracker
        Tracker instance matching ``FLAGS.tracker``.
    save : bool
        When false the annotated image is returned.  When true the result is
        written under ``<FLAGS.imgdir>/out`` (JSON text if ``FLAGS.json``,
        otherwise the image) and nothing is returned.
    """
    # Declared once, before any use.  The previous code re-declared
    # ``direction_detection_result`` global after assigning it, which
    # Python 3 rejects at compile time.
    global speed_estimation_result
    global direction_detection_result
    global traffic_signal_violation_result

    flags = self.FLAGS

    def _int_box(box):
        """Return the four box coordinates truncated to int."""
        return [int(box[0]), int(box[1]), int(box[2]), int(box[3])]

    def _track_dir(module, object_id):
        """Return the storage directory of one track (no trailing slash)."""
        return (flags.path_to_output + '/' + module + '/' +
                flags.start_time + '_videos/' + str(object_id))

    def _record_violation(module, track_id, seen_ids, active_ids, extra=None):
        """Store the current frame and a JSON record for a violating track."""
        folder = _track_dir(module, track_id)
        stamp = str(flags.start_time)
        if not os.path.exists(folder):
            os.makedirs(folder)
        # Append this frame's file name to the track's frame list.
        with open(folder + "/" + stamp + "_{}.txt".format(track_id),
                  'a') as frame_list:
            frame_list.write(stamp + '_frame_{}.jpg\n'.format(frame_id))
        cv2.imwrite(
            os.path.join(folder, stamp + "_frame_" + str(frame_id) + ".jpg"),
            imgcv)
        seen_ids.add(track_id)
        active_ids.add(track_id)
        data = {
            'module_name': module,
            'location': flags.location_name,
            'tagged_car_id': int(track_id),
            'time': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        }
        if extra:
            data.update(extra)
        # NOTE(review): the record points at an .mp4 while the clip written
        # when the track goes inactive is an .avi - confirm a conversion
        # step exists downstream.
        data['url'] = folder + '/' + stamp + "_" + str(track_id) + '.mp4'
        with open(os.path.join(folder, stamp + "_" + str(track_id) + '.json'),
                  'w') as f:
            json.dump(data, f)

    def _finalize_inactive(module, active_ids, seen_ids, processed_ids,
                           alerts):
        """Build clips for tracks that stopped violating and drop their frames."""
        stamp = str(flags.start_time)
        for object_id in active_ids - seen_ids:
            folder = _track_dir(module, object_id) + "/"
            frame_list_path = folder + stamp + "_{}.txt".format(object_id)
            video_base = folder + stamp + "_" + object_id
            writer = None
            if object_id not in processed_ids:
                # First time this id goes inactive: announce it and build
                # the clip.  Later rounds only delete the stored frames.
                alerts.write(flags.path_to_output + '/' + module +
                             '/{}_videos/{}/{}_{}.json\n'.format(
                                 flags.start_time, object_id,
                                 flags.start_time, object_id))
                writer = cv2.VideoWriter(
                    video_base + ".avi",
                    cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 15, (w, h))
            try:
                with open(frame_list_path) as fp:
                    for entry in fp:
                        frame_file = folder + entry.strip()
                        if writer is not None:
                            stored = cv2.imread(frame_file)
                            # Skip frames that can no longer be read instead
                            # of handing None to the writer.
                            if stored is not None:
                                writer.write(stored)
                        if os.path.isfile(frame_file):
                            os.remove(frame_file)
            finally:
                if writer is not None:
                    writer.release()
            processed_ids.add(object_id)
            active_ids.remove(object_id)

    # Ids that violated in this frame, per module.
    hscd_ids_in_current_frame_set = set()
    cmwd_ids_in_current_frame_set = set()
    tsv_ids_in_current_frame_set = set()

    # File name of the input video without its last four characters.
    video_name = os.path.basename(flags.demo)[:-4]

    boxes = self.findboxes(net_out)

    # meta
    meta = self.meta
    nms_max_overlap = 0.1
    threshold = meta['thresh']
    colors = meta['colors']

    imgcv = im if isinstance(im, np.ndarray) else cv2.imread(im)
    h, w, _ = imgcv.shape
    thick = int((h + w) // 300)
    resultsForJSON = []

    if not flags.track:
        for b in boxes:
            boxResults = self.process_box(b, h, w, threshold)
            if boxResults is None:
                continue
            left, right, top, bot, mess, max_indx, confidence = boxResults
            if flags.json:
                resultsForJSON.append({
                    "label": mess,
                    "confidence": float('%.2f' % confidence),
                    "topleft": {
                        "x": left,
                        "y": top
                    },
                    "bottomright": {
                        "x": right,
                        "y": bot
                    }
                })
                continue
            if flags.display:
                cv2.rectangle(imgcv, (left, top), (right, bot),
                              colors[max_indx], thick)
                cv2.putText(imgcv, mess, (left, top - 12), 0, 1e-3 * h,
                            colors[max_indx], thick // 6)
    else:
        if not ds:
            print(
                "ERROR : deep sort or sort submodules not found for tracking please run :"
            )
            print("\tgit submodule update --init --recursive")
            print("ENDING")
            exit(1)

        detections = []
        scores = []
        for b in boxes:
            boxResults = self.process_box(b, h, w, threshold)
            if boxResults is None:
                continue
            left, right, top, bot, mess, max_indx, confidence = boxResults
            if flags.trackObj != mess:
                continue
            if flags.tracker == "deep_sort":
                detections.append(
                    np.array([left, top, right - left,
                              bot - top]).astype(np.float64))
                scores.append(confidence)
            elif flags.tracker == "sort":
                detections.append(
                    np.array([left, top, right, bot]).astype(np.float64))

        if len(detections) < 5 and flags.BK_MOG:
            detections = detections + extract_boxes(mask)
        detections = np.array(detections)

        if flags.tracker == "deep_sort":
            scores = np.array(scores)
            features = encoder(imgcv, detections.copy())
            detections = [
                Detection(bbox, score, feature)
                for bbox, score, feature in zip(detections, scores, features)
            ]
            # Run non-maxima suppression.
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            indices = prep.non_max_suppression(boxes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]
            tracker.predict()
            tracker.update(detections)
            trackers = tracker.tracks
        elif flags.tracker == "sort":
            trackers = tracker.update(detections)

        for track in trackers:
            if flags.tracker == "deep_sort":
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                id_num = str(track.track_id)
            elif flags.tracker == "sort":
                bbox = _int_box(track)
                id_num = str(int(track[4]))

            # ---- application modules, run per tracked object ----

            # Car counting: the current total is rewritten on every track.
            if flags.counting_cars:
                count, line = countingCars.count(int(frame_id), int(id_num),
                                                 bbox, h)
                with open(
                        flags.path_to_output + "/" + flags.start_time +
                        "_car_count_{}.txt".format(video_name), 'w') as f:
                    f.write('Cars_Counted: {}'.format(count))

            # Speed estimation
            if flags.speed_estimation:
                speed_estimation_result = speed_estimation.get_speed(
                    video_name, np.copy(imgcv), int(frame_id), int(id_num),
                    _int_box(bbox))

            # Direction detection
            if flags.direction_detection:
                direction_detection_result = direction_detection.get_direction(
                    video_name, np.copy(imgcv), int(frame_id), int(id_num),
                    _int_box(bbox))

            # Traffic signal violation detection (receives the frame itself,
            # not a copy, plus the latest direction result).
            if flags.traffic_signal_violation_detection:
                traffic_signal_violation_result = \
                    traffic_signal_violation_detection.detect_red_violation(
                        video_name, imgcv, int(frame_id), int(id_num),
                        _int_box(bbox), direction_detection_result)

            if flags.csv:
                csv.writerow([
                    frame_id, id_num,
                    int(bbox[0]),
                    int(bbox[1]),
                    int(bbox[2]) - int(bbox[0]),
                    int(bbox[3]) - int(bbox[1])
                ])
                csv_file.flush()

            # NOTE(review): the source's indentation was lost; annotation
            # and violation recording are assumed to all live under the
            # display flag, because ``car_violated`` is initialised here.
            if flags.display:
                car_violated = False
                top_left = (int(bbox[0]), int(bbox[1]))
                bottom_right = (int(bbox[2]), int(bbox[3]))
                font_scale = int(1e-3 * h)

                cv2.putText(imgcv, id_num, (top_left[0], top_left[1] - 12), 0,
                            font_scale, (255, 255, 255), int(thick // 6))

                if flags.counting_cars:
                    text_size = cv2.getTextSize(str(count), 0, 2, 2)
                    pt1 = 90, 180
                    pt2 = (pt1[0] + 10 + text_size[0][0],
                           pt1[1] + 10 + text_size[0][1])
                    center = pt1[0] + 5, pt1[1] + 5 + text_size[0][1]
                    cv2.line(imgcv, (0, line), (1920, line), (243, 150, 33),
                             5)
                    cv2.rectangle(imgcv, pt1, pt2, (243, 150, 33), -1)
                    cv2.putText(imgcv, str(count), center, 0, 2,
                                (34, 87, 255), 2)

                # Colour legend for the enabled modules.
                if flags.speed_estimation:
                    cv2.rectangle(imgcv, (10, 10), (30, 30), (30, 44, 243),
                                  -1)
                    cv2.putText(imgcv, "speed violation", (40, 30), 0,
                                font_scale, (30, 44, 243), int(thick // 4))
                if flags.direction_detection:
                    cv2.rectangle(imgcv, (10, 40), (30, 60), (249, 0, 213),
                                  -1)
                    cv2.putText(imgcv, "direction violation", (40, 60), 0,
                                font_scale, (249, 0, 213), int(thick // 4))
                if flags.traffic_signal_violation_detection:
                    cv2.rectangle(imgcv, (10, 70), (30, 90), (0, 255, 255),
                                  -1)
                    cv2.putText(imgcv, "traffic signal violation", (40, 90),
                                0, font_scale, (0, 255, 255),
                                int(thick // 4))

                # ---- speed violation (hscd) ----
                # The module-level results are read regardless of the module
                # flags, so a value from an earlier track may be used here.
                if speed_estimation_result > speed_estimation.speed_threshold:
                    cv2.putText(imgcv, str(speed_estimation_result),
                                (int(bbox[0] + 50), int(bbox[1] - 12)),
                                cv2.FONT_HERSHEY_SIMPLEX, font_scale,
                                (0, 255, 255), int(thick // 6))
                    cv2.rectangle(imgcv, top_left, bottom_right,
                                  (30, 44, 243), thick // 3)
                    self.__speed_estimation_result = speed_estimation_result
                    self.__speed_threshold = speed_estimation.speed_threshold
                    _record_violation(
                        "hscd",
                        id_num,
                        hscd_ids_in_current_frame_set,
                        hscd_active_ids_set,
                        extra={
                            'speed_detected': self.__speed_estimation_result,
                            'speed_threshold': self.__speed_threshold,
                        })
                    car_violated = True
                elif speed_estimation.speed_threshold > speed_estimation_result > 0:
                    cv2.putText(imgcv, id_num,
                                (top_left[0], top_left[1] - 12), 0,
                                font_scale, (255, 255, 255),
                                int(thick // 6))
                    cv2.rectangle(imgcv, top_left, bottom_right,
                                  (255, 255, 255), thick // 3)
                    car_violated = False

                # ---- wrong direction (cmwd) ----
                if direction_detection_result == 1:
                    cv2.rectangle(imgcv,
                                  (top_left[0] + 10, top_left[1] + 10),
                                  (bottom_right[0] - 10,
                                   bottom_right[1] - 10), (249, 0, 213), 4)
                    _record_violation("cmwd", id_num,
                                      cmwd_ids_in_current_frame_set,
                                      cmwd_active_ids_set)
                    car_violated = True

                # ---- traffic signal violation (tsv) ----
                if traffic_signal_violation_result:
                    cv2.rectangle(imgcv,
                                  (top_left[0] - 10, top_left[1] - 10),
                                  (bottom_right[0] + 10,
                                   bottom_right[1] + 10), (0, 255, 255), 4)
                    _record_violation("tsv", id_num,
                                      tsv_ids_in_current_frame_set,
                                      tsv_active_ids_set)
                    car_violated = True

                if not car_violated:
                    cv2.rectangle(imgcv, top_left, bottom_right,
                                  (255, 255, 255), thick // 3)

        # NOTE(review): assumed to run once per frame in tracking mode only;
        # confirm against the original layout.
        with open(
                flags.path_to_output + "/" + flags.start_time +
                "_alert_poller.txt", 'a') as alertsFile:
            _finalize_inactive("hscd", hscd_active_ids_set,
                               hscd_ids_in_current_frame_set,
                               hscd_processed_ids_set, alertsFile)
            _finalize_inactive("cmwd", cmwd_active_ids_set,
                               cmwd_ids_in_current_frame_set,
                               cmwd_processed_ids_set, alertsFile)
            _finalize_inactive("tsv", tsv_active_ids_set,
                               tsv_ids_in_current_frame_set,
                               tsv_processed_ids_set, alertsFile)

    if not save:
        return imgcv

    outfolder = os.path.join(flags.imgdir, 'out')
    img_name = os.path.join(outfolder, os.path.basename(im))
    if flags.json:
        textJSON = json.dumps(resultsForJSON)
        textFile = os.path.splitext(img_name)[0] + ".json"
        with open(textFile, 'w') as f:
            f.write(textJSON)
        return

    cv2.imwrite(img_name, imgcv)
2:] = pred_bbox_pixel_tlwh[:, 2:] - pred_bbox_pixel_tlwh[:, : 2] pred_ldmk_pixel = pred_ldmk_pixel[valid_index, :] pred_prob = pred_prob[valid_index] # loop over faces detections = [] for i in (range(pred_prob.shape[0])): # face recognition aligned = face_algin_by_landmark(face_imgs[i], face_landmarks[i]) feature, mask = face_recognizer.face_recognize(aligned) new_detection = Detection((pred_bbox_pixel_tlwh[i]), confidence=pred_prob[i], feature=feature.squeeze(0)) detections.append(new_detection) result = face_recognition(feature, rec_db) if args.mask: is_mask = np.argmax(mask) == 0 if is_mask: mask_label = "mask" else: mask_label = "no mask" ((label_width, label_height), _) = cv2.getTextSize(mask_label, fontFace=cv2.FONT_HERSHEY_PLAIN, fontScale=1, thickness=2)
def postprocess(self, net_out, im, frame_id=0, csv_file=None, csv=None, mask=None, encoder=None, tracker=None, ranger=None):
    """Draw raw detections or tracked objects on one frame.

    Args:
        net_out: Network output handed to ``self.findboxes``.
        im: Image as a numpy array, or a path readable by ``cv2.imread``.
        frame_id: Value written to the first column of every CSV row.
        csv_file: File object flushed after each CSV row (``FLAGS.csv``).
        csv: Writer whose ``writerow`` receives one row per drawn track.
        mask: Forwarded to ``extract_boxes`` when ``FLAGS.BK_MOG`` is set
            and fewer than three boxes were detected.
        encoder: Callable ``encoder(image, boxes)`` returning one feature
            per box; only used when ``FLAGS.tracker == "deep_sort"``.
        tracker: Tracker instance matching ``FLAGS.tracker``.
        ranger: Not used by this function; kept for caller compatibility.

    Returns:
        ``(imgcv, res)``. ``res`` is ``((left, top), (right, bot))`` of the
        last box accepted by ``process_box`` in tracking mode and ``None``
        otherwise (including the whole non-tracking path).
    """
    boxes = self.findboxes(net_out)

    meta = self.meta
    nms_max_overlap = 0.1
    threshold = meta['thresh']

    imgcv = im if isinstance(im, np.ndarray) else cv2.imread(im)
    h, w, _ = imgcv.shape
    thick = int((h + w) // 300)
    # Collected for FLAGS.json but not persisted by this function.
    resultsForJSON = []
    # Bound up front so the non-tracking path can return it as well.
    res = None

    if not self.FLAGS.track:
        for b in boxes:
            boxResults = self.process_box(b, h, w, threshold)
            if boxResults is None:
                continue
            left, right, top, bot, mess, max_indx, confidence = boxResults
            if self.FLAGS.json:
                resultsForJSON.append({
                    "label": mess,
                    "confidence": float('%.2f' % confidence),
                    "topleft": {"x": left, "y": top},
                    "bottomright": {"x": right, "y": bot},
                })
                continue
            if self.FLAGS.display or self.FLAGS.saveVideo:
                # The previous template "{ID: } ..." declared a *named*
                # field and raised KeyError on every call.
                text = "{} {:.0f}%".format(mess, confidence * 100)
                cv2.rectangle(imgcv, (left, top), (right, bot),
                              random.choice(colors), thick)
                cv2.putText(imgcv, text, (left, top - 12), 0, 1e-3 * h,
                            random.choice(colors), thick // 3)
        return imgcv, res

    if not ds:
        print(
            "ERROR : deep sort or sort submodules not found for tracking please run :"
        )
        print("\tgit submodule update --init --recursive")
        print("ENDING")
        exit(1)

    detections = []
    scores = []
    # Label/confidence of the most recent box are reused for every track
    # overlay below; the defaults keep that overlay from raising when all
    # detections came from extract_boxes.
    mess = ""
    confidence = 0.0
    for b in boxes:
        boxResults = self.process_box(b, h, w, threshold)
        if boxResults is None:
            continue
        left, right, top, bot, mess, max_indx, confidence = boxResults
        res = ((left, top), (right, bot))
        if mess not in self.FLAGS.trackObj:
            continue
        if self.FLAGS.tracker == "deep_sort":
            # deep_sort expects (top-left x, top-left y, width, height).
            detections.append(
                np.array([left, top, right - left,
                          bot - top]).astype(np.float64))
            scores.append(confidence)
        elif self.FLAGS.tracker == "sort":
            detections.append(
                np.array([left, top, right, bot]).astype(np.float64))

    if len(detections) < 3 and self.FLAGS.BK_MOG:
        detections = detections + extract_boxes(self, mask)
    detections = np.array(detections)
    if detections.shape[0] == 0:
        return imgcv, res

    if self.FLAGS.tracker == "deep_sort":
        scores = np.array(scores)
        features = encoder(imgcv, detections.copy())
        detections = [
            Detection(bbox, score, feature)
            for bbox, score, feature in zip(detections, scores, features)
        ]
        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = prep.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]
        tracker.predict()
        tracker.update(detections)
        trackers = tracker.tracks
    elif self.FLAGS.tracker == "sort":
        trackers = tracker.update(detections)

    for track in trackers:
        if self.FLAGS.tracker == "deep_sort":
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            id_num = str(track.track_id)
            color = colors[int(id_num) % 7]
        elif self.FLAGS.tracker == "sort":
            bbox = [
                int(track[0]),
                int(track[1]),
                int(track[2]),
                int(track[3])
            ]
            id_num = str(int(track[4]))
            color = random.choice(colors)
        text = "{}ID:{} Confidence:{:.1f}".format(mess, id_num, confidence)
        if self.FLAGS.csv:
            csv.writerow([
                frame_id, id_num,
                int(bbox[0]),
                int(bbox[1]),
                int(bbox[2]) - int(bbox[0]),
                int(bbox[3]) - int(bbox[1])
            ])
            csv_file.flush()
        if self.FLAGS.display or self.FLAGS.saveVideo:
            cv2.rectangle(imgcv, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (0, 0, 0), thick)
            cv2.putText(imgcv, text, (int(bbox[0]), int(bbox[1]) - 12), 0,
                        1e-3 * h, (0, 0, 0), thick // 3)
    return imgcv, res
def efficientDet_video_inference(video_src, compound_coef=0, force_input_size=None, frame_skipping=3, threshold=0.2, out_path=None, imshow=False, display_fps=False, model_filename='/home/shaheryar/Desktop/Projects/Football-Monitoring/deep_sort/model_weights/mars-small128.pb'):
    """Run EfficientDet detection plus deep-sort tracking over a video.

    Args:
        video_src: Anything accepted by ``cv2.VideoCapture``.
        compound_coef: EfficientDet variant; selects the weights file
            ``weights/efficientdet-d{compound_coef}.pth`` and input size.
        force_input_size: Overrides ``input_sizes[compound_coef]`` if given.
        frame_skipping: Detection runs on frames whose index is a multiple
            of this value (``0`` means every frame); the remaining frames
            are drawn with the most recent boxes and tracks.
        threshold: Score threshold forwarded to ``postprocess``.
        out_path: Optional path of the annotated output video.
        imshow: Show a 0.75x preview window; ``q`` stops processing.
        display_fps: Overlay the detector's frame rate instead of ``0``.
        model_filename: Path of the deep-sort appearance model passed to
            ``gdet.create_box_encoder``.

    Returns:
        None. Results are shown and/or written to ``out_path``.
    """
    # deep-sort variables
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric(
        "cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric, n_init=5)

    # efficientDet-pytorch variables
    iou_threshold = 0.4
    use_cuda = True
    use_float16 = False
    cudnn.fastest = True
    cudnn.benchmark = True
    input_size = input_sizes[compound_coef] if force_input_size is None else force_input_size

    # load model
    model = EfficientDetBackbone(compound_coef=compound_coef,
                                 num_classes=len(obj_list))
    model.load_state_dict(torch.load(f'weights/efficientdet-d{compound_coef}.pth'))
    model.requires_grad_(False)
    model.eval()
    if use_cuda:
        model = model.cuda()
    if use_float16:
        model = model.half()
    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    # Video capture
    cap = cv2.VideoCapture(video_src)
    outp = None  # stays None when no output file was requested
    try:
        frame_width = int(cap.get(3))
        frame_height = int(cap.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'MPEG')
        fps = cap.get(cv2.CAP_PROP_FPS)
        print("Video fps", fps)
        if out_path is not None:
            outp = cv2.VideoWriter(out_path, fourcc, fps,
                                   (frame_width, frame_height))
        # Some sources report 0 FPS; skip the per-second progress log then
        # instead of dividing by zero.
        whole_fps = int(fps)

        i = 0
        start = time.time()
        org_boxes = []
        t2 = 0.0
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            if frame_skipping == 0 or i % frame_skipping == 0:
                # frame preprocessing (running detections)
                ori_imgs, framed_imgs, framed_metas, t1 = preprocess_video(
                    frame, width=input_size, height=input_size)
                if use_cuda:
                    x = torch.stack([fi.cuda() for fi in framed_imgs], 0)
                else:
                    x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

                # model predict
                t1 = time.time()
                with torch.no_grad():
                    _, regression, classification, anchors = model(x)
                    out = postprocess(x, anchors, regression, classification,
                                      regressBoxes, clipBoxes, threshold,
                                      iou_threshold)
                # Post processing
                out = invert_affine(framed_metas, out)
                # decoding bbox, object name and scores
                boxes, classes, scores = decode_predictions(out[0])
                org_boxes = boxes.copy()
                t2 = time.time() - t1

                # feature extraction for deep sort
                boxes = [convert_bbox_to_deep_sort_format(frame.shape, b)
                         for b in boxes]
                features = encoder(frame, boxes)
                detections = [Detection(bbox, 1.0, feature)
                              for bbox, feature in zip(boxes, features)]
                boxes = np.array([d.tlwh for d in detections])
                scores = np.array([d.confidence for d in detections])
                indices = preprocessing.non_max_suppression(
                    boxes, nms_max_overlap, scores)
                detections = [detections[k] for k in indices]
                tracker.predict()
                tracker.update(detections)
            i = i + 1

            img_show = frame.copy()
            # NOTE(review): boxes and tracks are paired by position, which
            # is not an association the tracker guarantees; zip at least
            # prevents an IndexError when there are fewer tracks than boxes.
            for box, paired_track in zip(org_boxes, tracker.tracks):
                img_show = drawBoxes(img_show, box, (255, 255, 0),
                                     str(paired_track.track_id))
            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                roi = frame[y1:y2, x1:x2]
                cv2.rectangle(img_show, (x1, y1), (x2, y2),
                              update_color_association(roi, track.track_id), 2)
                cv2.putText(img_show, str(track.track_id), (x1, y1), 0,
                            5e-3 * 100, (255, 255, 0), 1)

            current_frame_fps = 1 / t2 if display_fps and t2 > 0 else 0
            cv2.putText(img_show, 'FPS: {0:.2f}'.format(current_frame_fps),
                        (30, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0),
                        2, cv2.LINE_AA)
            if whole_fps > 0 and i % whole_fps == 0:
                print("Processed ", str(int(i / fps)), "seconds")
                print("Time taken", time.time() - start)

            if imshow:
                # Resize a separate preview so the frame handed to the
                # writer keeps the size the writer was opened with.
                preview = cv2.resize(img_show, (0, 0), fx=0.75, fy=0.75)
                cv2.imshow('Frame', preview)
                # Press Q on keyboard to exit
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
            if outp is not None:
                outp.write(img_show)
    finally:
        # Release native handles even if a frame raised part-way through.
        cap.release()
        if outp is not None:
            outp.release()
        if imshow:
            cv2.destroyAllWindows()
def _match_track_scores(trackers, detections, confidences, tolerance=15):
    """Return one confidence string per SORT track row.

    A track is paired with the detection whose top-left corner is nearest
    (L1 distance on ceil'ed coordinates) among those within ``tolerance``
    pixels on both axes. Tracks without such a detection, or paired with a
    detection that has no recorded confidence, get ``"0"``.
    """
    matched = []
    for track in trackers:
        track_x, track_y = math.ceil(track[0]), math.ceil(track[1])
        best_idx, best_dist = None, None
        for idx, det in enumerate(detections):
            dx = abs(math.ceil(det[0]) - track_x)
            dy = abs(math.ceil(det[1]) - track_y)
            if dx > tolerance or dy > tolerance:
                continue
            if best_dist is None or dx + dy < best_dist:
                best_idx, best_dist = idx, dx + dy
        if best_idx is not None and best_idx < len(confidences):
            matched.append(str(confidences[best_idx]))
        else:
            matched.append(str(0))
    return matched


def postprocess(self, net_out, im, frame_id=0, csv_file=None, csv=None, mask=None, encoder=None, tracker=None):
    """Draw raw detections or tracked objects on one frame.

    In tracking mode each CSV row carries a trailing score column. For the
    "sort" tracker it is the confidence of the detection matched to the
    track by ``_match_track_scores``; for "deep_sort" it is ``"0"``.

    Args:
        net_out: Network output handed to ``self.findboxes``.
        im: Image as a numpy array, or a path readable by ``cv2.imread``.
        frame_id: Value written to the first column of every CSV row.
        csv_file: File object flushed after each CSV row (``FLAGS.csv``).
        csv: Writer whose ``writerow`` receives one row per drawn track.
        mask: Forwarded to ``extract_boxes`` when ``FLAGS.BK_MOG`` is set
            and fewer than three boxes were detected.
        encoder: Callable ``encoder(image, boxes)`` returning one feature
            per box; only used when ``FLAGS.tracker == "deep_sort"``.
        tracker: Tracker instance matching ``FLAGS.tracker``. For "sort"
            its ``update(detections, confidence)`` must return four values,
            the first being the track rows ``[x1, y1, x2, y2, id]``.

    Returns:
        The annotated image (the same array object when ``im`` is an array).
    """
    boxes = self.findboxes(net_out)

    meta = self.meta
    nms_max_overlap = 0.1
    threshold = meta['thresh']
    colors = meta['colors']

    imgcv = im if isinstance(im, np.ndarray) else cv2.imread(im)
    h, w, _ = imgcv.shape
    thick = int((h + w) // 300)
    # Collected for FLAGS.json but not persisted by this function.
    resultsForJSON = []

    if not self.FLAGS.track:
        for b in boxes:
            boxResults = self.process_box(b, h, w, threshold)
            if boxResults is None:
                continue
            left, right, top, bot, mess, max_indx, confidence = boxResults
            if self.FLAGS.json:
                resultsForJSON.append({
                    "label": mess,
                    "confidence": float('%.2f' % confidence),
                    "topleft": {"x": left, "y": top},
                    "bottomright": {"x": right, "y": bot},
                })
                continue
            if self.FLAGS.display or self.FLAGS.saveVideo:
                cv2.rectangle(imgcv, (left, top), (right, bot),
                              colors[max_indx], thick)
                cv2.putText(imgcv, mess, (left, top - 12), 0, 1e-3 * h,
                            colors[max_indx], thick // 3)
        return imgcv

    if not ds:
        print(
            "ERROR : deep sort or sort submodules not found for tracking please run :"
        )
        print("\tgit submodule update --init --recursive")
        print("ENDING")
        exit(1)

    detections = []
    scores = []
    confianza = []  # per-detection confidences, parallel to `detections`
    # Handed to the SORT tracker below; the default keeps that call from
    # raising when every detection came from extract_boxes.
    confidence = 0.0
    for b in boxes:
        boxResults = self.process_box(b, h, w, threshold)
        if boxResults is None:
            continue
        left, right, top, bot, mess, max_indx, confidence = boxResults
        if mess not in self.FLAGS.trackObj:
            continue
        if self.FLAGS.tracker == "deep_sort":
            # deep_sort expects (top-left x, top-left y, width, height).
            detections.append(
                np.array([left, top, right - left,
                          bot - top]).astype(np.float64))
            scores.append(confidence)
        elif self.FLAGS.tracker == "sort":
            detections.append(
                np.array([left, top, right, bot,
                          confidence]).astype(np.float64))
            confianza.append(confidence)

    if len(detections) < 3 and self.FLAGS.BK_MOG:
        detections = detections + extract_boxes(self, mask)
    detections = np.array(detections)
    if detections.shape[0] == 0:
        return imgcv

    # Only SORT rows can be matched back to detections by coordinates.
    track_scores = None
    if self.FLAGS.tracker == "deep_sort":
        scores = np.array(scores)
        features = encoder(imgcv, detections.copy())
        detections = [
            Detection(bbox, score, feature)
            for bbox, score, feature in zip(detections, scores, features)
        ]
        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = prep.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]
        tracker.predict()
        tracker.update(detections)
        trackers = tracker.tracks
    elif self.FLAGS.tracker == "sort":
        trackers, _savedid, _unmatched_dets, _unmatched_trks = tracker.update(
            detections, confidence)
        track_scores = _match_track_scores(trackers, detections, confianza)

    for idx, track in enumerate(trackers):
        if self.FLAGS.tracker == "deep_sort":
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            id_num = str(track.track_id)
            id_scores = str(0)
        elif self.FLAGS.tracker == "sort":
            bbox = [
                int(track[0]),
                int(track[1]),
                int(track[2]),
                int(track[3])
            ]
            id_num = str(int(track[4]))
            id_scores = track_scores[idx]
        if self.FLAGS.csv:
            csv.writerow([
                frame_id, id_num,
                int(bbox[0]),
                int(bbox[1]),
                int(bbox[2]) - int(bbox[0]),
                int(bbox[3]) - int(bbox[1]), id_scores
            ])
            csv_file.flush()
        if self.FLAGS.display or self.FLAGS.saveVideo:
            cv2.rectangle(imgcv, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 0),
                          thick // 3)
            cv2.putText(imgcv, id_num, (int(bbox[0]), int(bbox[1]) - 12), 0,
                        1e-3 * h, (255, 255, 0), thick // 6)
            if track_scores is not None:
                cv2.putText(imgcv, id_scores[0:4],
                            ((int(bbox[2])), int(bbox[3]) - 12), 0,
                            1e-3 * h / 1.3, (255, 0, 255), thick // 6)
    return imgcv
def postprocess(self, callback, net_out, im, frame_id=0, csv_file=None, csv=None, mask=None, encoder=None, tracker=None):
    """Draw detections or tracked objects on one frame and update the counter.

    Without ``FLAGS.track`` each accepted box is either recorded for JSON
    or drawn with its label. With tracking, boxes whose label equals
    ``FLAGS.trackObj`` are fed to the configured tracker ("deep_sort" or
    "sort"); every resulting track can be written to CSV, drawn together
    with a trail of its recent centre points, and checked against the
    counting line/region.

    The tracking branch reads and mutates names that are not defined in
    this function: ``dict``, ``person_count``, ``ids``, ``list_color`` and
    ``rrps``, plus the helpers ``linecheck``, ``rangecheck`` and
    ``extract_boxes``.

    Parameters
    ----------
    callback : callable
        Called with ``len(ids)`` in the counter section.
    net_out :
        Network output handed to ``self.findboxes``.
    im : numpy.ndarray or str
        Image array, or a path loaded through ``cv2.imread``.
    frame_id : int
        Value written to the first column of each CSV row.
    csv_file :
        File object flushed after each CSV row.
    csv :
        Writer whose ``writerow`` receives one row per track.
    mask :
        Forwarded to ``extract_boxes`` when ``FLAGS.BK_MOG`` is set and
        fewer than five boxes were detected.
    encoder : callable
        ``encoder(image, boxes)``; used only for "deep_sort".
    tracker :
        Tracker instance matching ``FLAGS.tracker``.

    Returns
    -------
    The annotated image.
    """
    boxes = self.findboxes(net_out)
    # meta
    meta = self.meta
    nms_max_overlap = 0.1
    threshold = meta['thresh']
    colors = meta['colors']
    labels = meta['labels']
    # Accept either an already-decoded image or a path to load.
    if type(im) is not np.ndarray:
        imgcv = cv2.imread(im)
    else:
        imgcv = im
    h, w, _ = imgcv.shape
    # Line thickness scales with the frame size.
    thick = int((h + w) // 300)
    resultsForJSON = []
    # Counting-line points; when empty the counter falls back to `rrps`.
    llps = self.FLAGS.list_xy  #(int(self.FLAGS.x1), int(self.FLAGS.y1)), (int(self.FLAGS.x2), int(self.FLAGS.y2))]
    if not self.FLAGS.track:
        for b in boxes:
            boxResults = self.process_box(b, h, w, threshold)
            if boxResults is None:
                continue
            left, right, top, bot, mess, max_indx, confidence = boxResults
            if self.FLAGS.json:
                # JSON mode records the box and skips drawing it.
                resultsForJSON.append({
                    "label": mess,
                    "confidence": float('%.2f' % confidence),
                    "topleft": {
                        "x": left,
                        "y": top
                    },
                    "bottomright": {
                        "x": right,
                        "y": bot
                    }
                })
                continue
            if self.FLAGS.display or self.FLAGS.saveVideo:
                cv2.rectangle(imgcv, (left, top), (right, bot),
                              colors[max_indx], thick)
                cv2.putText(imgcv, mess, (left, top - 12), 0, 1e-3 * h,
                            colors[max_indx], thick // 3)
    else:
        if not ds:
            print(
                "ERROR : deep sort or sort submodules not found for tracking please run :"
            )
            print("\tgit submodule update --init --recursive")
            print("ENDING")
            exit(1)
        detections = []
        scores = []
        for b in boxes:
            boxResults = self.process_box(b, h, w, threshold)
            if boxResults is None:
                continue
            left, right, top, bot, mess, max_indx, confidence = boxResults
            # Equality test: only the single label in FLAGS.trackObj is tracked.
            if self.FLAGS.trackObj != mess:
                continue
            if self.FLAGS.tracker == "deep_sort":
                # deep_sort boxes are (x, y, width, height).
                detections.append(
                    np.array([left, top, right - left,
                              bot - top]).astype(np.float64))
                scores.append(confidence)
            elif self.FLAGS.tracker == "sort":
                # sort boxes are the two corners (left, top, right, bot).
                detections.append(
                    np.array([left, top, right, bot]).astype(np.float64))
        # Top up with boxes from extract_boxes when few objects were detected.
        if len(detections) < 5 and self.FLAGS.BK_MOG:
            detections = detections + extract_boxes(mask)
        detections = np.array(detections)
        if self.FLAGS.tracker == "deep_sort":
            scores = np.array(scores)
            features = encoder(imgcv, detections.copy())
            detections = [
                Detection(bbox, score, feature)
                for bbox, score, feature in zip(detections, scores, features)
            ]
            # Run non-maxima suppression.
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            indices = prep.non_max_suppression(boxes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]
            tracker.predict()
            tracker.update(detections)
            trackers = tracker.tracks
        elif self.FLAGS.tracker == "sort":
            trackers = tracker.update(detections)
        for track in trackers:
            if self.FLAGS.tracker == "deep_sort":
                # Skip tentative tracks and tracks not updated recently.
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                id_num = str(track.track_id)
            elif self.FLAGS.tracker == "sort":
                # sort rows are indexed as [x1, y1, x2, y2, id].
                bbox = [
                    int(track[0]),
                    int(track[1]),
                    int(track[2]),
                    int(track[3])
                ]
                id_num = str(int(track[4]))
            # Centre of the track's bounding box.
            center_x = int((int(bbox[0]) + (int(bbox[2]) - int(bbox[0])) / 2))
            center_y = int((int(bbox[1]) + (int(bbox[3]) - int(bbox[1])) / 2))
            if self.FLAGS.csv:
                # Row layout: frame, id, x, y, width, height.
                csv.writerow([
                    frame_id, id_num,
                    int(bbox[0]),
                    int(bbox[1]),
                    int(bbox[2]) - int(bbox[0]),
                    int(bbox[3]) - int(bbox[1])
                ])
                csv_file.flush()
            if self.FLAGS.display or self.FLAGS.saveVideo or self.FLAGS.counter:
                #id_person = int(update_csv(int(id_num)))
                id_person = int(id_num)
                # Colour is chosen from list_color by track id.
                id_person_color = int(id_person % len(list_color))
                # Draw the bounding box.
                cv2.rectangle(imgcv, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (0, 255, 0),
                              thick // 3)
                #cv2.putText(imgcv, str(update_csv(int(id_num))), (int(bbox[0]), int(bbox[1]) - 12), 0, 1e-3 * h, (255, 0, 255), thick // 6)
                # Draw the track id above the box.
                cv2.putText(imgcv, str(id_person),
                            (int(bbox[0]), int(bbox[1]) - 12), 0, 1e-3 * h,
                            list_color[id_person_color], thick // 3)
                # `dict` is subscripted and appended to here, so it is not
                # the builtin type; presumably a module-level mapping from
                # track id to a list of points - TODO confirm.
                dict[id_person].append((center_x, center_y))
                # Draw the stored centre points and connect consecutive ones.
                for i in range(0, len(dict[id_person])):
                    cv2.circle(imgcv, dict[id_person][i], 1,
                               list_color[id_person_color], thick // 5)
                    if i > 0:
                        cv2.line(imgcv, dict[id_person][i - 1],
                                 dict[id_person][i],
                                 list_color[id_person_color], 2)
                # Protect the trail from growing without bound: it is
                # cleared every 10th time this track is drawn.
                person_count[id_person] = person_count[id_person] + 1
                # frame num = 200 10s
                if person_count[id_person] % 10 == 0:
                    person_count[id_person] = 0
                    dict[id_person] = []
            # NOTE(review): the source this block was recovered from had
            # lost its indentation; everything from here to the end of the
            # `while` loop is assumed to sit inside this `if` - confirm.
            if self.FLAGS.counter:
                # Each id is tested only until it has been counted once.
                if id_num not in ids:
                    # Line check when line points are configured.
                    if len(llps) > 0:
                        res = linecheck(llps, bbox)
                    # Otherwise region check on the box centre.
                    else:
                        res = rangecheck((center_x, center_y), rrps)
                    if res:
                        ids.append(id_num)
                font = cv2.FONT_HERSHEY_TRIPLEX
                cv2.putText(imgcv, 'count: ' + str(len(ids)), (10, 70), 0,
                            1e-3 * h, (0, 0, 255), thick // 2)
                callback(len(ids))
                # Draw the counting line/region.
                if len(llps) > 0:
                    lines = llps
                else:
                    lines = rrps
                uu = 0
                # Points are consumed in pairs: (0, 1), (2, 3), ...
                while uu < len(lines) - 1:
                    lineThickness = 2
                    cv2.line(imgcv, (lines[uu][0], lines[uu][1]),
                             (lines[uu + 1][0], lines[uu + 1][1]), (0, 255, 0),
                             lineThickness)
                    uu += 2
    return imgcv
def detect_n_track_video(detect_net, tracker_obj, vidname, detection_threshold=0.8, scale=1.0, skip=0, fps=None):
    """Detect and track objects in a video, writing an annotated copy and a CSV log.

    Outputs go to the current directory as ``<video stem>_out.avi`` and
    ``<video stem>_out.csv``.

    Args:
        detect_net: Detector handed to ``image_fprop``.
        tracker_obj: Object exposing ``tracker``, ``encoder`` and
            ``nms_max_overlap``.
        vidname: Path of the input video.
        detection_threshold: Not used by this function; kept for caller
            compatibility.
        scale: Resize factor applied to every processed frame and to the
            output video size.
        skip: Number of frames to drop between two processed frames.
        fps: Output frame rate; defaults to the rate reported by the input.

    Returns:
        None.
    """
    # Local import so this block does not depend on the file's import list.
    import os

    cap = cv2.VideoCapture(vidname)
    W = int(cap.get(3))
    H = int(cap.get(4))
    video_size = [W, H]
    vid_fps = fps or int(round(cap.get(5)))
    print("Video Size :", video_size)
    print("FPS : ", vid_fps)
    tracker = tracker_obj.tracker

    # splitext/basename keep working for paths such as "./clips/a.mp4",
    # where splitting on the first "." produced an empty stem.
    stem = os.path.splitext(os.path.basename(vidname))[0]
    out_filename = stem + '_out.avi'
    out_logname = stem + '_out.csv'
    sc_width, sc_height = int(W * scale), int(H * scale)

    out = cv2.VideoWriter(out_filename,
                          cv2.VideoWriter_fourcc('X', '2', '6', '4'),
                          vid_fps, (sc_width, sc_height))
    out_log = open(out_logname, 'w')
    frame_num = 0
    try:
        writeLog(out_log, 'frame_number', 'ID',
                 ['xmin', 'ymin', 'xmax', 'ymax'])
        while True:
            if frame_num % (skip + 1) != 0:
                # Advance past a skipped frame without decoding it.
                if not cap.grab():
                    break
                frame_num += 1
                continue
            ret, frame = cap.read()
            if not ret:
                break
            if scale != 1:
                print(sc_width, sc_height)
                frame = cv2.resize(frame, (sc_width, sc_height),
                                   interpolation=cv2.INTER_AREA)
            # The detector and encoder receive RGB; drawing, display and
            # the writer keep the BGR frame that OpenCV expects.
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            print("Image shape: ", rgb.shape)
            detections = image_fprop(detect_net, rgb)
            print("Num detections:", len(detections))
            scaled_boxes = get_xywh([det['bbox'] for det in detections])
            features = tracker_obj.encoder(rgb, scaled_boxes)
            # The detector's own score is not forwarded; every detection
            # gets confidence 1.0.
            tr_detections = [Detection(bbox, 1.0, feature)
                             for bbox, feature in zip(scaled_boxes, features)]
            # Run non-maxima suppression.
            boxes = np.array([d.tlwh for d in tr_detections])
            scores = np.array([d.confidence for d in tr_detections])
            indices = preprocessing.non_max_suppression(
                boxes, tracker_obj.nms_max_overlap, scores)
            tr_detections = [tr_detections[i] for i in indices]

            # Call the tracker
            tracker.predict()
            tracker.update(tr_detections)
            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
                cv2.putText(frame, str(track.track_id),
                            (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200,
                            (0, 255, 0), 2)
                writeLog(out_log, frame_num, track.track_id,
                         np.array(bbox, dtype='int16'),
                         imsize=[sc_width, sc_height])

            cv2.imshow("Frame", frame)
            out.write(frame)
            key = cv2.waitKey(1) & 0xFF
            if key == ord("q"):
                # `q` stops processing; cleanup happens in `finally`.
                break
            frame_num += 1
            print("-------------- Frame:", frame_num)
    finally:
        # Single cleanup path for end of video, `q` and errors.
        cap.release()
        out.release()
        out_log.close()
        cv2.destroyAllWindows()
    print("\n\n ============ output written as : ", out_filename, " and ", out_logname)
infer_time = [] videoSize = (video_capturer.get(3), video_capturer.get(4)) while video_capturer.isOpened(): _, frame = video_capturer.read() bboxes = personDetector.Detect(frame) # Draw detections for bbox in bboxes: frame = personDetector.DrawBox( frame, bbox, videoSize, thickness=5) if deepSort: bboxes = personDetector.returnJustPersonBox(bboxes) features = encoder(frame, np.array( personDetector.bboxToWH(bboxes))) detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip( personDetector.bboxToWH(bboxes), features)] # Call tracker tracker.predict() tracker.update(detections) for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() frame = personDetector.DrawBox(frame, personDetector.bboxToXY( bbox, videoSize), videoSize, color=(255, 255, 255), thickness=2) cv2.putText(frame, str(track.track_id), (int( bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2) elif sort:
def detect_video(graph, yolo, encoder, video_filepath, mark_on_video, show):
    """Detect and track objects in a video, one deep-sort tracker per class.

    Args:
        graph: Graph object whose ``as_default()`` context wraps the
            detector and encoder calls.
        yolo: Detector passed to ``detect_frame``; its ``class_names`` are
            used for labels.
        encoder: Callable ``encoder(frame, boxes)`` giving one feature per box.
        video_filepath: Source handed to ``cv2.VideoCapture``.
        mark_on_video: Draw boxes and ``label_id`` text on each frame.
        show: Display frames in a "result" window; ``q`` stops early.

    Returns:
        dict mapping frame index to a list of records with the keys
        ``top``, ``left``, ``right``, ``bottom``, ``label`` and
        ``object_id`` (an id unique across classes).
    """
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 0.3  # 1.0

    # "<class name>_<track id>" -> id that is unique across all classes
    name_to_global_id = {}
    next_global_id = 0

    # read video filepath
    cap = cv2.VideoCapture(video_filepath)
    try:
        # per class object tracker
        trackers_by_class = {}
        detection_dict = {}
        i_frame = -1
        while True:
            ret, frame = cap.read()
            i_frame += 1
            if not ret:
                break

            with graph.as_default():
                out_boxes, out_scores, out_classes = detect_frame(
                    yolo=yolo, image=Image.fromarray(frame))

            # convert to [x,y,w,h]
            boxs = np.array([
                [bb[1], bb[0], bb[3] - bb[1], bb[2] - bb[0]]
                for bb in out_boxes
            ])

            # make sure every class seen in this frame has a tracker
            for class_id in np.unique(out_classes):
                if class_id in trackers_by_class:
                    continue
                metric = nn_matching.NearestNeighborDistanceMetric(
                    "cosine", max_cosine_distance, nn_budget)
                trackers_by_class[class_id] = Tracker(metric)

            # update all trackers with incoming detections
            for class_id, class_tracker in trackers_by_class.items():
                selected = out_classes == class_id
                with graph.as_default():
                    features = encoder(frame, boxs[selected])
                candidates = [
                    Detection(bbox, score, feature)
                    for bbox, score, feature in zip(
                        boxs[selected], out_scores[selected], features)
                ]
                # Run non-maxima suppression.
                tlwh = np.array([d.tlwh for d in candidates])
                confidences = np.array([d.confidence for d in candidates])
                keep = preprocessing.non_max_suppression(
                    tlwh, nms_max_overlap, confidences)
                kept = [candidates[i] for i in keep]
                # Call the tracker
                class_tracker.predict()
                class_tracker.update(kept)

            # Save detection per frame per object_id
            frame_records = []
            detection_dict[i_frame] = frame_records
            for class_id, class_tracker in trackers_by_class.items():
                for track in class_tracker.tracks:
                    if track.time_since_update > 2:
                        continue
                    bbox = track.to_tlbr()
                    label = yolo.class_names[class_id]
                    object_name = '{}_{}'.format(label, track.track_id)
                    if object_name not in name_to_global_id:
                        name_to_global_id[object_name] = next_global_id
                        next_global_id += 1

                    left, top, right, bottom = (int(bbox[0]), int(bbox[1]),
                                                int(bbox[2]), int(bbox[3]))
                    # mark in video
                    if mark_on_video:
                        cv2.rectangle(frame, (left, top), (right, bottom),
                                      (255, 255, 255), 2)
                        cv2.putText(frame,
                                    '{}_{}'.format(label, track.track_id),
                                    (left, top), 0, 1, (0, 255, 0), 1)
                    # create results dictionary
                    frame_records.append({
                        'top': top,
                        'left': left,
                        'right': right,
                        'bottom': bottom,
                        'label': label,
                        'object_id': name_to_global_id[object_name]
                    })

            if show:
                cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
                cv2.imshow("result", frame)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
    finally:
        cap.release()
    return detection_dict