def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    show_detections = True
    writeVideo_flag = True
    asyncVideo_flag = False

    file_path = 'video.webm'
    if asyncVideo_flag:
        video_capture = VideoCaptureAsync(file_path)
    else:
        video_capture = cv2.VideoCapture(file_path)

    if asyncVideo_flag:
        video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('output_yolov4.avi', fourcc, 30, (w, h))
        frame_index = -1

    fps = 0.0
    fps_imutils = imutils.video.FPS().start()

    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            break

        t1 = time.time()

        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        boxes, confidence, classes = yolo.detect_image(image)

        features = encoder(frame, boxes)
        detections = [Detection(bbox, confidence, cls, feature)
                      for bbox, confidence, cls, feature in
                      zip(boxes, confidence, classes, features)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.cls for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for det in detections:
            bbox = det.to_tlbr()
            if show_detections and len(classes) > 0:
                det_cls = det.cls
                score = "%.2f" % (det.confidence * 100) + "%"
                cv2.putText(frame, str(det_cls) + " " + score,
                            (int(bbox[0]), int(bbox[3])), 0,
                            1e-3 * frame.shape[0], (0, 255, 0), 1)
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            adc = "%.2f" % (track.adc * 100) + "%"  # Average detection confidence
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, "ID: " + str(track.track_id),
                        (int(bbox[0]), int(bbox[1])), 0,
                        1e-3 * frame.shape[0], (0, 255, 0), 1)
            if not show_detections:
                track_cls = track.cls
                cv2.putText(frame, str(track_cls),
                            (int(bbox[0]), int(bbox[3])), 0,
                            1e-3 * frame.shape[0], (0, 255, 0), 1)
                cv2.putText(frame, 'ADC: ' + adc,
                            (int(bbox[0]), int(bbox[3] + 2e-2 * frame.shape[1])), 0,
                            1e-3 * frame.shape[0], (0, 255, 0), 1)

        # cv2.imshow('', frame)

        if writeVideo_flag:  # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        fps_imutils.update()

        if not asyncVideo_flag:
            fps = (fps + (1. / (time.time() - t1))) / 2
            print("FPS = %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    if asyncVideo_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if writeVideo_flag:
        out.release()
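# A note on box formats used throughout these scripts: Detection.tlwh stores
# (top-left x, top-left y, width, height), while to_tlbr() yields the
# (xmin, ymin, xmax, ymax) corners that cv2.rectangle expects. A minimal
# sketch of the two conversions (assuming plain NumPy arrays, not the
# deep_sort classes themselves):

import numpy as np

def tlwh_to_tlbr(box):
    # (x, y, w, h) -> (xmin, ymin, xmax, ymax)
    x, y, w, h = box
    return np.array([x, y, x + w, y + h])

def tlbr_to_tlwh(box):
    # (xmin, ymin, xmax, ymax) -> (x, y, w, h)
    x1, y1, x2, y2 = box
    return np.array([x1, y1, x2 - x1, y2 - y1])

print(tlwh_to_tlbr([10, 20, 50, 80]))  # -> [ 10  20  60 100]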
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.4
    nn_budget = None
    nms_max_overlap = 1.0
    TestOutput = []

    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # calculate cosine distance metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    # initialize tracker
    tracker = Tracker(metric)

    # load configuration for object detector
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    video_path = FLAGS.video

    # load tflite model if flag is set
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    # otherwise load standard tensorflow saved model
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    # begin video capture
    try:
        vid = cv2.VideoCapture(int(video_path))
    except:
        vid = cv2.VideoCapture(video_path)

    out = None

    # get video ready to save locally if flag is set
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    frame_num = 0
    # while video is running
    while True:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
        else:
            print('Video has ended or failed, try a different video format!')
            AlexFun(TestOutput, video_path)  # AlexFun: external test hook, not defined in this snippet
            # print(TestOutput)
            break
        frame_num += 1
        # print('Frame #: ', frame_num)
        frame_size = frame.shape[:2]
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        start_time = time.time()

        # run detections on tflite if flag is set
        if FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred = [interpreter.get_tensor(output_details[i]['index'])
                    for i in range(len(output_details))]
            # run detections using yolov3 if flag is set
            if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
                boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
        else:
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score)

        # convert data to numpy arrays and slice out unused elements
        num_objects = valid_detections.numpy()[0]
        bboxes = boxes.numpy()[0]
        bboxes = bboxes[0:int(num_objects)]
        scores = scores.numpy()[0]
        scores = scores[0:int(num_objects)]
        classes = classes.numpy()[0]
        classes = classes[0:int(num_objects)]

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)

        # store all predictions in one parameter for simplicity when calling functions
        pred_bbox = [bboxes, scores, classes, num_objects]

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        allowed_classes = list(class_names.values())

        # custom allowed classes (uncomment a line below to customize the tracker for only those classes)
        # allowed_classes = ['person']
        # allowed_classes = ['person', 'car', 'truck']

        # loop through objects and use class index to get class name,
        # allow only classes in allowed_classes list
        names = []
        deleted_indx = []
        for i in range(num_objects):
            class_indx = int(classes[i])
            class_name = class_names[class_indx]
            if class_name not in allowed_classes:
                deleted_indx.append(i)
            else:
                names.append(class_name)
        names = np.array(names)
        count = len(names)
        if FLAGS.count:
            cv2.putText(frame, "Objects being tracked: {}".format(count), (5, 35),
                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2)
            print("Objects being tracked: {}".format(count))
        # delete detections that are not in allowed_classes
        bboxes = np.delete(bboxes, deleted_indx, axis=0)
        scores = np.delete(scores, deleted_indx, axis=0)

        # encode yolo detections and feed to tracker
        features = encoder(frame, bboxes)
        detections = [Detection(bbox, score, class_name, feature)
                      for bbox, score, class_name, feature in
                      zip(bboxes, scores, names, features)]

        # initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        # update tracks
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()

            # draw bbox on screen
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17,
                           int(bbox[1])), color, -1)
            cv2.putText(frame, class_name + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2)

            # if enable info flag then record details about each track
            if FLAGS.info:
                # print("Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}".format(
                #     str(track.track_id), class_name,
                #     (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))))
                TestXCO = (int(bbox[0]) + int(bbox[2])) / 2
                TestYCO = (int(bbox[1]) + int(bbox[3])) / 2
                TestTemp = [frame_num]
                TestTemp.append(str(track.track_id))
                TestTemp.append(class_name)
                TestTemp.append(TestXCO)
                TestTemp.append(TestYCO)
                TestTemp.append(int(bbox[0]))
                TestTemp.append(int(bbox[1]))
                TestTemp.append(int(bbox[2]))
                TestTemp.append(int(bbox[3]))
                TestOutput.append(TestTemp)  # one row per track per frame; a frame-start marker row could also be added here

        # calculate frames per second of running detections
        fps = 1.0 / (time.time() - start_time)
        # print("FPS: %.2f" % fps)
        result = np.asarray(frame)
        result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        if not FLAGS.dont_show:
            cv2.imshow("Output Video", result)

        # if output flag is set, save video file
        if FLAGS.output:
            out.write(result)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cv2.destroyAllWindows()
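# The rows accumulated in TestOutput above have the shape
# [frame_num, track_id, class_name, center_x, center_y, xmin, ymin, xmax, ymax];
# AlexFun, which consumes them, is not defined in this snippet. A hypothetical
# writer that would persist those rows to CSV might look like this (file name
# and header are assumptions):

import csv

def write_track_rows(rows, out_path='track_log.csv'):
    header = ['frame', 'track_id', 'class', 'cx', 'cy', 'xmin', 'ymin', 'xmax', 'ymax']
    with open(out_path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(header)   # one header row
        writer.writerows(rows)    # one row per track per frame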
def main(yolo):
    # Determining the FPS of a video having variable frame rate
    # cv2.CAP_PROP_FPS is not used since it returns 'infinity' for variable frame rate videos
    filename = "Cafe_Hyperion.avi"

    # Determining the total duration of the video
    clip = VideoFileClip(filename)

    cap2 = cv2.VideoCapture(filename)
    co = 0
    ret2 = True
    while ret2:
        ret2, frame2 = cap2.read()
        if ret2:
            co += 1  # Determining the total number of frames
    cap2.release()

    # Computing the average FPS of the video
    Input_FPS = co / clip.duration

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
    frame_count = 0

    # Implementing the Deep SORT algorithm
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    # Cosine distance is used as the metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    video_capture = cv2.VideoCapture(filename)

    # Define the codec and create a VideoWriter object to save the output video
    out = cv2.VideoWriter('output.mp4',
                          cv2.VideoWriter_fourcc(*'MP4V'),
                          Input_FPS,
                          (int(video_capture.get(3)), int(video_capture.get(4))))

    # To calculate the frames processed by the deep sort algorithm per second
    fps = 0.0

    # Initializing empty variables for counting and tracking purposes
    queue_track_dict = {}   # Count time in queue
    alley_track_dict = {}   # Count time in alley
    store_track_dict = {}   # Count total time in store
    latest_frame = {}       # Track the last frame in which a person was identified
    reidentified = {}       # Whether the person has been re-identified at a later point in time
    plot_head_count_store = []  # y-axis for Footfall Analysis
    plot_head_count_queue = []  # y-axis for Footfall Analysis
    plot_time = []              # x-axis for Footfall Analysis

    # Loop to process each frame and track people
    while True:
        ret, frame = video_capture.read()
        if not ret:
            break

        head_count_store = 0
        head_count_queue = 0
        t1 = time.time()

        image = Image.fromarray(frame[..., ::-1])  # BGR to RGB conversion
        boxs = yolo.detect_image(image)
        features = encoder(frame, boxs)

        # Getting the detections having score of 0.0 to 1.0
        detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]

        # Run non-maxima suppression on the bounding boxes
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker to associate tracking boxes to detection boxes
        tracker.predict()
        tracker.update(detections)

        # Defining the co-ordinates of the areas of interest
        pts = np.array([[0, 0], [105, 0], [170, 85], [0, 143]], np.int32)
        pts = pts.reshape((-1, 1, 2))  # Queue Area
        pts2 = np.array([[105, 0], [320, 0], [320, 240], [0, 240], [0, 143], [170, 85]], np.int32)
        pts2 = pts2.reshape((-1, 1, 2))  # Alley Region

        cv2.polylines(frame, [pts], True, (0, 255, 255), thickness=2)
        cv2.polylines(frame, [pts2], True, (255, 0, 255), thickness=1)

        # Drawing tracker boxes and frame count for people inside the areas of interest
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()

            # Checking if the person is within an area of interest
            queue_point_test = center_point_inside_polygon(bbox, pts2)
            alley_point_test = center_point_inside_polygon(bbox, pts)

            # Checking if a person has been re-identified in a later frame
            if queue_point_test == 'inside' or alley_point_test == 'inside':
                if track.track_id in latest_frame.keys():
                    if latest_frame[track.track_id] != frame_count - 1:
                        reidentified[track.track_id] = 1

            # Initializing variables in case a new person has been seen by the model
            if queue_point_test == 'inside' or alley_point_test == 'inside':
                head_count_store += 1
                if track.track_id not in store_track_dict.keys():
                    store_track_dict[track.track_id] = 0
                    queue_track_dict[track.track_id] = 0
                    alley_track_dict[track.track_id] = 0
                    reidentified[track.track_id] = 0

            # Processing for people inside the Queue Area
            if queue_point_test == 'inside':
                head_count_queue += 1
                queue_track_dict[track.track_id] += 1
                latest_frame[track.track_id] = frame_count
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
                wait_time = round((queue_track_dict[track.track_id] / Input_FPS), 2)
                cv2.putText(frame, str(track.track_id) + ": " + str(wait_time) + "s",
                            (int(bbox[0]), int(bbox[1])), 0, 0.8, (0, 0, 0), 4)
                cv2.putText(frame, str(track.track_id) + ": " + str(wait_time) + "s",
                            (int(bbox[0]), int(bbox[1])), 0, 0.8, (0, 255, 77), 2)

            # Processing for people inside the Alley Region
            if alley_point_test == 'inside':
                alley_track_dict[track.track_id] += 1
                latest_frame[track.track_id] = frame_count
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
                cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 0.8, (0, 0, 0), 4)
                cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 0.8, (0, 255, 77), 2)

            # Getting the total Store time for a person
            if track.track_id in store_track_dict.keys():
                store_track_dict[track.track_id] = queue_track_dict[track.track_id] + alley_track_dict[track.track_id]

        # Drawing bounding box detections for people inside the store
        for det in detections:
            bbox = det.to_tlbr()

            # Checking if the person is within an area of interest
            queue_point_test = center_point_inside_polygon(bbox, pts)
            alley_point_test = center_point_inside_polygon(bbox, pts2)

            if queue_point_test == 'inside' or alley_point_test == 'inside':
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        # Video Overlay - Head Count Data at that instant
        cv2.putText(frame, "Count: " + str(head_count_store), (30, 610),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5, (0, 0, 0), 3, cv2.LINE_AA, False)
        cv2.putText(frame, "Count: " + str(head_count_store), (30, 610),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5, (0, 255, 77), 2, cv2.LINE_AA, False)

        # Calculating the average wait time in queue
        total_people = len([v for v in queue_track_dict.values() if v > 0])
        total_queue_frames = sum(v for v in queue_track_dict.values() if v > 0)
        avg_queue_frames = 0
        if total_people != 0:
            avg_queue_frames = total_queue_frames / total_people
        avg_queue_time = round((avg_queue_frames / Input_FPS), 2)

        # Video Overlay - Average Wait Time in Queue
        cv2.putText(frame, "Avg Queue Time: " + str(avg_queue_time) + 's', (30, 690),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5, (0, 0, 0), 3, cv2.LINE_AA, False)
        cv2.putText(frame, "Avg Queue Time: " + str(avg_queue_time) + 's', (30, 690),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5, (0, 255, 77), 2, cv2.LINE_AA, False)

        # Calculating the average wait time in the store
        total_people = len(store_track_dict)
        total_store_frames = sum(store_track_dict.values())
        avg_store_frames = 0
        if total_people != 0:
            avg_store_frames = total_store_frames / total_people
        avg_store_time = round((avg_store_frames / Input_FPS), 2)

        # Video Overlay - Average Store Time
        cv2.putText(frame, "Avg Store Time: " + str(avg_store_time) + 's', (30, 650),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5, (0, 0, 0), 3, cv2.LINE_AA, False)
        cv2.putText(frame, "Avg Store Time: " + str(avg_store_time) + 's', (30, 650),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5, (0, 255, 77), 2, cv2.LINE_AA, False)

        # Write the frame onto the VideoWriter object
        out.write(frame)

        # Calculating the frames processed per second by the model
        fps = (fps + (1. / (time.time() - t1))) / 2
        frame_count += 1

        # Printing processing status to track completion
        op = "FPS_" + str(frame_count) + "/" + str(co) + ": " + str(round(fps, 2))
        print("\r" + op, end="")

        # Adding plot values for Footfall Analysis every 2 seconds (hard coded for now)
        if frame_count % 50 == 0:
            plot_time.append(round((frame_count / Input_FPS), 2))
            plot_head_count_store.append(head_count_store)
            plot_head_count_queue.append(head_count_queue)

        # Press Q to stop the video
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # Data processed as per the video provided
    print("\n-----------------------------------------------------------------------")
    print("QUEUE WAIT TIME ( Unique Person ID -> Time spent )\n")
    for k, v in queue_track_dict.items():
        print(k, "->", str(round((v / Input_FPS), 2)) + " seconds")
    print("\n-----------------------------------------------------------------------")
    print("ALLEY TIME ( Unique Person ID -> Time spent )\n")
    for k, v in alley_track_dict.items():
        print(k, "->", str(round((v / Input_FPS), 2)) + " seconds")
    print("\n-----------------------------------------------------------------------")
    print("STORE TIME ( Unique Person ID -> Time spent )\n")
    for k, v in store_track_dict.items():
        print(k, "->", str(round((v / Input_FPS), 2)) + " seconds")

    # Defining data to be written into the csv file - Detailed Report
    csv_columns = ['Unique Person ID', 'Queue Time in AOI', 'Total Store Time', 'Re-Identified']
    csv_data = []
    detailed_csv_file = 'Detailed_Store_Report.csv'
    for k, v in store_track_dict.items():
        reid = 'Yes' if reidentified[k] == 1 else 'No'
        csv_row = {csv_columns[0]: k,
                   csv_columns[1]: round((queue_track_dict[k] / Input_FPS), 2),
                   csv_columns[2]: round((v / Input_FPS), 2),
                   csv_columns[3]: reid}
        csv_data.append(csv_row)

    # Writing the data into the csv file - Detailed Report
    with open(detailed_csv_file, 'w') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=csv_columns)
        writer.writeheader()
        for data in csv_data:
            writer.writerow(data)

    # Defining data to be written into the csv file - Brief Report
    csv_columns_brief = ['Total Head Count', 'Total Queue Time', 'Average Queue Time',
                         'Total Store Time', 'Average Store Time']
    brief_csv_file = 'Brief_Store_Report.csv'
    csv_data_brief = {csv_columns_brief[0]: len(store_track_dict),
                      csv_columns_brief[1]: round((sum(queue_track_dict.values()) / Input_FPS), 2),
                      csv_columns_brief[2]: avg_queue_time,
                      csv_columns_brief[3]: round((sum(store_track_dict.values()) / Input_FPS), 2),
                      csv_columns_brief[4]: avg_store_time}

    # Writing the data into the csv file - Brief Report
    with open(brief_csv_file, 'w') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=csv_columns_brief)
        writer.writeheader()
        writer.writerow(csv_data_brief)

    # Plotting a time-series line graph for store and queue head count data and saving it as a .png file
    plt.plot(plot_time, plot_head_count_queue)
    plt.plot(plot_time, plot_head_count_store)
    plt.legend(['Queue Head Count', 'Store Head Count'], loc='upper left')
    plt.xlabel('Time Stamp (in seconds)')
    plt.ylabel('Head Count')
    plt.xlim(0, round(frame_count / Input_FPS) + 1)
    plt.ylim(0, max(plot_head_count_store) + 2)
    plt.title('Footfall Analysis')
    plt.savefig('Footfall_Analysis.png', bbox_inches='tight')

    # Printing plot data
    for i in range(len(plot_time)):
        print(plot_time[i], plot_head_count_queue[i], plot_head_count_store[i])

    # Releasing objects created
    video_capture.release()
    out.release()
    cv2.destroyAllWindows()
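# center_point_inside_polygon() is called above but not defined in this snippet.
# A plausible implementation, assuming it anchors each person at the bottom-center
# of their (xmin, ymin, xmax, ymax) box and returns the strings 'inside'/'outside'
# that the calling code compares against:

import cv2

def center_point_inside_polygon(bbox, polygon_pts):
    center_x = (bbox[0] + bbox[2]) / 2  # horizontal box center
    bottom_y = bbox[3]                  # feet position
    # pointPolygonTest returns +1 inside, 0 on the edge, -1 outside
    result = cv2.pointPolygonTest(polygon_pts, (float(center_x), float(bottom_y)), False)
    return 'inside' if result >= 0 else 'outside'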
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.4
    nn_budget = None
    nms_max_overlap = 1.0
    interpreter = None

    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # calculate cosine distance metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    # initialize tracker
    tracker = Tracker(metric)

    # load configuration for object detector
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    video_path = FLAGS.video

    # get video name by using split method
    video_name = video_path.split('/')[-1]
    video_name = video_name.split('.')[0]

    # load tflite model if flag is set
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
    # otherwise load standard tensorflow saved model
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    # begin video capture
    try:
        vid = cv2.VideoCapture(int(video_path))
    except:
        vid = cv2.VideoCapture(video_path)

    # Try to read the video to check that it is valid
    return_value, frame = vid.read()
    if not return_value:
        print('Invalid video directory!')

    filename = video_path.split('.')[-2]
    # VideoOut = None
    MinimapOut = None

    # Get total number of frames in the video
    TotalFrames = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))

    # get video ready to save locally if flag is set
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        time_milli = vid.get(cv2.CAP_PROP_POS_MSEC)
        time_milli = time_milli / 1000
        # set frames per second
        vid.set(cv2.CAP_PROP_FPS, 1000)
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)

    frame_num = 0
    count = 10
    ObjectDetector = DetectObject()

    for _, i in enumerate(tqdm(range(TotalFrames))):
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
        else:
            print('Video has ended or failed, try a different video format!')
            break

        # pass in the object detector
        ObjectDetector.interpreter = interpreter
        bboxes, frame, result = ObjectDetector.analyzeDetection(
            return_value, frame, frame_num, FLAGS, infer, encoder, nms_max_overlap, tracker)

        # loop through the bounding boxes and export them into the ROI folder
        for box_id, box in bboxes.items():
            xmin, ymin, w, h = int(box[0]), int(box[1]), int(box[2]), int(box[3])
            if w <= 0 or h <= 0:
                continue
            # ROI Extraction
            maskedImage = frame[ymin:ymin + h, xmin:xmin + w]
            # NOTE: the name only includes frame_num, so multiple boxes in the
            # same frame overwrite the same file
            roi_name = "./ROI/ROI_frame_%s.jpg" % (str(frame_num))
            cv2.imwrite(roi_name, maskedImage)  # save cropped image to path

        # cv2.imshow('frame', result)
        frame_num += 1
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cv2.destroyAllWindows()
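# The ROI crop above can silently shrink to an empty array when a detection
# extends past the frame border. A small guard helper (a sketch, not part of
# the original code) that clamps the box before cropping:

def crop_roi(frame, xmin, ymin, w, h):
    H, W = frame.shape[:2]
    x1, y1 = max(xmin, 0), max(ymin, 0)
    x2, y2 = min(xmin + w, W), min(ymin + h, H)
    if x2 <= x1 or y2 <= y1:
        return None  # box lies entirely outside the frame
    return frame[y1:y2, x1:x2]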
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        XYSCALE = cfg.YOLO.XYSCALE_TINY
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY_V3, FLAGS.tiny)
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        XYSCALE = cfg.YOLO.XYSCALE
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny)

    CLASSES = utils.read_class_names(cfg.YOLO.CLASSES)
    NUM_CLASSES = len(CLASSES)
    input_size = FLAGS.size

    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except:
        vid = cv2.VideoCapture(FLAGS.video)

    times = []

    if FLAGS.output:
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    if FLAGS.framework == 'tf':
        input_layer = tf.keras.layers.Input([input_size, input_size, 3])
        if FLAGS.tiny:
            if FLAGS.model == 'yolov3':
                feature_maps = YOLOv3_tiny(input_layer, NUM_CLASSES)
            else:
                feature_maps = YOLOv4_tiny(input_layer, NUM_CLASSES)
            bbox_tensors = []
            for i, fm in enumerate(feature_maps):
                bbox_tensor = decode(fm, NUM_CLASSES, i)
                bbox_tensors.append(bbox_tensor)
            model = tf.keras.Model(input_layer, bbox_tensors)
            utils.load_weights_tiny(model, FLAGS.weights, FLAGS.model)
        else:
            if FLAGS.model == 'yolov3':
                feature_maps = YOLOv3(input_layer, NUM_CLASSES)
                bbox_tensors = []
                for i, fm in enumerate(feature_maps):
                    bbox_tensor = decode(fm, NUM_CLASSES, i)
                    bbox_tensors.append(bbox_tensor)
                model = tf.keras.Model(input_layer, bbox_tensors)
                utils.load_weights_v3(model, FLAGS.weights)
            elif FLAGS.model == 'yolov4':
                feature_maps = YOLOv4(input_layer, NUM_CLASSES)
                bbox_tensors = []
                for i, fm in enumerate(feature_maps):
                    bbox_tensor = decode(fm, NUM_CLASSES, i)
                    bbox_tensors.append(bbox_tensor)
                model = tf.keras.Model(input_layer, bbox_tensors)
                if FLAGS.weights.split(".")[len(FLAGS.weights.split(".")) - 1] == "weights":
                    utils.load_weights(model, FLAGS.weights)
                else:
                    model.load_weights(FLAGS.weights).expect_partial()
        model.summary()
    elif FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
    elif FLAGS.framework == 'trt':
        saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    max_cosine_distance = 0.7  # 0.5 / 0.7
    nn_budget = None
    model_filename = './weights/tracker/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    key_list = list(CLASSES.keys())
    val_list = list(CLASSES.values())
    Track_only = []

    logging.info("Models loaded!")

    while True:
        return_value, frame = vid.read()
        if not return_value:
            logging.warning("Empty Frame")
            break

        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_size = frame.shape[:2]
        image_data = utils.image_preprocess(np.copy(frame), [input_size, input_size])
        image_data = image_data[np.newaxis, ...].astype(np.float32)

        t1 = time.time()
        if FLAGS.framework == 'tf':
            pred_bbox = model.predict(image_data)
        elif FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred_bbox = [interpreter.get_tensor(output_details[i]['index'])
                         for i in range(len(output_details))]
        elif FLAGS.framework == 'trt':
            batched_input = tf.constant(image_data)
            pred_bbox = []
            result = infer(batched_input)
            for _, value in result.items():
                value = value.numpy()
                pred_bbox.append(value)
        t2 = time.time()

        times.append(t2 - t1)
        times = times[-20:]
        ms = sum(times) / len(times) * 1000
        fps = 1000 / ms

        if FLAGS.model == 'yolov4':
            pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES, XYSCALE)
        else:
            pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES)
        bboxes = utils.postprocess_boxes(pred_bbox, frame_size, input_size, 0.5)  # 0.25
        bboxes = utils.nms(bboxes, 0.5, method='nms')  # 0.213

        boxes, scores, names = [], [], []
        for bbox in bboxes:
            if (len(Track_only) != 0 and CLASSES[int(bbox[5])] in Track_only) or len(Track_only) == 0:
                boxes.append([bbox[0].astype(int),
                              bbox[1].astype(int),
                              bbox[2].astype(int) - bbox[0].astype(int),
                              bbox[3].astype(int) - bbox[1].astype(int)])
                scores.append(bbox[4])
                names.append(CLASSES[int(bbox[5])])

        boxes = np.array(boxes)
        names = np.array(names)
        scores = np.array(scores)
        features = np.array(encoder(frame, boxes))
        detections = [Detection(bbox, score, class_name, feature)
                      for bbox, score, class_name, feature in
                      zip(boxes, scores, names, features)]

        tracker.predict()
        tracker.update(detections)

        tracked_bboxes = []
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:  # 1 / 5
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            tracking_id = track.track_id
            index = key_list[val_list.index(class_name)]
            tracked_bboxes.append(bbox.tolist() + [tracking_id, index])

        image = utils.draw_bbox(frame, tracked_bboxes, classes=CLASSES, tracking=True)
        image = cv2.putText(image, "Time: {:.2f}ms".format(sum(times) / len(times) * 1000),
                            (0, 36),  # 24
                            cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 0, 255), 2)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        cv2.namedWindow("Detections", cv2.WINDOW_AUTOSIZE)
        cv2.imshow("Detections", image)
        if FLAGS.output:
            out.write(image)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    vid.release()
    if FLAGS.output:
        out.release()
    cv2.destroyAllWindows()
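# The times[-20:] bookkeeping above is a rolling average of inference latency
# over the last 20 frames. The same idea as a small reusable helper (a sketch;
# the original keeps the list inline):

from collections import deque

class FpsMeter:
    def __init__(self, window=20):
        self.times = deque(maxlen=window)  # seconds per frame, last N frames

    def update(self, seconds):
        self.times.append(seconds)
        ms = sum(self.times) / len(self.times) * 1000
        return ms, 1000.0 / ms  # (average latency in ms, frames per second)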
def read(stack):
    print('Process to read: %s' % os.getpid())
    yolo = YOLO()

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    max_boxs = 0
    face = ['17208019']

    history = {}   # the target's point from the previous frame
    person = {}    # dictionary mapping track id to label
    change = []    # list of ids that have been assigned a new label

    while True:
        if len(stack) != 0:
            frame = stack.pop()
            t1 = time.time()
            frame_count = 0
            localtime = time.asctime(time.localtime(time.time()))

            # get the warning line
            utils.draw(frame, line.readline())
            transboundaryline = line.readline()
            utils.draw(frame, transboundaryline)

            img = Image.fromarray(frame)
            # img.save('frame.jpg')
            '''
            cv2.line(frame, (837, 393), (930, 300), (0, 255, 255), 3)
            transboundaryline = t.line_detect_possible_demo(frame)
            '''
            # image = Image.fromarray(frame)
            image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
            boxs = yolo.detect_image(image)
            # print("box_num", len(boxs))
            features = encoder(frame, boxs)

            # score set to 1.0 here
            detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]

            # Run non-maxima suppression.
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]

            if len(boxs) > max_boxs:
                max_boxs = len(boxs)

            # Call the tracker
            tracker.predict()
            tracker.update(detections)

            # per-frame info
            info = {}
            target = []
            for track in tracker.tracks:
                # one target within the frame
                per_info = {}
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                if track.track_id not in person:
                    person[track.track_id] = str(track.track_id)
                bbox = track.to_tlbr()
                PointX = bbox[0] + ((bbox[2] - bbox[0]) / 2)
                PointY = bbox[3]
                dis = int(PointX) - 1200
                try:
                    if dis < 15:
                        if track.track_id not in change:
                            person[track.track_id] = face.pop(0)
                            change.append(track.track_id)
                except:
                    print('Illegal intrusion')

                # current target
                if track.track_id not in change:
                    per_info['worker_id'] = 'unknown' + str(track.track_id)
                else:
                    per_info['worker_id'] = person[track.track_id]

                # current target coordinates
                yoloPoint = (int(PointX), int(PointY))
                per_info['current_point'] = yoloPoint

                # Kalman filter prediction
                if per_info['worker_id'] not in utils.KalmanNmae:
                    utils.myKalman(per_info['worker_id'])
                if per_info['worker_id'] not in utils.lmp:
                    utils.setLMP(per_info['worker_id'])
                cpx, cpy = utils.predict(yoloPoint[0], yoloPoint[1], per_info['worker_id'])
                if cpx[0] == 0.0 or cpy[0] == 0.0:
                    cpx[0] = yoloPoint[0]
                    cpy[0] = yoloPoint[1]
                if frame_count > 20:
                    per_info['next_point'] = (int(cpx), int(cpy))
                else:
                    per_info['next_point'] = yoloPoint

                # record line-crossing status
                if per_info['worker_id'] in history:
                    per_info['transboundary'] = 'no'
                    # print(transboundaryline)
                    line1 = [per_info['next_point'], history[per_info['worker_id']]]
                    a = line.IsIntersec2(transboundaryline, line1)
                    if a == '有交点':  # returned string means "the lines intersect"
                        print('Line-crossing alert')
                        per_info['transboundary'] = 'yes'

                history[per_info['worker_id']] = per_info['current_point']
                frame_count = frame_count + 1
                # print(per_info)

                # draw the target box
                # cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
                cv2.putText(frame, per_info['worker_id'], (int(bbox[0]), int(bbox[1])), 0,
                            5e-3 * 200, (0, 255, 0), 2)
                target.append(per_info)

            info['time'] = localtime
            # info['frame'] = str(img.tolist()).encode('base64')
            info['frame'] = 'frame'
            info['target'] = target

            # write to json
            info_json = json.dumps(info)
            info_queue.put(info_json)
            getInfo(info_queue)

            cv2.imshow("img", frame)
            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                break
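# utils.myKalman / utils.setLMP / utils.predict are not shown in this snippet.
# One plausible stand-in, keeping a constant-velocity cv2.KalmanFilter per
# worker id and returning the predicted next point (all names here are
# illustrative, not the original helpers):

import cv2
import numpy as np

kalman_filters = {}

def predict_next_point(x, y, worker_id):
    if worker_id not in kalman_filters:
        kf = cv2.KalmanFilter(4, 2)  # state [x, y, vx, vy], measurement [x, y]
        kf.measurementMatrix = np.array([[1, 0, 0, 0],
                                         [0, 1, 0, 0]], np.float32)
        kf.transitionMatrix = np.array([[1, 0, 1, 0],
                                        [0, 1, 0, 1],
                                        [0, 0, 1, 0],
                                        [0, 0, 0, 1]], np.float32)
        kf.processNoiseCov = np.eye(4, dtype=np.float32) * 1e-3
        kalman_filters[worker_id] = kf
    kf = kalman_filters[worker_id]
    kf.correct(np.array([[np.float32(x)], [np.float32(y)]]))
    prediction = kf.predict()
    return int(prediction[0, 0]), int(prediction[1, 0])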
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.4
    nn_budget = None
    nms_max_overlap = 1.0

    # initialize deep sort
    model_filename = cfg.PATH + '/model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # calculate cosine distance metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    # initialize tracker
    tracker = Tracker(metric)

    # load configuration for object detector
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size

    # tf
    saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])
    infer = saved_model_loaded.signatures['serving_default']

    while True:
        data = sys.stdin.readline()
        if data:
            data = json.loads(data)
            if data['end']:
                break

            frame = np.array(data['frame_image'], dtype=np.uint8)
            image_data = frame / 255.
            image_data = image_data[np.newaxis, ...].astype(np.float32)

            # tf
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

            boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
                boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
                scores=tf.reshape(pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
                max_output_size_per_class=50,
                max_total_size=50,
                iou_threshold=FLAGS.iou,
                score_threshold=FLAGS.score)

            # convert data to numpy arrays and slice out unused elements
            num_objects = valid_detections.numpy()[0]
            bboxes = boxes.numpy()[0]
            bboxes = bboxes[0:int(num_objects)]
            scores = scores.numpy()[0]
            scores = scores[0:int(num_objects)]
            classes = classes.numpy()[0]
            classes = classes[0:int(num_objects)]

            # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
            original_h, original_w, _ = frame.shape
            bboxes = utils.format_boxes(bboxes, original_h, original_w)

            # store all predictions in one parameter for simplicity when calling functions
            pred_bbox = [bboxes, scores, classes, num_objects]

            # read in all class names from config
            class_names = utils.read_class_names(cfg.YOLO.CLASSES)

            # by default allow all classes in .names file
            allowed_classes = list(class_names.values())

            # custom allowed classes (uncomment line below to customize tracker for only people)
            # allowed_classes = ['person']

            # loop through objects and use class index to get class name,
            # allow only classes in allowed_classes list
            names = []
            deleted_indx = []
            for i in range(num_objects):
                class_indx = int(classes[i])
                class_name = class_names[class_indx]
                if class_name not in allowed_classes:
                    deleted_indx.append(i)
                else:
                    names.append(class_name)
            names = np.array(names)

            # delete detections that are not in allowed_classes
            bboxes = np.delete(bboxes, deleted_indx, axis=0)
            scores = np.delete(scores, deleted_indx, axis=0)

            # encode yolo detections and feed to tracker
            features = encoder(frame, bboxes)
            detections = [Detection(bbox, score, class_name, feature)
                          for bbox, score, class_name, feature in
                          zip(bboxes, scores, names, features)]

            # run non-maxima suppression
            boxs = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            classes = np.array([d.class_name for d in detections])
            indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]

            # ds = []
            # for detection in detections:
            #     d = dict()
            #     d["bbox"] = detection.tlwh.tolist()
            #     d["confidence"] = detection.confidence
            #     d["class"] = detection.class_name
            #     ds.append(d)
            #
            # # send data to Node (without tracking...)
            # print(json.dumps(ds))

            # Call the tracker
            tracker.predict()
            tracker.update(detections)

            # Store tracks for json...
            tracks = []

            # update tracks
            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                class_name = track.get_class()

                t = dict()
                bbs = track.to_tlbr().tolist()
                t["class"] = class_name
                bbox = dict()
                bbox["left"] = bbs[0]
                bbox["top"] = bbs[1]
                bbox["right"] = bbs[2]
                bbox["bottom"] = bbs[3]
                t["bbox"] = bbox
                t["id"] = track.track_id
                t["score"] = track.detection_actual_score
                tracks.append(t)

            # send data to Node!
            print(json.dumps(tracks))
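# This variant speaks line-delimited JSON: one {'end': ..., 'frame_image': ...}
# object per line on stdin, and one JSON array of tracks per frame on stdout.
# A hypothetical parent process (script name and frame size are assumptions)
# exercising that protocol:

import json
import subprocess

proc = subprocess.Popen(['python', 'tracker_stdin.py'],
                        stdin=subprocess.PIPE, stdout=subprocess.PIPE, text=True)

frame = [[[0, 0, 0]] * 416] * 416  # dummy RGB frame as nested lists
proc.stdin.write(json.dumps({'end': False, 'frame_image': frame}) + '\n')
proc.stdin.flush()
tracks = json.loads(proc.stdout.readline())  # [{'class': ..., 'bbox': {...}, 'id': ..., 'score': ...}]

proc.stdin.write(json.dumps({'end': True}) + '\n')
proc.stdin.flush()
proc.wait()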
def main(_argv):
    avg = []
    # Definition of the parameters
    max_cosine_distance = 0.4
    nn_budget = None
    nms_max_overlap = 1.0

    # load the regression model weights
    weight_path = './2_input_model_2-3.5%/'
    loaded_model = tf.keras.models.load_model(weight_path)

    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # calculate cosine distance metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    # initialize tracker
    tracker = Tracker(metric)

    # load configuration for object detector
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    video_path = FLAGS.video

    # load tflite model if flag is set
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    # otherwise load standard tensorflow saved model
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    # begin video capture
    try:
        vid = cv2.VideoCapture(int(video_path))
    except:
        vid = cv2.VideoCapture(video_path)

    out = None

    # get video ready to save locally if flag is set
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    frame_num = 0
    # while video is running
    while True:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
        else:
            print('Video has ended or failed, try a different video format!')
            break
        frame_num += 1
        print('Frame #: ', frame_num)
        frame_size = frame.shape[:2]
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        start_time = time.time()

        # run detections on tflite if flag is set
        if FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred = [interpreter.get_tensor(output_details[i]['index'])
                    for i in range(len(output_details))]
            # run detections using yolov3 if flag is set
            if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
                boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
        else:
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score)

        # convert data to numpy arrays and slice out unused elements
        num_objects = valid_detections.numpy()[0]
        bboxes = boxes.numpy()[0]
        bboxes = bboxes[0:int(num_objects)]
        scores = scores.numpy()[0]
        scores = scores[0:int(num_objects)]
        classes = classes.numpy()[0]
        classes = classes[0:int(num_objects)]

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)

        # store all predictions in one parameter for simplicity when calling functions
        pred_bbox = [bboxes, scores, classes, num_objects]
        # print("pred_bbox: ", pred_bbox[0])
        # print("scores: ", pred_bbox[1])
        # print("classes: ", pred_bbox[2])
        # print("num: ", pred_bbox[3])
        # print("width: ", width)
        # print("height: ", height)

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        allowed_classes = list(class_names.values())

        # custom allowed classes (uncomment line below to customize tracker for only people)
        # allowed_classes = ['person']

        # loop through objects and use class index to get class name,
        # allow only classes in allowed_classes list
        names = []
        deleted_indx = []
        for i in range(num_objects):
            class_indx = int(classes[i])
            class_name = class_names[class_indx]
            if class_name not in allowed_classes:
                deleted_indx.append(i)
            else:
                names.append(class_name)
        names = np.array(names)
        count = len(names)
        if FLAGS.count:
            cv2.putText(frame, "Objects being tracked: {}".format(count), (5, 35),
                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2)
            print("Objects being tracked: {}".format(count))
        # delete detections that are not in allowed_classes
        bboxes = np.delete(bboxes, deleted_indx, axis=0)
        scores = np.delete(scores, deleted_indx, axis=0)

        # encode yolo detections and feed to tracker
        features = encoder(frame, bboxes)
        detections = [Detection(bbox, score, class_name, feature)
                      for bbox, score, class_name, feature in
                      zip(bboxes, scores, names, features)]

        # initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        # print("boxs ", boxs)
        # print("scores ", scores)
        # print("classes ", classes)
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        # print("indices ", indices)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        cv2.putText(frame, "using regress", (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (255, 0, 255), 2)
        # cv2.putText(frame, "Objects being detected: {}".format(count), (5, 350),
        #             cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 0, 255), 2)
        cv2.putText(frame, "frame# {}".format(frame_num), (750, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (255, 0, 255), 2)

        # update tracks
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()

            if 'entrance' not in classes:
                if len(classes) > 1:
                    if contains_duplicates(classes) == False:
                        # color = (50, 89, 170)
                        check_rect = 0
                        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
                        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))

                        ########## set sticker as low priority #############
                        # NOTE: the original condition was written as
                        # (classes[0]=='mat' or 'sensor'), which is always truthy;
                        # the membership tests below are the intended checks.
                        if classes[0] in ('mat', 'sensor') and classes[1] in ('mat', 'sensor'):
                            print("*************NO STK**********************************")
                            color = (50, 89, 170)
                            # xywh to xmin ymin xmax ymax
                            x1, y1, x2, y2 = convert2(width, height,
                                                      int(boxs[0][0]), int(boxs[0][1]),
                                                      int(boxs[0][0] + boxs[0][2]), int(boxs[0][1] + boxs[0][3]))
                            x3, y3, x4, y4 = convert2(width, height,
                                                      int(bboxes[1][0]), int(bboxes[1][1]),
                                                      int(bboxes[1][0] + bboxes[1][2]), int(bboxes[1][1] + bboxes[1][3]))
                            reg_input = np.array([[class_index(classes[0]), x1, y1, x2, y2,
                                                   class_index(classes[1]), x3, y3, x4, y4]])
                            predictions = loaded_model.predict(reg_input)
                            a1_pred = predictions[0]
                            b1_pred = predictions[1]
                            c1_pred = predictions[2]
                            d1_pred = predictions[3]
                            xmin, xmax, ymin, ymax = convert(width, height, a1_pred, b1_pred, c1_pred, d1_pred)
                            start_point = (xmin, ymin)
                            end_point = (xmax, ymax)
                            rect1 = xmax - xmin
                            rect2 = ymax - ymin
                            check_rect = rect2 / rect1
                        ################ else condition for sticker ######
                        else:
                            print("*************USE STK**********************************")
                            if classes[0] in ('famSticker', 'okmartSticker', 'sevenSticker'):
                                color = (60, 120, 40)
                                # xywh to xmin ymin xmax ymax
                                x1, y1, x2, y2 = convert2(width, height,
                                                          int(boxs[0][0]), int(boxs[0][1]),
                                                          int(boxs[0][0] + boxs[0][2]), int(boxs[0][1] + boxs[0][3]))
                                x3, y3, x4, y4 = convert2(width, height,
                                                          int(bboxes[1][0]), int(bboxes[1][1]),
                                                          int(bboxes[1][0] + bboxes[1][2]), int(bboxes[1][1] + bboxes[1][3]))
                                reg_input = np.array([[class_index(classes[0]), x1, y1, x2, y2,
                                                       class_index(classes[1]), x3, y3, x4, y4]])

                                #### ratio ####
                                C1_x = boxs[0][0] + (boxs[0][2] / 2)
                                C1_y = boxs[0][1] + (boxs[0][3] / 2)
                                C2_x = bboxes[1][0] + (bboxes[1][2] / 2)
                                C2_y = bboxes[1][1] + (bboxes[1][3] / 2)
                                Dx = C2_x - C1_x
                                Dy = C2_y - C1_y
                                # The rectangles do not intersect and partially overlap in the
                                # X direction: the minimum distance is between the bottom edge
                                # of the upper rectangle and the top edge of the lower one.
                                if (Dx < ((int(boxs[0][0] + boxs[0][2]) + int(bboxes[1][0] + bboxes[1][2])) / 2)
                                        and Dy >= ((int(boxs[0][1] + boxs[0][3]) + int(bboxes[1][1] + bboxes[1][3])) / 2)):
                                    min_dist = Dy - ((int(boxs[0][1] + boxs[0][3]) + int(bboxes[1][1] + bboxes[1][3])) / 2)
                                # The rectangles do not intersect and partially overlap in the
                                # Y direction: the minimum distance is between the right edge
                                # of the left rectangle and the left edge of the right one.
                                elif (Dx >= ((int(boxs[0][0] + boxs[0][2]) + int(bboxes[1][0] + bboxes[1][2])) / 2)
                                        and Dy < ((int(boxs[0][1] + boxs[0][3]) + int(bboxes[1][1] + bboxes[1][3])) / 2)):
                                    min_dist = Dx - ((int(boxs[0][0] + boxs[0][2]) + int(bboxes[1][0] + bboxes[1][2])) / 2)
                                # The rectangles do not intersect and do not overlap on either
                                # axis: the minimum distance is between the two closest vertices
                                # (Pythagorean theorem).
                                elif (Dx >= ((int(boxs[0][0] + boxs[0][2]) + int(bboxes[1][0] + bboxes[1][2])) / 2)
                                        and Dy >= ((int(boxs[0][1] + boxs[0][3]) + int(bboxes[1][1] + bboxes[1][3])) / 2)):
                                    delta_x = Dx - ((int(boxs[0][0] + boxs[0][2]) + int(bboxes[1][0] + bboxes[1][2])) / 2)
                                    delta_y = Dy - ((int(boxs[0][1] + boxs[0][3]) + int(bboxes[1][1] + bboxes[1][3])) / 2)
                                    min_dist = np.sqrt(delta_x * delta_x + delta_y * delta_y)
                                # The rectangles intersect: the minimum distance is negative, use -1.
                                else:
                                    min_dist = -1

                                if classes[1] == 'mat':
                                    if (min_dist / Dy) < 3:
                                        predictions = loaded_model.predict(reg_input)
                                        a1_pred = predictions[0]
                                        b1_pred = predictions[1]
                                        c1_pred = predictions[2]
                                        d1_pred = predictions[3]
                                        xmin, xmax, ymin, ymax = convert(width, height, a1_pred, b1_pred, c1_pred, d1_pred)
                                        start_point = (xmin, ymin)
                                        end_point = (xmax, ymax)
                                        rect1 = xmax - xmin
                                        rect2 = ymax - ymin
                                        check_rect = rect2 / rect1
                                    else:
                                        print("not predict")
                                elif classes[1] == 'sensor':
                                    if (min_dist / Dx) < 3:
                                        predictions = loaded_model.predict(reg_input)
                                        a1_pred = predictions[0]
                                        b1_pred = predictions[1]
                                        c1_pred = predictions[2]
                                        d1_pred = predictions[3]
                                        xmin, xmax, ymin, ymax = convert(width, height, a1_pred, b1_pred, c1_pred, d1_pred)
                                        start_point = (xmin, ymin)
                                        end_point = (xmax, ymax)
                                        rect1 = xmax - xmin
                                        rect2 = ymax - ymin
                                        check_rect = rect2 / rect1
                                    else:
                                        print("not predict")
                            elif classes[1] in ('famSticker', 'okmartSticker', 'sevenSticker'):
                                color = (60, 120, 40)
                                # xywh to xmin ymin xmax ymax
                                x1, y1, x2, y2 = convert2(width, height,
                                                          int(boxs[0][0]), int(boxs[0][1]),
                                                          int(boxs[0][0] + boxs[0][2]), int(boxs[0][1] + boxs[0][3]))
                                x3, y3, x4, y4 = convert2(width, height,
                                                          int(bboxes[1][0]), int(bboxes[1][1]),
                                                          int(bboxes[1][0] + bboxes[1][2]), int(bboxes[1][1] + bboxes[1][3]))
                                reg_input = np.array([[class_index(classes[0]), x1, y1, x2, y2,
                                                       class_index(classes[1]), x3, y3, x4, y4]])

                                #### ratio ####
                                C1_x = boxs[0][0] + (boxs[0][2] / 2)
                                C1_y = boxs[0][1] + (boxs[0][3] / 2)
                                C2_x = bboxes[1][0] + (bboxes[1][2] / 2)
                                C2_y = bboxes[1][1] + (bboxes[1][3] / 2)
                                Dx = C2_x - C1_x
                                Dy = C2_y - C1_y
                                # Same minimum-distance cases as above, keyed on classes[0].
                                if (Dx < ((int(boxs[0][0] + boxs[0][2]) + int(bboxes[1][0] + bboxes[1][2])) / 2)
                                        and Dy >= ((int(boxs[0][1] + boxs[0][3]) + int(bboxes[1][1] + bboxes[1][3])) / 2)):
                                    min_dist = Dy - ((int(boxs[0][1] + boxs[0][3]) + int(bboxes[1][1] + bboxes[1][3])) / 2)
                                elif (Dx >= ((int(boxs[0][0] + boxs[0][2]) + int(bboxes[1][0] + bboxes[1][2])) / 2)
                                        and Dy < ((int(boxs[0][1] + boxs[0][3]) + int(bboxes[1][1] + bboxes[1][3])) / 2)):
                                    min_dist = Dx - ((int(boxs[0][0] + boxs[0][2]) + int(bboxes[1][0] + bboxes[1][2])) / 2)
                                elif (Dx >= ((int(boxs[0][0] + boxs[0][2]) + int(bboxes[1][0] + bboxes[1][2])) / 2)
                                        and Dy >= ((int(boxs[0][1] + boxs[0][3]) + int(bboxes[1][1] + bboxes[1][3])) / 2)):
                                    delta_x = Dx - ((int(boxs[0][0] + boxs[0][2]) + int(bboxes[1][0] + bboxes[1][2])) / 2)
                                    delta_y = Dy - ((int(boxs[0][1] + boxs[0][3]) + int(bboxes[1][1] + bboxes[1][3])) / 2)
                                    min_dist = np.sqrt(delta_x * delta_x + delta_y * delta_y)
                                else:
                                    min_dist = -1

                                if classes[0] == 'mat':
                                    if (min_dist / Dy) < 3:
                                        predictions = loaded_model.predict(reg_input)
                                        a1_pred = predictions[0]
                                        b1_pred = predictions[1]
                                        c1_pred = predictions[2]
                                        d1_pred = predictions[3]
                                        xmin, xmax, ymin, ymax = convert(width, height, a1_pred, b1_pred, c1_pred, d1_pred)
                                        start_point = (xmin, ymin)
                                        end_point = (xmax, ymax)
                                        rect1 = xmax - xmin
                                        rect2 = ymax - ymin
                                        check_rect = rect2 / rect1
                                    else:
                                        print("not predict")
                                elif classes[0] == 'sensor':
                                    if (min_dist / Dx) < 3:
                                        predictions = loaded_model.predict(reg_input)
                                        a1_pred = predictions[0]
                                        b1_pred = predictions[1]
                                        c1_pred = predictions[2]
                                        d1_pred = predictions[3]
                                        xmin, xmax, ymin, ymax = convert(width, height, a1_pred, b1_pred, c1_pred, d1_pred)
                                        start_point = (xmin, ymin)
                                        end_point = (xmax, ymax)
                                        rect1 = xmax - xmin
                                        rect2 = ymax - ymin
                                        check_rect = rect2 / rect1
                                    else:
                                        print("not predict")

                        ######## check door size and display ########
                        # if check_rect > 1 and frame_num != 104:
                        print("check_rect:{}".format(check_rect))
                        if check_rect > 1:
                            blk = np.zeros(frame.shape, np.uint8)
                            cv2.rectangle(blk, start_point, end_point, color, cv2.FILLED)
                            frame = cv2.addWeighted(frame, 1.0, blk, 0.5, 1)
                            print("predict_BBox Coords (xmin, ymin, xmax, ymax): {}".format((xmin, ymin, xmax, ymax)))
                        else:
                            print("not show predicted bbox")

            ######## select one entrance (disabled) ########
            # if classes.count('entrance') > 1:
            #     entrance_num = []
            #     iou_list = []
            #     iou_check = []
            #     for i in range(len(classes)):
            #         if classes[i] == 'entrance':
            #             entrance_num.append(i)
            #     if len(classes) > 1:
            #         if contains_duplicates(classes) == False:
            #             color = (50, 89, 170)
            #             width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
            #             height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
            #             x1, y1, x2, y2 = convert2(width, height, int(boxs[0][0]), int(boxs[0][1]),
            #                                       int(boxs[0][0] + boxs[0][2]), int(boxs[0][1] + boxs[0][3]))
            #             x3, y3, x4, y4 = convert2(width, height, int(bboxes[1][0]), int(bboxes[1][1]),
            #                                       int(bboxes[1][0] + bboxes[1][2]), int(bboxes[1][1] + bboxes[1][3]))
            #             reg_input = np.array([[class_index(classes[0]), x1, y1, x2, y2,
            #                                    class_index(classes[1]), x3, y3, x4, y4]])
            #             predictions = loaded_model.predict(reg_input)
            #             a1_pred = predictions[0]
            #             b1_pred = predictions[1]
            #             c1_pred = predictions[2]
            #             d1_pred = predictions[3]
            #             xmin, xmax, ymin, ymax = convert(width, height, a1_pred, b1_pred, c1_pred, d1_pred)
            #             ### IOU ###
            #             GT_bbox_area = (xmax - xmin + 1) * (ymax - ymin + 1)
            #             ## check entrance ##
            #             Pred_bbox_area = (x_bottomright_p - x_topleft_p + 1) * (y_bottomright_p - y_topleft_p + 1)
            #             x_top_left = np.max([x_topleft_gt, x_topleft_p])
            #             y_top_left = np.max([y_topleft_gt, y_topleft_p])
            #             x_bottom_right = np.min([x_bottomright_gt, x_bottomright_p])
            #             y_bottom_right = np.min([y_bottomright_gt, y_bottomright_p])
            #             intersection_area = (x_bottom_right - x_top_left + 1) * (y_bottom_right - y_top_left + 1)
            #             union_area = (GT_bbox_area + Pred_bbox_area - intersection_area)
            #             iou_check.append(intersection_area / union_area)
            #             for j in len(iou_check):
            #                 if entrance_num[j] < iou_check.max:
            #                     track.delete
            # if (int(track.track_id) >= 3 or (int(track.track_id) > 10 and int(track.track_id) < 20)):
            #     frame_num

            # draw bbox on screen
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            if class_name == 'entrance':
                if int(track.track_id) == 1 and frame_num > 121:
                    print("skip Tracker ID: {}, Class: {}".format(str(track.track_id), class_name))
                else:
                    print("RED Tracker ID: {}, Class: {}".format(str(track.track_id), class_name))
                    blk = np.zeros(frame.shape, np.uint8)
                    cv2.rectangle(blk, (int(bbox[0] * 1.05), int(bbox[1] * 1.05)),
                                  (int(bbox[2] * 0.95), int(bbox[3] * 0.95)), (255, 0, 0), cv2.FILLED)
                    frame = cv2.addWeighted(frame, 1.0, blk, 0.5, 1)
            cv2.rectangle(frame, (int(bbox[0] * 1.05), int(bbox[1] * 1.05)),
                          (int(bbox[2] * 0.95), int(bbox[3] * 0.95)), color, 2)
            cv2.rectangle(frame, (int(bbox[0] * 1.05), int(bbox[1] * 1.05 - 30)),
                          (int(bbox[0] * 1.05) + (len(class_name) + len(str(track.track_id))) * 17,
                           int(bbox[1] * 1.05)), color, -1)
            cv2.putText(frame, class_name + "-" + str(track.track_id),
                        (int(bbox[0] * 1.05), int(bbox[1] * 1.05 - 10)), 0, 0.75, (255, 255, 255), 2)

            # if enable info flag then print details about each track
            if FLAGS.info:
                print("Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}".format(
                    str(track.track_id), class_name,
                    (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))))

        # calculate frames per second of running detections
        fps = 1.0 / (time.time() - start_time)
        print("FPS: %.2f" % fps)
        avg.append(fps)
        print("avg fps {}".format(statistics.mean(avg)))
        cv2.putText(frame, "FPS: %.2f" % fps, (50, 500), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (66, 245, 141), 2)

        result = np.asarray(frame)
        result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        if not FLAGS.dont_show:
            cv2.imshow("Output Video", result)

        # if output flag is set, save video file
        if FLAGS.output:
            out.write(result)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
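# The center-offset comparisons above approximate the minimum distance between
# two axis-aligned rectangles. A cleaner, equivalent-in-spirit sketch working
# directly on (xmin, ymin, xmax, ymax) corners (not the original helper):

import math

def min_rect_distance(r1, r2):
    # gap on each axis is 0 when the rectangles overlap on that axis
    gap_x = max(r1[0] - r2[2], r2[0] - r1[2], 0)
    gap_y = max(r1[1] - r2[3], r2[1] - r1[3], 0)
    if gap_x == 0 and gap_y == 0:
        return -1  # rectangles intersect (or touch), mirroring the -1 case above
    return math.hypot(gap_x, gap_y)  # reduces to the single-axis gap when the other is 0

print(min_rect_distance((0, 0, 1, 1), (2, 0, 3, 1)))  # 1.0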
def deepsort(yolo, args): #nms_max_overlap = 0.3 #nms threshold images_input = os.path.isdir(args.input) if images_input: # get images list jpeg_files = glob.glob(os.path.join(args.input, '*.jpeg')) jpg_files = glob.glob(os.path.join(args.input, '*.jpg')) frame_capture = jpeg_files + jpg_files frame_capture.sort() else: # create video capture stream frame_capture = cv2.VideoCapture(0 if args.input == '0' else args.input) if not frame_capture.isOpened(): raise IOError("Couldn't open webcam or video") # create video save stream if needed save_output = args.output != "" if save_output: if images_input: raise IOError("image folder input can't be saved to a video file") # here we encode the video to MPEG-4 for better compatibility; you can use ffmpeg later # to convert it to x264 to reduce file size: # ffmpeg -i test.mp4 -vcodec libx264 -f mp4 test_264.mp4 # #video_FourCC = cv2.VideoWriter_fourcc(*'XVID') if args.input == '0' else int(frame_capture.get(cv2.CAP_PROP_FOURCC)) video_FourCC = cv2.VideoWriter_fourcc( *'XVID') if args.input == '0' else cv2.VideoWriter_fourcc(*"mp4v") video_fps = frame_capture.get(cv2.CAP_PROP_FPS) video_size = (int(frame_capture.get(cv2.CAP_PROP_FRAME_WIDTH)), int(frame_capture.get(cv2.CAP_PROP_FRAME_HEIGHT))) out = cv2.VideoWriter(args.output, video_FourCC, (5. if args.input == '0' else video_fps), video_size) if args.tracking_classes_path: # load the object classes used in tracking, if provided; other classes # from the detector will be ignored tracking_class_names = get_classes(args.tracking_classes_path) else: tracking_class_names = None #create deep_sort box encoder encoder = create_box_encoder(args.deepsort_model_path, batch_size=1) #create deep_sort tracker max_cosine_distance = 0.5 #threshold for cosine distance nn_budget = None metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) # alloc a set of queues to record the motion trace # of each track id motion_traces = [deque(maxlen=30) for _ in range(9999)] total_obj_counter = [] # initialize a list of colors to represent each possible class label np.random.seed(100) COLORS = np.random.randint(0, 255, size=(200, 3), dtype="uint8") i = 0 fps = 0.0 while True: def get_frame(): # get frame from video or image folder if images_input: if i >= len(frame_capture): ret = False frame = None else: ret = True image_file = frame_capture[i] frame = cv2.imread(image_file) else: ret, frame = frame_capture.read() return ret, frame ret, frame = get_frame() if not ret: break #time.sleep(0.2) i += 1 start_time = time.time() image = Image.fromarray(frame[..., ::-1]) # bgr to rgb # detect object from image _, out_boxes, out_classnames, out_scores = yolo.detect_image(image) # get tracking objects and convert bbox from (xmin,ymin,xmax,ymax) to (x,y,w,h) boxes, class_names, scores = get_tracking_object( out_boxes, out_classnames, out_scores, tracking_class_names) # get encoded features of bbox area image features = encoder(frame, boxes) # form up detection records detections = [ Detection(bbox, score, feature, class_name) for bbox, score, class_name, feature in zip( boxes, scores, class_names, features) ] # Run non-maximum suppression.
#nms_boxes = np.array([d.tlwh for d in detections]) #nms_scores = np.array([d.confidence for d in detections]) #indices = preprocessing.non_max_suppression(nms_boxes, nms_max_overlap, nms_scores) #detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) # show all detection result as white box for det in detections: bbox = det.to_tlbr() cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2) cv2.putText(frame, str(det.class_name), (int(bbox[0]), int(bbox[1] - 20)), 0, 5e-3 * 150, (255, 255, 255), 2) track_indexes = [] track_count = 0 for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue # record tracking info and get bbox track_indexes.append(int(track.track_id)) total_obj_counter.append(int(track.track_id)) bbox = track.to_tlbr() # show all tracking result as color box color = [ int(c) for c in COLORS[track_indexes[track_count] % len(COLORS)] ] cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (color), 3) cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1] - 20)), 0, 5e-3 * 150, (color), 2) if track.class_name: cv2.putText(frame, str(track.class_name), (int(bbox[0] + 30), int(bbox[1] - 20)), 0, 5e-3 * 150, (color), 2) track_count += 1 # get center point (x,y) of current track bbox and record in queue center = (int( ((bbox[0]) + (bbox[2])) / 2), int(((bbox[1]) + (bbox[3])) / 2)) motion_traces[track.track_id].append(center) # draw current center point thickness = 5 cv2.circle(frame, (center), 1, color, thickness) #draw motion trace motion_trace = motion_traces[track.track_id] for j in range(1, len(motion_trace)): if motion_trace[j - 1] is None or motion_trace[j] is None: continue thickness = int(np.sqrt(64 / float(j + 1)) * 2) cv2.line(frame, (motion_trace[j - 1]), (motion_trace[j]), (color), thickness) # show tracking statistics total_obj_num = len(set(total_obj_counter)) cv2.putText(frame, "Total Object Counter: " + str(total_obj_num), (int(20), int(120)), 0, 5e-3 * 200, (0, 255, 0), 2) cv2.putText(frame, "Current Object Counter: " + str(track_count), (int(20), int(80)), 0, 5e-3 * 200, (0, 255, 0), 2) cv2.putText(frame, "FPS: %f" % (fps), (int(20), int(40)), 0, 5e-3 * 200, (0, 255, 0), 3) # refresh window cv2.namedWindow("DeepSORT", 0) cv2.resizeWindow('DeepSORT', 1024, 768) cv2.imshow('DeepSORT', frame) if save_output: #save a frame out.write(frame) end_time = time.time() fps = (fps + (1. / (end_time - start_time))) / 2 # Press q to stop video if cv2.waitKey(1) & 0xFF == ord('q'): break # Release everything if job is finished if not images_input: frame_capture.release() if save_output: out.release() cv2.destroyAllWindows()
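# --- Hedged sketch (an assumption, not from the original repo): the motion-trace
# drawing inside deepsort() above, pulled out into a reusable helper. A
# deque(maxlen=30) silently drops the oldest center point, so every track keeps a
# bounded trail without any manual pruning.
from collections import defaultdict, deque

import cv2
import numpy as np

def draw_motion_trace(frame, trace, color):
    """Draw one track's recent center points as a tapering polyline."""
    for j in range(1, len(trace)):
        if trace[j - 1] is None or trace[j] is None:
            continue
        thickness = int(np.sqrt(64 / float(j + 1)) * 2)  # same taper as the loop above
        cv2.line(frame, trace[j - 1], trace[j], color, thickness)

# usage sketch: one bounded trail per track id, created lazily on first append
motion_traces = defaultdict(lambda: deque(maxlen=30))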
def Object_tracking(yolo, video_path, output_path, class_names, image_size=416, show=False, rectangle_colors=''): # Definition of the parameters max_cosine_distance = 0.7 nn_budget = None # initialize deep sort object model_filename = 'models/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) if video_path: vid = cv2.VideoCapture(video_path) # detect on video else: vid = cv2.VideoCapture(0) # detect from webcam width, height, fps = get_video_capture_info(vid) codec = cv2.VideoWriter_fourcc(*'XVID') # output_path must be .mp4 out = cv2.VideoWriter(output_path, codec, fps, (width, height)) key_list = list(class_names.keys()) val_list = list(class_names.values()) detection_times, tracking_times = [], [] _, frame = vid.read() # BGR while frame is not None: # keep a copy of the frame for display purposes (draw_bboxes); the original pair of back-to-back BGR2RGB conversions cancelled out, so this copy stays BGR original_frame = frame.copy() # preprocessing found in datasets.py img = preprocess_image(frame, image_size) t1 = time.time() boxes, class_inds, scores = yolo_predict(yolo, img, frame) t2 = time.time() names = [] for clss in class_inds: names.append(class_names[clss]) features = np.array(encoder(original_frame, boxes)) # Pass detections to the deepsort object and obtain the track information. detections = [ Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip( boxes, scores, names, features) ] tracker.predict() tracker.update(detections) # Obtain info from the tracks tracked_bboxes = get_tracker_info(tracker, val_list, key_list) # update the timing information t3 = time.time() detection_times.append(t2 - t1) tracking_times.append(t3 - t1) detection_times = detection_times[-20:] tracking_times = tracking_times[-20:] ms, fps, fps2 = efficiency_statistics(detection_times, tracking_times) # draw detection on frame image = draw_bbox(original_frame, tracked_bboxes, class_names, tracking=True, rectangle_colors=rectangle_colors) image = cv2.putText(image, "Time: {:.1f} FPS".format(fps), (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2) # get next frame _, frame = vid.read() # BGR # show and store the results print( "Time: {:.2f}ms, Detection FPS: {:.1f}, total FPS: {:.1f}".format( ms, fps, fps2)) if output_path != '': out.write(image) if show: cv2.imshow('output', image) if cv2.waitKey(25) & 0xFF == ord("q"): cv2.destroyAllWindows() break cv2.destroyAllWindows()
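# --- Hedged sketch: `efficiency_statistics` is called above but defined elsewhere in
# that project. A plausible implementation, under the assumption that it averages the
# sliding windows of frame timings and reports milliseconds per detection plus
# detection-only and end-to-end FPS (matching the print format above).
def efficiency_statistics(detection_times, tracking_times):
    ms = sum(detection_times) / len(detection_times) * 1000  # mean detection time in ms
    fps = 1000 / ms                                          # detection-only FPS
    fps2 = 1 / (sum(tracking_times) / len(tracking_times))   # detection + tracking FPS
    return ms, fps, fps2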
def main(yolo): start = time.time() # Definition of the parameters max_cosine_distance = 0.5 # cosine distance gating threshold (was 0.9) nn_budget = None nms_max_overlap = 0.3 # non-maximum suppression threshold vio_counter = 0 counter = [] # frame counting # deep_sort model_filename = 'model_data/market1501.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) writeVideo_flag = True video_capture = cv2.VideoCapture(args["input"]) video_capture.set(cv2.CAP_PROP_FRAME_WIDTH, 1280) video_capture.set(cv2.CAP_PROP_FRAME_HEIGHT, 720) original_fps = video_capture.get(cv2.CAP_PROP_FPS) output_size = (200, 200) fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v') out2 = cv2.VideoWriter('%s_output.mp4' % (args["input"].split('.')[0]), fourcc, original_fps, output_size) if writeVideo_flag: # Define the codec and create VideoWriter object w = int(video_capture.get(3)) h = int(video_capture.get(4)) #fourcc = cv2.VideoWriter_fourcc(*'MJPG') out = cv2.VideoWriter( './output/' + args["input"][43:57] + "_" + args["class"] + '_output.mp4', fourcc, original_fps, (w, h)) list_file = open('detection.txt', 'w') frame_index = -1 fps = 0.0 first, mouse_frame = video_capture.read() cv2.namedWindow('DrawLine') cv2.resizeWindow('DrawLine', 1280, 720) while True: cv2.setMouseCallback('DrawLine', draw_line) cv2.imshow('DrawLine', mouse_frame) if cv2.waitKey(0) == ord('c'): break cv2.destroyAllWindows() ## first, mouse_frame = video_capture.read() cv2.namedWindow('DrawALine') cv2.resizeWindow('DrawALine', 1280, 720) while True: cv2.setMouseCallback('DrawALine', draw_Aline) cv2.imshow('DrawALine', mouse_frame) if cv2.waitKey(0) == ord('a'): break cv2.destroyAllWindows() ## first, mouse_frame = video_capture.read() cv2.namedWindow('DrawBLine') cv2.resizeWindow('DrawBLine', 1280, 720) while True: cv2.setMouseCallback('DrawBLine', draw_Bline) cv2.imshow('DrawBLine', mouse_frame) if cv2.waitKey(0) == ord('b'): break cv2.destroyAllWindows() while True: ret, frame = video_capture.read() if not ret: break t1 = time.time() image = Image.fromarray(frame[..., ::-1]) #bgr to rgb boxs, class_names = yolo.detect_image(image) features = encoder(frame, boxs) # detection score is fixed to 1.0 here detections = [ Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features) ] # Run non-maxima suppression.
boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) i = 0 indexIDs = [] c = [] for det in detections: bbox = det.to_tlbr() for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue # draw the box indexIDs.append(int(track.track_id)) counter.append(int(track.track_id)) bbox = track.to_tlbr() cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 255, 0), 2) cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1] - 50)), 0, 5e-3 * 100, (0, 255, 0), 2) if len(class_names) > 0: class_name = class_names[0] cv2.putText(frame, str(class_names[0]), (int(bbox[0]), int(bbox[1] - 20)), 0, 5e-3 * 100, (0, 255, 0), 2) i += 1 # bbox_center_point(x,y) center = (int( ((bbox[0]) + (bbox[2])) / 2), int(((bbox[1]) + (bbox[3])) / 2)) # track_id[center] pts[track.track_id].append(center) bts[track.track_id].append(center) thickness = 2 # center point cv2.circle(frame, (center), 1, (0, 255, 0), 2) # check intersection with the violation line for j in range(1, len(pts[track.track_id])): if pts[track.track_id][0] is None or pts[ track.track_id][1] is None: continue thickness = int(np.sqrt(64 / float(j + 1)) * 2) cv2.line(frame, (pts[track.track_id][j - 1]), (pts[track.track_id][j]), (255, 0, 0), 2) if intersect(pts[track.track_id][j - 1], pts[track.track_id][j], line[0], line[1]): violation_id[track.track_id] = True #if intersect(bts[track.track_id][0], bts[track.track_id][1], A_line[0], A_line[1]): #frame_count[track.track_id] = frame_index #if intersect(bts[track.track_id][0], bts[track.track_id][1], B_line[0], B_line[1]): #if frame_index == frame_count[track.track_id]: #continue #speed[track.track_id]=324./(frame_index-frame_count[track.track_id]) #print(str(speed[track.track_id])+"km/h id:"+str(track.track_id)) #if speed[track.track_id] >20: #highspeed.append(speed[track.track_id]) # speed measurement between line A and line B for j in range(1, len(bts[track.track_id])): if bts[track.track_id][0] is None or bts[ track.track_id][1] is None: continue if intersect(bts[track.track_id][0], bts[track.track_id][1], A_line[0], A_line[1]): frame_count[track.track_id] = frame_index if intersect(bts[track.track_id][0], bts[track.track_id][1], B_line[0], B_line[1]): if frame_index == frame_count[track.track_id]: continue speed[track.track_id] = 324.
/ ( frame_index - frame_count[track.track_id]) print( str(speed[track.track_id]) + "km/h id:" + str(track.track_id)) if speed[track.track_id] > 20: highspeed.append(speed[track.track_id]) if violation_id[track.track_id] == True: indexIDs.append(int(track.track_id)) counter.append(int(track.track_id)) bbox = track.to_tlbr() cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 0, 255), 2) cv2.line(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 0, 255), 2) cv2.line(frame, (int(bbox[0]), int(bbox[3])), (int(bbox[2]), int(bbox[1])), (0, 0, 255), 2) cv2.putText(frame, str(track.track_id) + "offender", (int(bbox[0]), int(bbox[1] - 50)), 0, 5e-3 * 100, (0, 0, 255), 2) if len(class_names) > 0: class_name = class_names[0] cv2.putText(frame, str(class_names[0]), (int(bbox[0]), int(bbox[1] - 20)), 0, 5e-3 * 100, (0, 255, 255), 2) i += 1 # bbox_center_point(x,y) center = (int(((bbox[0]) + (bbox[2])) / 2), int(((bbox[1]) + (bbox[3])) / 2)) # track_id[center] pts[track.track_id].append(center) thickness = 2 result_top = int(center[1] - output_size[1] / 2) result_bottom = int(center[1] + output_size[1] / 2) result_left = int(center[0] - output_size[0] / 2) result_right = int(center[0] + output_size[0] / 2) if result_top > 0 and result_bottom > 0 and result_left > 0 and result_right > 0: result_img = frame[result_top:result_bottom, result_left:result_right].copy() out2.write(result_img) cv2.imshow('result_img', result_img) # center point #cv2.circle(frame, (center), 1, (20,20,20), 1) #cv2.circle(frame, (center), 1, (20, 20, 20), thickness) count = len(set(counter)) vio_counter = violation_id.count(True) cv2.line(frame, line[0], line[1], (0, 255, 255), 2) cv2.line(frame, A_line[0], A_line[1], (0, 255, 255), 2) cv2.line(frame, B_line[0], B_line[1], (0, 255, 255), 2) #cv2.line(frame, line[0], line[1], (0, 255, 255), 1) cv2.putText( frame, "Speed meter:" + str(round(highspeed[len(highspeed) - 1], 2)) + "km/h id:" + str(track.track_id), (int(20), int(180)), 0, 5e-3 * 120, (0, 0, 255), 2) cv2.putText(frame, "Violated Counter: " + str(vio_counter), (int(20), int(150)), 0, 5e-3 * 120, (0, 0, 255), 2) cv2.putText(frame, "Total Object Counter: " + str(count), (int(20), int(120)), 0, 5e-3 * 120, (0, 255, 0), 2) cv2.putText(frame, "Current Object Counter: " + str(i), (int(20), int(80)), 0, 5e-3 * 120, (0, 255, 0), 2) cv2.putText(frame, "FPS: %f" % (fps), (int(20), int(40)), 0, 5e-3 * 100, (0, 255, 0), 2) #cv2.putText(frame, "Violated Counter: " + str(vio_counter), (int(20), int(150)),0, 5e-3 * 100, (0, 0 ,255),1) #cv2.putText(frame, "Total Object Counter: "+str(count),(int(20), int(120)),0, 5e-3 * 100, (0,255,0),1) #cv2.putText(frame, "Current Object Counter: "+str(i),(int(20), int(80)),0, 5e-3 * 100, (0,255,0),1) #cv2.putText(frame, "FPS: %f"%(fps),(int(20), int(40)),0, 5e-3 * 100, (0,255,0),1) cv2.namedWindow("YOLO3_Deep_SORT", 0) cv2.resizeWindow('YOLO3_Deep_SORT', 1280, 720) cv2.imshow('YOLO3_Deep_SORT', frame) if writeVideo_flag: #save a frame out.write(frame) frame_index = frame_index + 1 list_file.write(str(frame_index) + ' ') if len(boxs) != 0: for i in range(0, len(boxs)): list_file.write( str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' + str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ') list_file.write('\n') fps = (fps + (1. / (time.time() - t1))) / 2 #fpss = 1./(time.time()-t1) #print(set(counter)) # Press Q to stop! 
if cv2.waitKey(1) & 0xFF == ord('q'): #video_capture.stop() break print(" ") print("[Finish]") end = time.time() if len(pts[track.track_id]) != 0: print(args["input"][43:57] + ": " + str(count) + " " + str(class_name) + ' Found') else: print("[None Found]") video_capture.release() if writeVideo_flag: #video_capture.stop() out.release() list_file.close() cv2.destroyAllWindows()
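# --- Hedged sketch: `intersect` above is imported from elsewhere in that project.
# The usual implementation in these line-crossing counters is the orientation (ccw)
# test below, which reports whether segments AB and CD cross; shown here as an
# assumption about what the imported helper does.
def ccw(A, B, C):
    # True if the points A, B, C are in counter-clockwise order.
    return (C[1] - A[1]) * (B[0] - A[0]) > (B[1] - A[1]) * (C[0] - A[0])

def intersect(A, B, C, D):
    # Segments AB and CD intersect iff A and B lie on opposite sides of CD
    # and C and D lie on opposite sides of AB.
    return ccw(A, C, D) != ccw(B, C, D) and ccw(A, B, C) != ccw(A, B, D)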
def process_frame(yolo): # Definition of the parameters max_cosine_distance = 0.3 nn_budget = None nms_max_overlap = 1.0 # deep_sort model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) writeVideo_flag = False # NOTE: out, frame_index and list_file must be set up before enabling this flag #video_capture = cv2.VideoCapture(0) producer = KafkaProducer( bootstrap_servers='master:6667', value_serializer=lambda m: json.dumps(m).encode('utf8')) consumer = KafkaConsumer('test', bootstrap_servers=['master:6667']) for msg in consumer: json_from_consumer = json.loads(msg.value) # the record body holds the JSON payload decoded = base64.b64decode(json_from_consumer['image']) filename = '/home/haohsiang/Vigilancia-Distributed/codev1frame.jpg' # I assume you have a way of picking unique filenames with open(filename, 'wb') as f: f.write(decoded) frame = cv2.imread(filename) #ret, frame = video_capture.read() # frame shape 640*480*3 #if ret != True: # break t1 = time.time() # image = Image.fromarray(frame) image = Image.fromarray(frame[..., ::-1]) #bgr to rgb boxs = yolo.detect_image(image) print("box_num", len(boxs)) features = encoder(frame, boxs) # detection score is fixed to 1.0 here detections = [ Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features) ] # Run non-maxima suppression. boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2) cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2) print( str(track.track_id) + ' :' + str(bbox[0]) + ' ' + str(bbox[1]) + ' ' + str(bbox[2]) + ' ' + str(bbox[3])) print(dt.datetime.now().time()) result = { 'ID': str(track.track_id), 'timestamp': dt.datetime.now().isoformat(), 'location_x': str(bbox[0]), 'w': str(bbox[2]) } producer.send('resultstream', result) time.sleep(0.3) for det in detections: bbox = det.to_tlbr() cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2) cv2.imshow('', frame) if writeVideo_flag: # save a frame out.write(frame) frame_index = frame_index + 1 list_file.write(str(frame_index) + ' ') if len(boxs) != 0: for i in range(0, len(boxs)): list_file.write( str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' + str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ') list_file.write('\n') fps = 1. / (time.time() - t1) print("fps= %f" % (fps)) # Press Q to stop! if cv2.waitKey(1) & 0xFF == ord('q'): break
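# --- Hedged sketch (an alternative, not the original code): the temp-file round trip
# above can be avoided by decoding the base64 JPEG bytes in memory with cv2.imdecode.
# The helper name `frame_from_kafka_message` is illustrative only.
import base64
import json

import cv2
import numpy as np

def frame_from_kafka_message(msg):
    payload = json.loads(msg.value)                # kafka-python exposes the record body as .value
    jpeg_bytes = base64.b64decode(payload['image'])
    buf = np.frombuffer(jpeg_bytes, dtype=np.uint8)
    return cv2.imdecode(buf, cv2.IMREAD_COLOR)     # BGR frame, same as cv2.imread would give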
def main(yolo): # Definition of the parameters max_cosine_distance = 0.2 nn_budget = None nms_max_overlap = 1.0 output_format = 'mp4' video_name = 'bus4_2in_4out.mp4' file_path = join('data_files/videos', video_name) output_name = 'save_data/out_' + video_name[0:-3] + output_format initialize_door_by_yourself = False door_array = None # Deep SORT model_filename = '../model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) show_detections = True writeVideo_flag = True asyncVideo_flag = False counter = Counter(counter_in=0, counter_out=0, track_id=0) if asyncVideo_flag: video_capture = VideoCaptureAsync(file_path) else: video_capture = cv2.VideoCapture(file_path) if asyncVideo_flag: video_capture.start() if writeVideo_flag: if asyncVideo_flag: w = int(video_capture.cap.get(3)) h = int(video_capture.cap.get(4)) else: w = int(video_capture.get(3)) h = int(video_capture.get(4)) fourcc = cv2.VideoWriter_fourcc(*'XVID') out = cv2.VideoWriter(output_name, fourcc, 15, (w, h)) frame_index = -1 fps = 0.0 fps_imutils = imutils.video.FPS().start() ret, first_frame = video_capture.read() if door_array is None: if initialize_door_by_yourself: door_array = select_object(first_frame)[0] print(door_array) else: all_doors = read_door_info('data_files/doors_info_links.json') door_array = all_doors[video_name] border_door = door_array[3] error_values = [] truth = get_truth(video_name) while True: ret, frame = video_capture.read() # frame shape 640*480*3 if not ret: total_count = counter.return_total_count() true_total = truth.inside + truth.outside err = abs(total_count - true_total) / true_total log_res = "in video: {}\n predicted / true\n counter in: {} / {}\n counter out: {} / {}\n" \ " total: {} / {}\n error: {}\n______________\n".format(video_name, counter.counter_in, truth.inside, counter.counter_out, truth.outside, total_count, true_total, err) with open('../log_results.txt', 'w') as file: file.write(log_res) print(log_res) error_values.append(err) break t1 = time.time() image = Image.fromarray(frame[..., ::-1]) # bgr to rgb boxes, confidence, classes = yolo.detect_image(image) features = encoder(frame, boxes) detections = [ Detection(bbox, confidence, cls, feature) for bbox, confidence, cls, feature in zip( boxes, confidence, classes, features) ] # Run non-maxima suppression. 
boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) classes = np.array([d.cls for d in detections]) indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) cv2.rectangle(frame, (int(door_array[0]), int(door_array[1])), (int(door_array[2]), int(door_array[3])), (23, 158, 21), 2) for det in detections: bbox = det.to_tlbr() if show_detections and len(classes) > 0: score = "%.2f" % (det.confidence * 100) + "%" rect_head = Rectangle(bbox[0], bbox[1], bbox[2], bbox[3]) rect_door = Rectangle(int(door_array[0]), int(door_array[1]), int(door_array[2]), int(door_array[3])) intersection = rect_head & rect_door if intersection: squares_coeff = rect_square(*intersection) / rect_square( *rect_head) cv2.putText( frame, score + " inter: " + str(round(squares_coeff, 3)), (int(bbox[0]), int(bbox[3])), 0, 1e-3 * frame.shape[0], (0, 100, 255), 5) cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 3) for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() # first appearance of an object with id=track.id if track.track_id not in counter.people_init or counter.people_init[ track.track_id] == 0: counter.obj_initialized(track.track_id) rect_head = Rectangle(bbox[0], bbox[1], bbox[2], bbox[3]) rect_door = Rectangle(door_array[0], door_array[1], door_array[2], door_array[3]) res = rect_head & rect_door if res: inter_square = rect_square(*res) head_square = rect_square(*rect_head) # was initialized in the door area, probably going in if (inter_square / head_square) >= 0.8: counter.people_init[track.track_id] = 2 # initialized inside the bus, maybe going out elif (inter_square / head_square) <= 0.4 or bbox[3] > border_door: counter.people_init[track.track_id] = 1 # res is None: the object is not inside the door contour else: counter.people_init[track.track_id] = 1 counter.people_bbox[track.track_id] = bbox counter.cur_bbox[track.track_id] = bbox adc = "%.2f" % (track.adc * 100) + "%" # Average detection confidence cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2) cv2.putText(frame, "ID: " + str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 1e-3 * frame.shape[0], (0, 255, 0), 5) if not show_detections: track_cls = track.cls cv2.putText(frame, str(track_cls), (int(bbox[0]), int(bbox[3])), 0, 1e-3 * frame.shape[0], (0, 255, 0), 1) cv2.putText( frame, 'ADC: ' + adc, (int(bbox[0]), int(bbox[3] + 2e-2 * frame.shape[1])), 0, 1e-3 * frame.shape[0], (0, 255, 0), 1) id_get_lost = [ track.track_id for track in tracker.tracks if track.time_since_update >= 25 and track.age >= 29 ] id_inside_tracked = [ track.track_id for track in tracker.tracks if track.age > 60 ] for val in counter.people_init.keys(): # check bbox also cur_c = find_centroid(counter.cur_bbox[val]) init_c = find_centroid(counter.people_bbox[val]) vector_person = (cur_c[0] - init_c[0], cur_c[1] - init_c[1]) if val in id_get_lost and counter.people_init[val] != -1: # the sign of the vertical displacement tells whether the person moved # in the entrance or the exit direction if vector_person[1] > 70 and counter.people_init[ val] == 2: # and counter.people_bbox[val][3] > border_door \ counter.get_in() elif vector_person[1] < -70 and counter.people_init[val] == 1: counter.get_out() counter.people_init[val] = -1 print("person left frame")
print(f"current centroid - init : {cur_c} - {init_c}\n") print(f"vector: {vector_person}\n") del val # elif val in id_inside_tracked and val not in id_get_lost and counter.people_init[val] == 1 \ # and bb_intersection_over_union(counter.cur_bbox[val], door_array) <= 0.3 \ # and vector_person[1] > 0: # and \ # # counter.people_bbox[val][3] > border_door: # counter.get_in() # # counter.people_init[val] = -1 # print(f"person is tracked for a long time") # print(f"current centroid - init : {cur_c} - {init_c}\n") # print(f"vector: {vector_person}\n") # imaggg = cv2.line(frame, find_centroid(counter.cur_bbox[val]), # find_centroid(counter.people_bbox[val]), # (0, 0, 255), 7) # cv2.imshow('frame', imaggg) # cv2.waitKey(0) ins, outs = counter.show_counter() cv2.putText(frame, "in: {}, out: {} ".format(ins, outs), (10, 30), 0, 1e-3 * frame.shape[0], (255, 0, 0), 5) cv2.namedWindow('image', cv2.WINDOW_NORMAL) cv2.resizeWindow('image', 1400, 800) cv2.imshow('image', frame) if writeVideo_flag: # save a frame out.write(frame) frame_index = frame_index + 1 fps_imutils.update() if not asyncVideo_flag: fps = (fps + (1. / (time.time() - t1))) / 2 # print("FPS = %f" % (fps)) # Press Q to stop! if cv2.waitKey(1) & 0xFF == ord('q'): break fps_imutils.stop() print('imutils FPS: {}'.format(fps_imutils.fps())) if asyncVideo_flag: video_capture.stop() else: video_capture.release() if writeVideo_flag: out.release() cv2.destroyAllWindows() mean_error = np.mean(error_values) print("mean error for {} video: {}".format(video_name, mean_error))
def run_multiple(sequence_dir, detection_dir, output_dir, min_confidence, nms_max_overlap, min_detection_height, max_cosine_distance, max_age, nn_budget, display, save_images_dir): """Run multi-target tracker on a particular sequence. Parameters ---------- sequence_dir : str Path to the MOTChallenge sequence directory. detection_dir : str Path to the detections file. output_dir : str Path to the tracking output file. This file will contain the tracking results on completion. min_confidence : float Detection confidence threshold. Disregard all detections that have a confidence lower than this value. nms_max_overlap: float Maximum detection overlap (non-maxima suppression threshold). min_detection_height : int Detection height threshold. Disregard all detections that have a height lower than this value. max_cosine_distance : float Gating threshold for cosine distance metric (object appearance). nn_budget : Optional[int] Maximum size of the appearance descriptor gallery. If None, no budget is enforced. display : bool If True, show visualization of intermediate tracking results. save_images_dir : string If not None, save the tracking result to indicated directories """ all_sequences = sorted(glob.glob(os.path.join(sequence_dir, '*'))) if len(all_sequences) == 0: raise ValueError("There is no folder in " + sequence_dir) for sequence_dir in all_sequences: video_name = sequence_dir.split('/')[-1] output_file = os.path.join(output_dir, video_name + '.npy') #'.txt') print(video_name) detection_file = os.path.join(detection_dir, video_name + '.npy') try: os.stat(detection_file) os.stat(sequence_dir) except: raise NameError(detection_file + ' or ' + sequence_dir + " doesn't exist!") seq_info = gather_sequence_info(sequence_dir, detection_file) metric = nn_matching.NearestNeighborDistanceMetric( "cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric, max_age=max_age) results = [] def frame_callback(vis, frame_idx): # print("Processing frame %05d" % frame_idx) # Load image and generate detections. detections = create_detections(seq_info["detections"], frame_idx, min_detection_height) detections = [ d for d in detections if d.confidence >= min_confidence ] # Run non-maxima suppression. boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) indices = preprocessing.non_max_suppression( boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Update tracker. tracker.predict() tracker.update(detections) # Update visualization. if display: image = cv2.imread(seq_info["image_filenames"][frame_idx], cv2.IMREAD_COLOR) image_name = seq_info["image_filenames"][frame_idx].split( '/')[-1] vis.set_image(image.copy(), image_name) # vis.draw_detections(detections) vis.draw_trackers(tracker.tracks) # Store results. for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlwh() # print("track._class:",track._class) # print("track.confidence:",track.confidence) # print("track.feature_to_save:",track.feature_to_save.shape) results.append( np.hstack([ frame_idx, track.track_id, bbox[0], bbox[1], bbox[2], bbox[3], track._class, track.confidence, track.feature_to_save ])) # Run tracker. if display: visualizer = visualization.Visualization( seq_info, update_ms=5, save_images_dir=save_images_dir) else: visualizer = visualization.NoVisualization(seq_info) visualizer.run(frame_callback) # Store results. 
np.save(output_file, np.array(results)) # f = open(output_file, 'w') # for row in results: # print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1' % ( # row[0], row[1], row[2], row[3], row[4], row[5]),file=f) # f.close() # shutdown the window if display: cv2.destroyWindow(visualizer.viewer._caption)
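# --- Hedged sketch: the commented-out writer above shows the MOTChallenge text
# format; this standalone converter (an assumption, not part of the repo) turns a
# saved .npy result file back into that format. Only the first six columns
# (frame, id, x, y, w, h) are used; extra per-track columns are ignored.
import numpy as np

def npy_results_to_mot_txt(npy_path, txt_path):
    results = np.load(npy_path)
    with open(txt_path, 'w') as f:
        for row in results:
            # frame, id, x, y, w, h, conf, -1, -1, -1
            print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1' % (
                row[0], row[1], row[2], row[3], row[4], row[5]), file=f)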
def main(): # Definition of the parameters max_cosine_distance = 0.3 nn_budget = None nms_max_overlap = 1.0 tsocket = TSocket.TSocket(__HOST, __PORT) transport = TTransport.TFramedTransport(tsocket) protocol = TBinaryProtocol.TBinaryProtocol(transport) client = Client(protocol) extract_rate = 5 # frame-sampling rate transport.open() # deep_sort metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) video_capture = cv2.VideoCapture("MOT16-09.mp4") frame_rate = video_capture.get(cv2.CAP_PROP_FPS) sample_interval = 1. / extract_rate print(frame_rate, extract_rate, sample_interval) delay = 1. / frame_rate print(delay) fps = 0.0 ############################################## loc_dic = {} in_count = 0 # "in" counter out_count = 0 # "out" counter ############################################## frame_count = 0 global last_stat_time last_stat_time = time.time() w = 640 h = 480 last_sample_time = 0.0 while True: start = time.time() ret, frame = video_capture.read() if not ret: break frame = cv2.resize(frame, (w, h)) now = time.time() if last_sample_time + sample_interval <= now: t1 = time.time() boxes, features = encode( client, frame) # compress the frame to JPEG and send it to the GPU server for YOLOv3 detection; the features come back with the boxes last_sample_time = time.time() nfps = 1. / (time.time() - t1) print(nfps) if fps <= 0.1: fps = nfps else: fps = (fps + nfps) / 2 print("detection fps= %f" % (fps)) #print(features[0])#128 tt1 = time.time() detections = [ Detection(bbox, 1.0, feature) for bbox, feature in zip(boxes, features) ] #print(detections) # Run non-maxima suppression. boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) indices = preprocessing.non_max_suppression( boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) print("tracker used:", time.time() - tt1) for track in tracker.tracks: #print(track.track_id) if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2) cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2) id_num = str(track.track_id) if id_num in loc_dic: # compare with the position from the previous frame # moving right and crossing the divider line last_x = loc_dic[id_num] if bbox[0] > last_x and (bbox[0] > float(w / 2) and last_x < float(w / 2)): print("##################in one#################") loc_dic[id_num] = bbox[0] in_count += 1 # moving left and crossing the divider line elif bbox[0] < last_x and (bbox[0] < float(w / 2) and last_x > float(w / 2)): print("###################out one################") loc_dic[id_num] = bbox[0] out_count += 1 else: loc_dic[id_num] = bbox[0] for det in detections: bbox = det.to_tlbr() cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2) frame_count += 1 cv2.line(frame, (int(w / 2), int(0)), (int(w / 2), int(h)), (255, 255, 255)) cv2.putText(frame, "in number:" + str(in_count), (10, 40), 0, 1e-3 * h, (255, 0, 0), 2) cv2.putText(frame, "out number:" + str(out_count), (10, 60), 0, 1e-3 * h, (255, 0, 0), 2) ret, frame = cv2.imencode('.jpg', frame) dt["img"] = frame.tobytes() wait_time = delay - (time.time() - start) #print(wait_time) if wait_time > 0: time.sleep(wait_time) video_capture.release()
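# --- Hedged sketch: the in/out decision above, reduced to a pure function that is
# easy to unit-test. Returns +1 for a left-to-right crossing of the vertical line at
# line_x ("in" above), -1 for right-to-left ("out"), and 0 when the line was not
# crossed. The function name is illustrative only.
def crossing_direction(last_x, cur_x, line_x):
    if last_x < line_x and cur_x > line_x:
        return 1   # counted as "in" above
    if last_x > line_x and cur_x < line_x:
        return -1  # counted as "out" above
    return 0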
def run(sequence_dir, detection_file, output_file, min_confidence, nms_max_overlap, min_detection_height, max_cosine_distance, nn_budget, display, lambda_): """Run multi-target tracker on a particular sequence. Parameters ---------- sequence_dir : str Path to the MOTChallenge sequence directory. detection_file : str Path to the detections file. output_file : str Path to the tracking output file. This file will contain the tracking results on completion. min_confidence : float Detection confidence threshold. Disregard all detections that have a confidence lower than this value. nms_max_overlap: float Maximum detection overlap (non-maxima suppression threshold). min_detection_height : int Detection height threshold. Disregard all detections that have a height lower than this value. max_cosine_distance : float Gating threshold for cosine distance metric (object appearance). nn_budget : Optional[int] Maximum size of the appearance descriptor gallery. If None, no budget is enforced. display : bool If True, show visualization of intermediate tracking results. """ seq_info = gather_sequence_info(sequence_dir, detection_file) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric, _lambda=lambda_) results = [] print("processing " + sequence_dir) def frame_callback(vis, frame_idx): if frame_idx % 100 == 0: print("Processing frame %05d" % frame_idx) # Load image and generate detections. detections = create_detections(seq_info["detections"], frame_idx, min_detection_height) detections = [d for d in detections if d.confidence >= min_confidence] # Run non-maxima suppression. boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Update tracker. tracker.predict() tracker.update(detections) # Update visualization. if display: image = cv2.imread(seq_info["image_filenames"][frame_idx], cv2.IMREAD_COLOR) vis.set_image(image.copy()) vis.draw_detections(detections) vis.draw_trackers(tracker.tracks) # Store results. for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlwh() results.append([ frame_idx, track.track_id, bbox[0], bbox[1], bbox[2], bbox[3] ]) # Run tracker. if display: visualizer = visualization.Visualization(seq_info, update_ms=5) else: visualizer = visualization.NoVisualization(seq_info) visualizer.run(frame_callback) # Store results. f = open(output_file, 'w') for row in results: print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1' % (row[0], row[1], row[2], row[3], row[4], row[5]), file=f)
def main(yolo): # Definition of the parameters max_cosine_distance = 0.3 nn_budget = None nms_max_overlap = 1.0 # Deep SORT model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) tracking = False writeVideo_flag = True asyncVideo_flag = False file_path = ['out.mp4'] #file_path = ['veed.mp4'] cols = math.ceil(math.sqrt(len(file_path))) rows = math.ceil(len(file_path) / cols) singleHeight = int(screenHeight / rows) singleWidth = int(screenWidth / cols) out_image = np.zeros((screenHeight, screenWidth, 3), np.uint8) #if asyncVideo_flag : # video_capture = VideoCaptureAsync(file_path) #else: # video_capture = cv2.VideoCapture(file_path) video_captures = [] cameras = [] prvTimes = [] localgloballink = [] imgsSaved = 2 for i in range(len(file_path)): video_captures.append(cv2.VideoCapture(file_path[i])) cameras.append(Camera()) prvTimes.append(time.time()) #if asyncVideo_flag: # video_capture.start() if writeVideo_flag: if asyncVideo_flag: # NOTE: the async path still assumes a single video_capture stream w = int(video_capture.cap.get(3)) h = int(video_capture.cap.get(4)) else: w = screenWidth h = screenHeight fourcc = cv2.VideoWriter_fourcc(*'XVID') out = cv2.VideoWriter('output_yolov4.avi', fourcc, 30, (w, h)) frame_index = -1 fps = 0.0 fps_imutils = imutils.video.FPS().start() frame = [] globalPersonCount = 1 for file in file_path: frame.append(None) curFrame = 1 gtIndex = 0 while True: allimages = [] for index in range(len(file_path)): cur = time.time() ret, frame[index] = video_captures[index].read( ) # frame shape 640*480*3 if not ret: break t1 = time.time() image = Image.fromarray(frame[index][..., ::-1]) # bgr to rgb boxes, confidence, classes = yolo.detect_image(image) if tracking: features = encoder(frame[index], boxes) detections = [ Detection(bbox, confidence, cls, feature) for bbox, confidence, cls, feature in zip( boxes, confidence, classes, features) ] else: detections = [ Detection_YOLO(bbox, confidence, cls) for bbox, confidence, cls in zip( boxes, confidence, classes) ] # Run non-maxima suppression.
boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) indices = preprocessing.non_max_suppression( boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] for det in detections: bbox = det.to_tlbr() score = "%.2f" % round(det.confidence * 100, 2) + "%" cv2.rectangle(frame[index], (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2) if len(classes) > 0: cls = det.cls cv2.putText(frame[index], str(cls) + " " + score, (int(bbox[0]), int(bbox[3])), 0, 1e-3 * frame[index].shape[0], (0, 255, 0), 1) #nabin's code hsvImage = cv2.cvtColor(frame[index], cv2.COLOR_BGR2HSV) hungarianmatrix = [] indexx = 0 if (len(cameras[index].PersonData) > 0): diff = cur - prvTimes[index] times = int(diff / 0.05) prvTimes[index] = cur for data in cameras[index].PersonData: if (data.kf != None): for i in range(times): data.kf.predict() nodata = len(cameras[index].PersonData) for z in range(len(cameras[index].PersonData)): cameras[index].PersonData[z].updated = False for det in detections: bbox = det.to_tlbr() if (nodata == 0): persondata = PersonData() persondata.color = [ int(random.randint(0, 255)), int(random.randint(0, 255)), int(random.randint(0, 255)) ] persondata.positions.append([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2]) persondata.positions.append([(bbox[0] + bbox[2]) / 2 + 0.1, (bbox[1] + bbox[3]) / 2 + 0.1 ]) persondata.top = bbox[0] persondata.left = bbox[1] persondata.lastPosition = bbox persondata.localPersonIndex = cameras[ index].localPersonCount persondata.kf = KF(persondata.positions[0][0], persondata.positions[0][1], 0, 0) persondata.globalPersonIndex = globalPersonCount localgloballink.append([ globalPersonCount, index, persondata.localPersonIndex ]) globalPersonCount = globalPersonCount + 1 cameras[index].localPersonCount = cameras[ index].localPersonCount + 1 hsvCroppedImage = hsvImage[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])] persondata.histogram_h = cv2.calcHist([hsvCroppedImage], [0], None, [180], [0, 180]) persondata.histogram_h = np.divide(persondata.histogram_h, ((bbox[3] - bbox[1]) * (bbox[2] - bbox[0]))) cameras[index].PersonData.append(persondata) else: hungarianmatrix.append([]) hsvCroppedImage = hsvImage[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])] histogram_h = cv2.calcHist([hsvCroppedImage], [0], None, [180], [0, 180]) histogram_h = np.divide(histogram_h, ((bbox[3] - bbox[1]) * (bbox[2] - bbox[0]))) for z in range(len(cameras[index].PersonData)): postions = len( cameras[index].PersonData[z].positions) - 1 cov = np.cov( np.asarray( cameras[index].PersonData[z].positions).T) #mahal=(distance.mahalanobis([cameras[index].PersonData[z].kf.calulatedmean[0],cameras[index].PersonData[z].kf.calulatedmean[2]],[(bbox[0]+bbox[2])/2,(bbox[1]+bbox[3])/2],cov))/ frame[index].shape[0] mahal = math.sqrt((cameras[index].PersonData[z]. positions[postions][0] - (bbox[0] + bbox[2]) / 2)**2 + (cameras[index].PersonData[z]. 
positions[postions][1] - (bbox[1] + bbox[3]) / 2)**2 ) / frame[index].shape[0] #mahal=math.sqrt((cameras[index].PersonData[z].kf.calulatedmean[0]-(bbox[0]+bbox[2])/2)**2+(cameras[index].PersonData[z].kf.calulatedmean[1]-(bbox[1]+bbox[3])/2)**2)/ frame[index].shape[0] #mahal=getMahalanbolisDist(cameras[index].PersonData[z].positions,[(bbox[0]+bbox[2])/2,(bbox[1]+bbox[3])/2]) mahal += (np.sum( np.absolute( np.subtract( histogram_h, cameras[index].PersonData[z].histogram_h))) ) hungarianmatrix[indexx].append(mahal) indexx = indexx + 1 print(hungarianmatrix) if (nodata != 0): row_ind = [] col_ind = [] if (hungarianmatrix != []): row_ind, col_ind = linear_sum_assignment(hungarianmatrix) indexx = 0 for pos in range(len(col_ind)): if (hungarianmatrix[row_ind[pos]][col_ind[pos]] < 2 - detections[row_ind[pos]].confidence): bbox = detections[row_ind[pos]].to_tlbr() detections[row_ind[pos]].localProcessed = True cameras[index].PersonData[col_ind[pos]].updated = True cameras[index].PersonData[col_ind[pos]].top = bbox[0] cameras[index].PersonData[col_ind[pos]].left = bbox[1] cameras[index].PersonData[col_ind[pos]].kf.update([ (bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2 ]) cameras[index].PersonData[ col_ind[pos]].lastPosition = bbox cameras[index].PersonData[ col_ind[pos]].positions.append([ (bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2 ]) hsvCroppedImage = hsvImage[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])] toadd = detections[row_ind[pos]].confidence - 0.7 cameras[index].PersonData[ col_ind[pos]].histogram_h = np.add( np.multiply( cv2.calcHist([hsvCroppedImage], [0], None, [180], [0, 180]), toadd * 1 / (((bbox[3] - bbox[1]) * (bbox[2] - bbox[0])))), np.multiply( cameras[index].PersonData[ col_ind[pos]].histogram_h, 1 - toadd)) if (len(cameras[index].PersonData[ col_ind[pos]].positions) > 6): cameras[index].PersonData[ col_ind[pos]].positions.pop(0) for pos in range(len(detections)): if (hasattr(detections[pos], 'localProcessed') == False): bbox = detections[pos].to_tlbr() #if(bbox[1]>hsvImage.shape[0]): # continue ndata = PersonData() ndata.top = bbox[0] ndata.left = bbox[1] ndata.positions.append([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2]) ndata.positions.append([(bbox[0] + bbox[2]) / 2 + 0.1, (bbox[1] + bbox[3]) / 2 + 0.1]) ndata.kf = KF((bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2, 0, 0) ndata.color = [ int(random.randint(0, 255)), int(random.randint(0, 255)), int(random.randint(0, 255)) ] ndata.localPersonIndex = cameras[ index].localPersonCount ndata.lastPosition = bbox ndata.kf = KF(ndata.positions[0][0], ndata.positions[0][1], 0, 0) cameras[index].localPersonCount = cameras[ index].localPersonCount + 1 localgloballink.append( [globalPersonCount, index, ndata.localPersonIndex]) ndata.globalPersonIndex = globalPersonCount globalPersonCount = globalPersonCount + 1 hsvCroppedImage = hsvImage[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])] ndata.histogram_h = cv2.calcHist([hsvCroppedImage], [0], None, [180], [0, 180]) ndata.histogram_h = np.divide(ndata.histogram_h, ((bbox[3] - bbox[1]) * (bbox[2] - bbox[0]))) cameras[index].PersonData.append(ndata) #allimages.append([]) if (len(file_path)) != 1: for pdata in cameras[index].PersonData: if (pdata.updated): nimg = cv2.resize( frame[index][int(pdata.lastPosition[1] ):int(pdata.lastPosition[3]), int(pdata.lastPosition[0] ):int(pdata.lastPosition[2])], (64, 128), interpolation=cv2.INTER_AREA) #allimages[len(allimages)-1].append(np.array(nimg)) pdata.imgs.append(nimg) if (len(pdata.imgs) == imgsSaved + 1): 
pdata.imgs.pop(0) #nabin's code ends if tracking: # Call the tracker tracker.predict() tracker.update(detections) for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() cv2.rectangle(frame[index], (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2) cv2.putText(frame[index], "ID: " + str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 1e-3 * frame[index].shape[0], (0, 255, 0), 1) #if(len(cameras)==2): #globalHungarian=[] # for fdata in range(len(cameras[0].PersonData)): # globalHungarian.append([]) # for pdata in cameras[1].PersonData: # globalHungarian[fdata].append(np.sum(np.absolute(np.subtract(pdata.histogram_h,cameras[0].PersonData[fdata].histogram_h)))) # # row_ind, col_ind = linear_sum_assignment(globalHungarian) # for row in range(len(row_ind)): # cv2.putText(frame[0], chr(ord('a')+row),(int(cameras[0].PersonData[row_ind[row]].positions[len(cameras[0].PersonData[row_ind[row]].positions)-1][0]), int(cameras[0].PersonData[row_ind[row]].positions[len(cameras[0].PersonData[row_ind[row]].positions)-1][1])),0, 5e-3 * 200, (0,255,0),2) # cv2.putText(frame[1], chr(ord('a')+row),(int(cameras[1].PersonData[col_ind[row]].positions[len(cameras[1].PersonData[col_ind[row]].positions)-1][0]), int(cameras[1].PersonData[col_ind[row]].positions[len(cameras[1].PersonData[col_ind[row]].positions)-1][1])),0, 5e-3 * 200, (0,255,0),2) if (len(cameras) == 1): hypos = [] hyposPos = [] for person in cameras[0].PersonData: if (person.updated == True): cv2.putText( frame[0], str(person.localPersonIndex), (int(person.positions[len(person.positions) - 1][0]), int(person.positions[len(person.positions) - 1][1])), 0, 1e-3 * frame[index].shape[0], (0, 255, 0), 1) hypos.append(person.localPersonIndex + 1) hyposPos.append([person.top, person.left]) gts = [] gtsPos = [] while gtIndex < len(gt) and gt[gtIndex][0] == curFrame: gts.append(gt[gtIndex][1]) gtsPos.append([gt[gtIndex][2], gt[gtIndex][3]]) gtIndex = gtIndex + 1 curFrame = curFrame + 1 dis = mm.distances.norm2squared_matrix(np.array(gtsPos), np.array(hyposPos)) acc.update(gts, hypos, dis) else: edges = [] globalHungarian = [] for i in range(len(cameras)): for j in range(i + 1, len(cameras)): globalHungarian = [] x = 0 xindexes = [] yindexes = [] stackedimgages = [] for pos in range(imgsSaved): stackedimgages.append([]) for person in cameras[j].PersonData: if (person.updated == True and len(person.imgs) == imgsSaved): stackedimgages[pos].append(person.imgs[pos]) for fdata in range(len(cameras[i].PersonData)): if (cameras[i].PersonData[fdata].updated == False or len(cameras[i].PersonData[fdata].imgs) != imgsSaved): continue xindexes.append(fdata) y = 0 triplet = test(cameras[i].PersonData[fdata].imgs[0], stackedimgages[0]) for pos in range(1, imgsSaved): triplet = np.add( triplet, test(cameras[i].PersonData[fdata].imgs[pos], stackedimgages[pos])) globalHungarian.append([]) for pdata in range(len(cameras[j].PersonData)): if (cameras[j].PersonData[pdata].updated == False or len(cameras[j].PersonData[pdata].imgs) != imgsSaved): continue #globalHungarian[x].append(triplet[y]) globalHungarian[x].append( np.sum( np.absolute( np.subtract( cameras[j].PersonData[pdata]. histogram_h, cameras[i].
PersonData[fdata].histogram_h))) * 2 + triplet[y]) if (x == 0): yindexes.append(pdata) #globalHungarian[fdata].append(np.sum(np.absolute(np.subtract(cameras[j].PersonData[pdata].histogram_h,cameras[i].PersonData[fdata].histogram_h)))) #globalHungarian[fdata].append(triplet[pdata]) y = y + 1 x = x + 1 if (len(globalHungarian) != 0): row_ind, col_ind = linear_sum_assignment( globalHungarian) print(globalHungarian) for pos in range(len(row_ind)): if (globalHungarian[row_ind[pos]][col_ind[pos]] < 3.2): edges.append( (cameras[i].PersonData[xindexes[ row_ind[pos]]].globalPersonIndex, cameras[j].PersonData[yindexes[ col_ind[pos]]].globalPersonIndex)) Allcliques = cliques(edges, len(cameras), globalPersonCount).getCliques() for cam in cameras: for person in cam.PersonData: isinclique = True for clique in Allcliques: if person.globalPersonIndex in clique: isinclique = False break if isinclique: Allcliques.append([person.globalPersonIndex]) for sclique in Allcliques: indexes = [] cur = min(sclique) for i in range(len(sclique)): isInclique = False prvIndex = cameras[localgloballink[ sclique[i] - 1][1]].PersonData[localgloballink[ sclique[i] - 1][2]].prvglobalFoundOutPersonIndex if prvIndex == -1: isInclique = True else: for snclique in Allcliques: if prvIndex in snclique: isInclique = True break if isInclique == True: cameras[localgloballink[sclique[i] - 1][1]].PersonData[ localgloballink[sclique[i] - 1] [2]].globalFoundOutPersonIndex = cur else: cameras[localgloballink[sclique[i] - 1][1]].PersonData[ localgloballink[sclique[i] - 1] [2]].globalFoundOutPersonIndex = prvIndex for cam in range(len(cameras)): for person in cameras[cam].PersonData: if person.updated == True: cv2.putText( frame[cam], str(person.globalFoundOutPersonIndex), (int(person.positions[len(person.positions) - 1][0]), int(person.positions[len(person.positions) - 1][1])), 0, 1e-3 * frame[index].shape[0], (0, 255, 0), 2) for sclique in Allcliques: for i in range(len(sclique)): cameras[localgloballink[sclique[i] - 1][1]].PersonData[ localgloballink[sclique[i] - 1] [2]].prvglobalFoundOutPersonIndex = cameras[ localgloballink[sclique[i] - 1][1]].PersonData[ localgloballink[sclique[i] - 1] [2]].globalFoundOutPersonIndex out_image.fill(0) vindex = 0 for row in range(rows): for col in range(cols): if (vindex == len(file_path)): break vidshape = frame[vindex].shape curvidheightratio = vidshape[0] / singleHeight curvidwidthratio = vidshape[1] / singleWidth if (curvidwidthratio < curvidheightratio): #height is small resizedwidth = int(vidshape[1] / vidshape[0] * singleHeight) nimg = cv2.resize(frame[vindex], (resizedwidth, singleHeight), interpolation=cv2.INTER_AREA) widthpos = int( (singleWidth - resizedwidth) / 2) + col * singleWidth out_image[row * singleHeight:(row + 1) * singleHeight, widthpos:widthpos + resizedwidth] = nimg else: #width is small resizedheight = int(vidshape[0] / vidshape[1] * singleWidth) nimg = cv2.resize(frame[vindex], (singleWidth, resizedheight), interpolation=cv2.INTER_AREA) heightpos = int(((singleHeight - resizedheight) / 2) + row * singleHeight) out_image[heightpos:heightpos + resizedheight, col * singleWidth:(col + 1) * singleWidth] = nimg vindex = vindex + 1 if (len(cameras) == 1): mh = mm.metrics.create() summary = mh.compute(acc, metrics=['num_frames', 'mota', 'motp'], name='acc') print(summary) cv2.imshow('', out_image) if writeVideo_flag: # and not asyncVideo_flag: # save a frame out.write(out_image) frame_index = frame_index + 1 fps_imutils.update() if not asyncVideo_flag: fps = (fps + (1. 
/ (time.time() - t1))) / 2 print("FPS = %f" % (fps)) # Press Q to stop! if cv2.waitKey(1) & 0xFF == ord('q'): break fps_imutils.stop() print('imutils FPS: {}'.format(fps_imutils.fps())) if asyncVideo_flag: video_capture.stop() else: for vc in video_captures: vc.release() # release every opened stream if writeVideo_flag: out.release() cv2.destroyAllWindows()
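# --- Hedged sketch: the cross-camera association above, reduced to its core idea.
# Each person gets a normalized HSV hue histogram; scipy's linear_sum_assignment then
# finds the minimum-cost pairing between two cameras. All names here are illustrative
# only; the real code also mixes in a learned triplet distance.
import cv2
import numpy as np
from scipy.optimize import linear_sum_assignment

def hue_histogram(bgr_crop):
    hsv = cv2.cvtColor(bgr_crop, cv2.COLOR_BGR2HSV)
    hist = cv2.calcHist([hsv], [0], None, [180], [0, 180])
    return hist / hist.sum()  # normalize so crops of different sizes are comparable

def match_across_cameras(hists_a, hists_b):
    # cost[i][j] = L1 distance between histogram i (camera A) and j (camera B)
    cost = np.array([[np.abs(ha - hb).sum() for hb in hists_b] for ha in hists_a])
    rows, cols = linear_sum_assignment(cost)
    return list(zip(rows, cols)), cost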
def main(): yolo = YOLO() max_cosine_distance = 0.3 nn_budget = None nms_max_overlap = 1.0 model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) parser = argparse.ArgumentParser( description='Training codes for Openpose using Tensorflow') parser.add_argument('--checkpoint_path', type=str, default='checkpoints/train/2018-12-13-16-56-49/') parser.add_argument('--backbone_net_ckpt_path', type=str, default='checkpoints/vgg/vgg_19.ckpt') parser.add_argument('--image', type=str, default=None) # parser.add_argument('--run_model', type=str, default='img') parser.add_argument('--video', type=str, default=None) parser.add_argument('--train_vgg', type=bool, default=True) parser.add_argument('--use_bn', type=bool, default=False) parser.add_argument('--save_video', type=str, default='result/our.mp4') args = parser.parse_args() checkpoint_path = args.checkpoint_path logger.info('checkpoint_path: ' + checkpoint_path) with tf.name_scope('inputs'): raw_img = tf.placeholder(tf.float32, shape=[None, None, None, 3]) img_size = tf.placeholder(dtype=tf.int32, shape=(2, ), name='original_image_size') img_normalized = raw_img / 255 - 0.5 # define vgg19 with slim.arg_scope(vgg.vgg_arg_scope()): vgg_outputs, end_points = vgg.vgg_19(img_normalized) # get net graph logger.info('initializing model...') net = PafNet(inputs_x=vgg_outputs, use_bn=args.use_bn) hm_pre, cpm_pre, added_layers_out = net.gen_net() hm_up = tf.image.resize_area(hm_pre[5], img_size) cpm_up = tf.image.resize_area(cpm_pre[5], img_size) # hm_up = hm_pre[5] # cpm_up = cpm_pre[5] smoother = Smoother({'data': hm_up}, 25, 3.0) gaussian_heatMat = smoother.get_output() max_pooled_in_tensor = tf.nn.pool(gaussian_heatMat, window_shape=(3, 3), pooling_type='MAX', padding='SAME') tensor_peaks = tf.where(tf.equal(gaussian_heatMat, max_pooled_in_tensor), gaussian_heatMat, tf.zeros_like(gaussian_heatMat)) logger.info('initialize saver...') # trainable_var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='openpose_layers') # trainable_var_list = [] trainable_var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='openpose_layers') if args.train_vgg: trainable_var_list = trainable_var_list + tf.get_collection( tf.GraphKeys.TRAINABLE_VARIABLES, scope='vgg_19') restorer = tf.train.Saver(tf.get_collection( tf.GraphKeys.TRAINABLE_VARIABLES, scope='vgg_19'), name='vgg_restorer') saver = tf.train.Saver(trainable_var_list) logger.info('initialize session...') config = tf.ConfigProto() config.gpu_options.allow_growth = True with tf.Session(config=config) as sess: sess.run(tf.group(tf.global_variables_initializer())) logger.info('restoring vgg weights...') restorer.restore(sess, args.backbone_net_ckpt_path) logger.info('restoring from checkpoint...') #saver.restore(sess, tf.train.latest_checkpoint(checkpoint_dir=checkpoint_path)) saver.restore(sess, args.checkpoint_path + 'model-59000.ckpt') logger.info('initialization done') writeVideo_flag = True if args.image is None: if args.video is not None: cap = cv2.VideoCapture(args.video) w = int(cap.get(3)) h = int(cap.get(4)) else: cap = cv2.VideoCapture("images/video.mp4") #cap = cv2.VideoCapture("rtsp://*****:*****@192.168.43.51:554//Streaming/Channels/1") #cap = cv2.VideoCapture("http://*****:*****@192.168.1.111:8081") #cap = cv2.VideoCapture("rtsp://*****:*****@192.168.1.106:554//Streaming/Channels/1") _, image = 
            # print(_, image)
            if image is None:
                logger.error("Can't read video")
                sys.exit(-1)
            fps = cap.get(cv2.CAP_PROP_FPS)
            ori_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            ori_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            # print(fps, ori_w, ori_h)
            if args.save_video is not None:
                fourcc = cv2.VideoWriter_fourcc(*'MP4V')
                video_saver = cv2.VideoWriter('result/our.mp4', fourcc, fps, (ori_w, ori_h))
                logger.info('record video to %s' % args.save_video)
            logger.info('fps@%f' % fps)
            size = [int(654 * (ori_h / ori_w)), 654]
            h = int(654 * (ori_h / ori_w))
            time_n = time.time()
            # print(time_n)

            max_boxs = 0
            person_track = {}
            yolo2 = YOLO2()
            while True:
                face = []
                cur1 = conn.cursor()  # get a cursor
                sql = "select * from worker"
                cur1.execute(sql)
                data = cur1.fetchall()
                for d in data:
                    # note: int fields must be converted with str()
                    name = str(d[1]) + '_' + d[2]
                    face.append(name)
                cur1.close()  # close the cursor

                _, image_fist = cap.read()

                # detect whether safety equipment is worn
                img = Image.fromarray(cv2.cvtColor(image_fist, cv2.COLOR_BGR2RGB))
                image, wear = yolo2.detect_image(img)
                image = np.array(image)
                image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

                # get the warning line
                cv2.line(image, (837, 393), (930, 300), (0, 255, 255), 3)
                transboundaryline = t.line_detect_possible_demo(image)

                # OpenPose 2D pose detection
                img = np.array(cv2.resize(image, (654, h)))
                # cv2.imshow('raw', img)
                img_corner = np.array(cv2.resize(image, (360, int(360 * (ori_h / ori_w)))))
                img = img[np.newaxis, :]
                peaks, heatmap, vectormap = sess.run([tensor_peaks, hm_up, cpm_up],
                                                     feed_dict={raw_img: img, img_size: size})
                bodys = PoseEstimator.estimate_paf(peaks[0], heatmap[0], vectormap[0])
                image, person = TfPoseEstimator.draw_humans(image, bodys, imgcopy=False)

                # keypoint 10 is the right foot, 13 the left foot
                foot = []
                if len(person) > 0:
                    for p in person:
                        foot_lr = []
                        if 10 in p and 13 in p:
                            foot_lr.append(p[10])
                            foot_lr.append(p[13])
                        if len(foot_lr) > 1:
                            foot.append(foot_lr)

                fps = round(1 / (time.time() - time_n), 2)
                image = cv2.putText(image, str(fps) + 'fps', (10, 15),
                                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (255, 255, 255))
                time_n = time.time()

                # object detection for Deep SORT
                image2 = Image.fromarray(image_fist)
                boxs = yolo.detect_image(image2)
                features = encoder(image, boxs)
                detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]
                boxes = np.array([d.tlwh for d in detections])
                scores = np.array([d.confidence for d in detections])
                indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
                detections = [detections[i] for i in indices]
                if len(boxs) > max_boxs:
                    max_boxs = len(boxs)
                # print(max_boxs)

                # Call the tracker
                tracker.predict()
                tracker.update(detections)
                for track in tracker.tracks:
                    if max_boxs < track.track_id:
                        tracker.tracks.remove(track)
                        tracker._next_id = max_boxs + 1
                    if not track.is_confirmed() or track.time_since_update > 1:
                        continue
                    bbox = track.to_tlbr()
                    PointX = bbox[0] + ((bbox[2] - bbox[0]) / 2)
                    PointY = bbox[3]
                    if track.track_id not in person_track:
                        track2 = copy.deepcopy(track)
                        person_track[track.track_id] = track2
                    else:
                        track2 = copy.deepcopy(track)
                        bbox2 = person_track[track.track_id].to_tlbr()
                        PointX2 = bbox2[0] + ((bbox2[2] - bbox2[0]) / 2)
                        PointY2 = bbox2[3]
                        distance = math.sqrt(pow(PointX - PointX2, 2) + pow(PointY - PointY2, 2))
                        if distance < 120:
                            person_track[track.track_id] = track2
                        else:
                            # print('last', track.track_id)
                            dis = {}
                            for key in person_track:
                                bbox3 = person_track[key].to_tlbr()
                                PointX3 = bbox3[0] + ((bbox3[2] - bbox3[0]) / 2)
                                PointY3 = bbox3[3]
                                d = math.sqrt(pow(PointX3 - PointX, 2) + pow(PointY3 - PointY, 2))
                                dis[key] = d
                            dis = sorted(dis.items(), key=operator.itemgetter(1), reverse=False)
                            track2.track_id = dis[0][0]
                            person_track[dis[0][0]] = track2
                            tracker.tracks.remove(track)
                            tracker.tracks.append(person_track[track.track_id])

                    # write the class
                    try:
                        box_title = face[track2.track_id - 1]
                    except Exception as e:
                        box_title = str(track2.track_id) + "_" + "unknown"
                    if box_title not in workers:
                        wid = box_title.split('_')[0]
                        localtime = time.asctime(time.localtime(time.time()))
                        workers[box_title] = wk.Worker()
                        workers[box_title].set(box_title, localtime, (int(PointX), int(PointY)))
                        cur2 = conn.cursor()  # get a cursor
                        sql2 = "UPDATE worker SET in_time='" + localtime + "' WHERE worker_id= '" + wid + "'"
                        cur2.execute(sql2)
                        cur2.close()  # close the cursor
                    else:
                        localtime = time.asctime(time.localtime(time.time()))
                        yoloPoint = (int(PointX), int(PointY))
                        foot_dic = {}
                        wear_dic = {}
                        for f in foot:
                            fp = []
                            footCenter = ((f[0][0] + f[1][0]) / 2, (f[0][1] + f[1][1]) / 2)
                            foot_dis = int(math.sqrt(pow(footCenter[0] - yoloPoint[0], 2) +
                                                     pow(footCenter[1] - yoloPoint[1], 2)))
                            # print(foot_dis)
                            fp.append(f)
                            fp.append(footCenter)
                            foot_dic[foot_dis] = fp
                        # print(box_title, 'sss', foot_dic)
                        foot_dic = sorted(foot_dic.items(), key=operator.itemgetter(0), reverse=False)
                        workers[box_title].current_point = foot_dic[0][1][1]
                        workers[box_title].track_point.append(workers[box_title].current_point)
                        # print(box_title, 'sss', foot_dic[0][1][1])
                        mytrack = str(workers[box_title].track_point)
                        wid = box_title.split('_')[0]

                        # Kalman filter prediction
                        if wid not in KalmanNmae:
                            myKalman(wid)
                        if wid not in lmp:
                            setLMP(wid)
                        cpx, cpy = predict(workers[box_title].current_point[0],
                                           workers[box_title].current_point[1], wid)
                        if cpx[0] == 0.0 or cpy[0] == 0.0:
                            cpx[0] = workers[box_title].current_point[0]
                            cpy[0] = workers[box_title].current_point[1]
                        workers[box_title].next_point = (int(cpx), int(cpy))
                        workers[box_title].current_footR = foot_dic[0][1][0][0]
                        workers[box_title].current_footL = foot_dic[0][1][0][1]

                        cur3 = conn.cursor()  # get a cursor
                        sql = ("UPDATE worker SET current_point= '" + str(workers[box_title].current_point) +
                               "' , current_footR = '" + str(workers[box_title].current_footR) +
                               "',current_footL = '" + str(workers[box_title].current_footL) +
                               "',track_point = '" + mytrack +
                               "',next_point = '" + str(workers[box_title].next_point) +
                               "' WHERE worker_id= '" + wid + "'")
                        cur3.execute(sql)
                        cur3.close()

                        # record the safety-equipment status
                        if len(wear) > 0:
                            for w in wear:
                                wear_dis = int(math.sqrt(pow(w[0] - yoloPoint[0], 2) +
                                                         pow(w[1] - yoloPoint[1], 2)))
                                wear_dic[wear_dis] = w
                            wear_dic = sorted(wear_dic.items(), key=operator.itemgetter(0), reverse=False)
                            if wear_dic[0][0] < 120:
                                cur4 = conn.cursor()  # get a cursor
                                if wear[wear_dic[0][1]] == 1:
                                    if len(workers[box_title].wear['no helmet']) == 0:
                                        workers[box_title].wear['no helmet'].append(localtime)
                                        sql = "INSERT INTO wear SET worker_id = '" + wid + "', type = 'no_helmet',abnormal_time = '" + localtime + "'"
                                        cur4.execute(sql)
                                        cur4.close()  # close the cursor
                                    else:
                                        if localtime not in workers[box_title].wear['no helmet']:
                                            workers[box_title].wear['no helmet'].append(localtime)
                                            sql = "INSERT INTO wear SET worker_id = '" + wid + "', type = 'no_helmet',abnormal_time = '" + localtime + "'"
                                            cur4.execute(sql)
                                            cur4.close()  # close the cursor
                                elif wear[wear_dic[0][1]] == 2:
                                    if len(workers[box_title].wear['no work cloths']) == 0:
                                        workers[box_title].wear['no work cloths'].append(localtime)
                                        sql = "INSERT INTO wear SET worker_id = '" + wid + "', type = 'no work cloths',abnormal_time = '" + localtime + "'"
                                        cur4.execute(sql)
                                        cur4.close()  # close the cursor
                                    else:
                                        if localtime not in workers[box_title].wear['no work cloths']:
                                            workers[box_title].wear['no work cloths'].append(localtime)
                                            sql = "INSERT INTO wear SET worker_id = '" + wid + "', type = 'no work cloths',abnormal_time = '" + localtime + "'"
                                            cur4.execute(sql)
                                            cur4.close()  # close the cursor
                                elif wear[wear_dic[0][1]] == 3:
                                    if len(workers[box_title].wear['unsafe wear']) == 0:
                                        workers[box_title].wear['unsafe wear'].append(localtime)
                                        sql = "INSERT INTO wear SET worker_id = '" + wid + "', type = 'unsafe wear',abnormal_time = '" + localtime + "'"
                                        cur4.execute(sql)
                                        cur4.close()  # close the cursor
                                    else:
                                        if localtime not in workers[box_title].wear['unsafe wear']:
                                            workers[box_title].wear['unsafe wear'].append(localtime)
                                            sql = "INSERT INTO wear SET worker_id = '" + wid + "', type = 'unsafe wear',abnormal_time = '" + localtime + "'"
                                            cur4.execute(sql)
                                            cur4.close()  # close the cursor

                        # record line-crossing events
                        if len(workers[box_title].track_point) > 4:
                            for i in range(len(transboundaryline)):
                                p1 = (transboundaryline[i][0], transboundaryline[i][1])
                                p2 = (transboundaryline[i][2], transboundaryline[i][3])
                                p3 = workers[box_title].track_point[-2]
                                p4 = workers[box_title].track_point[-1]
                                a = t.IsIntersec(p1, p2, p3, p4)
                                if a == '有交点':  # returned sentinel meaning "has an intersection"
                                    cur5 = conn.cursor()  # get a cursor
                                    cur6 = conn.cursor()  # get a cursor
                                    cur5.execute("select time from transboundary where worker_id = '" + wid + "' ")
                                    qurrytime = cur5.fetchone()
                                    cur5.close()  # close the cursor
                                    if qurrytime is None:
                                        print('line crossed')
                                        sql = "INSERT INTO transboundary SET worker_id = '" + wid + "',time = '" + localtime + "'"
                                        cur6.execute(sql)
                                        cur6.close()  # close the cursor
                                    else:
                                        temp1 = 0
                                        for qt in qurrytime:
                                            if qt == localtime:
                                                temp1 = 1
                                        if temp1 == 0:
                                            print('line crossed')
                                            sql = "INSERT INTO transboundary SET worker_id = '" + wid + "',time = '" + localtime + "'"
                                            cur6.execute(sql)
                                            cur6.close()  # close the cursor
                        if len(workers[box_title].track_point) >= 20:
                            workers[box_title].previous_point = workers[box_title].track_point[-5]
                    conn.commit()
                    try:
                        cv2.putText(image, face[track2.track_id - 1], (int(bbox[0]), int(bbox[1])),
                                    0, 5e-3 * 200, (0, 255, 0), 2)
                    except Exception as e:
                        cv2.putText(image, "unknown", (int(bbox[0]), int(bbox[1])),
                                    0, 5e-3 * 200, (0, 255, 0), 2)

                if args.video is not None:
                    image[27:img_corner.shape[0] + 27, :img_corner.shape[1]] = img_corner  # [3:-10, :]
                cv2.imshow(' ', image)
                if args.save_video is not None:
                    video_saver.write(image)
                cv2.waitKey(1)
        else:
            image = common.read_imgfile(args.image)
            size = [image.shape[0], image.shape[1]]
            if image is None:
                logger.error('Image can not be read, path=%s' % args.image)
                sys.exit(-1)
            h = int(654 * (size[0] / size[1]))
            img = np.array(cv2.resize(image, (654, h)))
            cv2.imshow('ini', img)
            img = img[np.newaxis, :]
            peaks, heatmap, vectormap = sess.run([tensor_peaks, hm_up, cpm_up],
                                                 feed_dict={raw_img: img, img_size: size})
            cv2.imshow('in', vectormap[0, :, :, 0])
            bodys = PoseEstimator.estimate_paf(peaks[0], heatmap[0], vectormap[0])
            image = TfPoseEstimator.draw_humans(image, bodys, imgcopy=False)
            cv2.imshow(' ', image)
            cv2.waitKey(0)
def main(yolo):
    # Determining the FPS of a video having variable frame rate
    # cv2.CAP_PROP_FPS is not used since it returns 'infinity' for variable frame rate videos
    filename = "clip1.mp4"

    # Determining the total duration of the video
    clip = VideoFileClip(filename)

    cap2 = cv2.VideoCapture(filename)
    co = 0
    ret2 = True
    while ret2:
        ret2, frame2 = cap2.read()
        if ret2:
            # Counting the total number of frames
            co += 1
    cap2.release()

    # Computing the average FPS of the video
    Input_FPS = co / clip.duration

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
    frame_count = 0

    # Implementing Deep Sort algorithm
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    # Cosine distance is used as the metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    video_capture = cv2.VideoCapture(filename)

    # Define the codec and create a VideoWriter object to save the output video
    out = cv2.VideoWriter('output.mp4', cv2.VideoWriter_fourcc(*'MP4V'), Input_FPS,
                          (int(video_capture.get(3)), int(video_capture.get(4))))

    # To calculate the frames processed by the deep sort algorithm per second
    fps = 0.0

    # Loop to process each frame and track people
    while True:
        ret, frame = video_capture.read()
        if ret != True:
            break
        t1 = time.time()

        step1 = cv2.edgePreservingFilter(frame, flags=1, sigma_s=15, sigma_r=0.1)
        step2 = cv2.detailEnhance(step1, sigma_s=40, sigma_r=0.1)
        cv2.imwrite('preprocessing.jpg', step2)
        im = Image.open("preprocessing.jpg")
        enhancer = ImageEnhance.Sharpness(im)
        enhanced_im = enhancer.enhance(6.0)
        enhanced_im.save("enhanced.jpg")
        frame = cv2.imread('enhanced.jpg')

        image = Image.fromarray(frame[..., ::-1])  # BGR to RGB conversion
        boxs = yolo.detect_image(image)
        features = encoder(frame, boxs)

        # Getting the detections having score of 0.0 to 1.0
        detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]

        # Run non-maxima suppression on the bounding boxes
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        head_count = 0

        # Drawing bounding box detections for people inside the store
        for det in detections:
            head_count += 1
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        cv2.putText(frame, str(head_count), (50, 50), 0, 1.5, (0, 255, 77), 2)

        # Write the frame onto the VideoWriter object
        out.write(frame)

        # Calculating the frames processed per second by the model
        fps = (fps + (1. / (time.time() - t1))) / 2
        frame_count += 1

        # Printing processing status to track completion
        op = "FPS_" + str(frame_count) + "/" + str(co) + ": " + str(round(fps, 2))
        print("\r" + op, end="")

    # Releasing objects created
    video_capture.release()
    out.release()
    cv2.destroyAllWindows()
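# Note on the enhancement chain above: the imwrite/Image.open/imread round-trip
# exists only to hand the frame from OpenCV to PIL and back, and it hits the disk
# twice per frame. Below is a minimal sketch of the same chain done in memory;
# the helper name enhance_frame is an assumption for illustration, not part of
# the original code.
def enhance_frame(frame):
    # Smooth noise while keeping edges, then boost local detail.
    step1 = cv2.edgePreservingFilter(frame, flags=1, sigma_s=15, sigma_r=0.1)
    step2 = cv2.detailEnhance(step1, sigma_s=40, sigma_r=0.1)
    # Convert the BGR ndarray to an RGB PIL image, sharpen, and convert back.
    pil_im = Image.fromarray(cv2.cvtColor(step2, cv2.COLOR_BGR2RGB))
    enhanced = ImageEnhance.Sharpness(pil_im).enhance(6.0)
    return cv2.cvtColor(np.array(enhanced), cv2.COLOR_RGB2BGR)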
def Setup(self):
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", 0.2, None)
    self.tracker = Tracker(metric, max_iou_distance=0.7, max_age=200, n_init=4)
    self.log('init')
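# Hedged usage sketch for the Setup above: the surrounding class is not shown,
# so the method name Update and the per-frame `detections` argument (a list of
# deep_sort Detection objects) are assumptions, not the original API.
def Update(self, detections):
    # One predict/update cycle per frame, as in the other snippets in this file.
    self.tracker.predict()
    self.tracker.update(detections)
    # Return confirmed, recently updated tracks as (id, top-left/bottom-right box).
    return [(t.track_id, t.to_tlbr()) for t in self.tracker.tracks
            if t.is_confirmed() and t.time_since_update <= 1]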
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1.0

    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    try:
        vid = stream  # capture.open(best.url)
    except:
        vid = stream  # capture.open(best.url)

    out = None
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
    list_file = open('detection.txt', 'w')
    frame_index = -1

    fps = 0.0
    count = 0
    while True:
        img = vid.read()

        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            else:
                break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)
        detections = [Detection(bbox, score, class_name, feature)
                      for bbox, score, class_name, feature
                      in zip(converted_boxes, scores[0], names, features)]

        # initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])),
                          color, -1)
            cv2.putText(img, class_name + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2)

        ### UNCOMMENT BELOW IF YOU WANT CONSTANTLY CHANGING YOLO DETECTIONS TO BE SHOWN ON SCREEN
        # for det in detections:
        #     bbox = det.to_tlbr()
        #     cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        # print fps on screen
        fps = (fps + (1. / (time.time() - t1))) / 2
        cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        cv2.imshow('output', img)
        if FLAGS.output:
            out.write(img)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(converted_boxes) != 0:
                for i in range(0, len(converted_boxes)):
                    list_file.write(str(converted_boxes[i][0]) + ' ' + str(converted_boxes[i][1]) + ' ' +
                                    str(converted_boxes[i][2]) + ' ' + str(converted_boxes[i][3]) + ' ')
            list_file.write('\n')

        # press q to quit
        if cv2.waitKey(1) == ord('q'):
            break

    vid.release()
    if FLAGS.output:
        out.release()
    list_file.close()
    cv2.destroyAllWindows()
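# Each detection.txt line written above is the frame index followed by one
# 'left top width height' quadruple per box, so a line such as (hypothetical
# values, for illustration only):
#   12 214 88 63 170 402 95 58 166
# would be frame 12 with two boxes in the tlwh format produced by convert_boxes().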
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True

    video_capture = cv2.VideoCapture('top_view1.avi')
    video_capture_1 = cv2.VideoCapture('demo1.avi')

    # if writeVideo_flag:
    #     # Define the codec and create VideoWriter object
    #     w = int(video_capture.get(3))
    #     h = int(video_capture.get(4))
    #     fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    #     out = cv2.VideoWriter('output1.avi', fourcc, 15, (w, h))
    #     list_file = open('detection.txt', 'w')
    #     frame_index = -1

    fps = 0.0
    fig = plt.figure()
    fig1 = plt.figure()
    count = 0
    count1 = 0
    x_list = []
    y_list = []
    x_list1 = []
    y_list1 = []
    # ax1 = fig.add_subplot(1, 1, 1)

    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        ret1, frame1 = video_capture_1.read()  # frame shape 640*480*3
        # if ret == True:
        #     print(' VIDEO FOUND')
        # t1 = time.time()

        # image = Image.fromarray(frame)
        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        image1 = Image.fromarray(frame1[..., ::-1])  # bgr to rgb
        boxs = yolo.detect_image(image)
        boxs1 = yolo.detect_image(image1)
        print("box_co-ordinate = ", boxs)
        print("box_co-ordinate = ", boxs1)
        features = encoder(frame, boxs)
        features1 = encoder(frame1, boxs1)

        # score to 1.0 here).
        detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]
        detections1 = [Detection1(bbox1, 1.0, feature1) for bbox1, feature1 in zip(boxs1, features1)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        boxes1 = np.array([d.tlwh for d in detections1])
        scores1 = np.array([d.confidence for d in detections1])
        indices1 = preprocessing.non_max_suppression(boxes1, nms_max_overlap, scores1)
        detections1 = [detections1[i] for i in indices1]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)
        # tracker1.predict()
        # tracker1.update(detections1)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2)

        # for track1 in tracker1.tracks:
        #     if not track1.is_confirmed() or track1.time_since_update > 1:
        #         continue
        #     bbox1 = track1.to_tlbr()
        #     cv2.rectangle(frame1, (int(bbox1[0]), int(bbox1[1])), (int(bbox1[2]), int(bbox1[3])), (255, 255, 255), 2)
        #     cv2.putText(frame1, str(track1.track_id), (int(bbox1[0]), int(bbox1[1])), 0, 5e-3 * 200, (0, 255, 0), 2)

        for det in detections:
            bbox = det.to_tlbr()
            # print(type(bbox))
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
            # print("The co-ordinates are:", int(bbox[0]), int(bbox[1]))

        # for det1 in detections1:
        #     bbox1 = det1.to_tlbr()
        #     cv2.rectangle(frame1, (int(bbox1[0]), int(bbox1[1])), (int(bbox1[2]), int(bbox1[3])), (255, 0, 0), 2)

        try:
            for i in boxs:
                x = (i[0] + i[2]) / 2
                y = (i[1] + i[3]) / 2
                count += 1
                x_list.append(x)
                y_list.append(y)
                if count == 1:
                    points = plt.scatter(x_list, y_list)
                elif count > 1:
                    print('x:', x_list, 'y:', y_list)
                    points.remove()
                    points = plt.scatter(x_list, y_list)
                    # plt.pause(0.9)
                x_list.clear()
                y_list.clear()
        except:
            continue

        try:
            for i in boxs1:
                x = (i[0] + i[2]) / 2
                y = (i[1] + i[3]) / 2
                count1 += 1
                x_list1.append(x)
                y_list1.append(y)
                if count1 == 1:
                    points = plt.scatter(x_list1, y_list1)
                elif count1 > 1:
                    print('x:', x_list1, 'y:', y_list1)
                    points.remove()
                    points = plt.scatter(x_list1, y_list1)
                    # plt.pause(0.9)
                x_list1.clear()
                y_list1.clear()
        except:
            continue

        # redraw the canvas
        fig.canvas.draw()
        fig1.canvas.draw()

        # convert canvas to image
        img = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
        img = img.reshape(fig.canvas.get_width_height()[::-1] + (3,))
        # img is rgb, convert to opencv's default bgr
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

        # for second frame
        img1 = np.frombuffer(fig1.canvas.tostring_rgb(), dtype=np.uint8)
        img1 = img1.reshape(fig1.canvas.get_width_height()[::-1] + (3,))
        # img is rgb, convert to opencv's default bgr
        img1 = cv2.cvtColor(img1, cv2.COLOR_RGB2BGR)

        # display image with opencv or any operation you like
        cv2.imshow("plot", img)
        cv2.imshow('frame', frame)
        cv2.imshow("plot2", img1)
        cv2.imshow('frame1', frame1)

        # if writeVideo_flag:
        #     # save a frame
        #     out.write(frame)
        #     frame_index = frame_index + 1
        #     list_file.write(str(frame_index) + ' ')
        #     if len(boxs) != 0:
        #         for i in range(0, len(boxs)):
        #             list_file.write(str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' + str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')
        #     list_file.write('\n')

        # fps = (fps + (1. / (time.time() - t1))) / 2
        # print("fps= %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(5) & 0xFF == ord('q'):
            break

    video_capture.release()
    # if writeVideo_flag:
    #     out.release()
    #     list_file.close()
    cv2.destroyAllWindows()
from deep_sort import nn_matching
from deep_sort.detection import Detection
from deep_sort.tracker import Tracker
from tools import generate_detections as gdet
from deep_sort.detection import Detection as ddet

warnings.filterwarnings('ignore')

max_cosine_distance = 0.3
nn_budget = None
nms_max_overlap = 1.0

# deep_sort
model_filename = '/home/nvidia/hello_rospy/src/beginner_tutorials/scripts/model_data/mars-small128.pb'
encoder = gdet.create_box_encoder(model_filename, batch_size=1)
metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
tracker = Tracker(metric)

writeVideo_flag = True

# video_capture = cv2.VideoCapture(0)

if writeVideo_flag:
    # Define the codec and create VideoWriter object
    # w = int(video_capture.get(3))
    # h = int(video_capture.get(4))
    # fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    # out = cv2.VideoWriter('output.avi', fourcc, 15, (w, h))
    list_file = open('detection.txt', 'w')
    frame_index = -1

fps = 0.0
yolo = YOLO()
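# A minimal sketch (an assumption, not in the original) of how the module-level
# encoder/tracker above could be driven per frame, e.g. from a ROS image
# callback; it mirrors the detection -> NMS -> predict/update sequence used
# throughout this file.
def process_frame(frame):
    image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
    boxs = yolo.detect_image(image)
    features = encoder(frame, boxs)
    detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]
    boxes = np.array([d.tlwh for d in detections])
    scores = np.array([d.confidence for d in detections])
    indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
    detections = [detections[i] for i in indices]
    tracker.predict()
    tracker.update(detections)
    return [t for t in tracker.tracks if t.is_confirmed() and t.time_since_update <= 1]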
def run(sequence_dir, detection_file, output_file, min_confidence, nms_max_overlap,
        min_detection_height, max_cosine_distance, nn_budget, display):
    """Run multi-target tracker on a particular sequence.

    Parameters
    ----------
    sequence_dir : str
        Path to the MOTChallenge sequence directory.
    detection_file : str
        Path to the detections file.
    output_file : str
        Path to the tracking output file. This file will contain the tracking
        results on completion.
    min_confidence : float
        Detection confidence threshold. Disregard all detections that have a
        confidence lower than this value.
    nms_max_overlap : float
        Maximum detection overlap (non-maxima suppression threshold).
    min_detection_height : int
        Detection height threshold. Disregard all detections that have a height
        lower than this value.
    max_cosine_distance : float
        Gating threshold for cosine distance metric (object appearance).
    nn_budget : Optional[int]
        Maximum size of the appearance descriptor gallery. If None, no budget
        is enforced.
    display : bool
        If True, show visualization of intermediate tracking results.
    IDnum : int
        Tracking ID number (read interactively below).

    """
    # The frame image locations and video number are in seq_info["image_filenames"],
    # and the frame number is seq_info["image_filenames"][frame_idx];
    # all of it comes from gather_sequence_info.
    if 'y' == input("is there an ID [y/n] : "):
        IDnum = int(input("ID number for tracking : "))
        # range_down = input("ID tracking range_down : ")
        # range_up = input("ID tracking range_up : ")
    else:
        IDnum = 0
    if 'y' == input("foot display [y/n] : "):
        # once the y-value function is derived, show the foot positions
        foot_dis = True
    else:
        foot_dis = False

    seq_info = gather_sequence_info(sequence_dir, detection_file)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)
    results = []
    target_h = []
    update_ms = seq_info["update_ms"]
    max_frame_idx = seq_info["max_frame_idx"]
    i = 0

    def frame_callback(vis, frame_idx):
        # print("Processing frame %05d" % frame_idx)

        # Load image and generate detections.
        detections = create_detections(seq_info["detections"], frame_idx, min_detection_height)
        detections = [d for d in detections if d.confidence >= min_confidence]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])  # (x, y, w, h)
        scores = np.array([d.confidence for d in detections])  # Detector confidence score
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Update tracker.
        tracker.predict()
        tracker.update(detections)

        # Update visualization.
        if display or foot_dis:
            image = cv2.imread(seq_info["image_filenames"][frame_idx], cv2.IMREAD_COLOR)
            vis.set_image(image.copy())
            if IDnum == 0:
                vis.draw_detections(detections)
                vis.draw_trackers(tracker.tracks)
            else:
                # when there is a target ID to find
                vis.draw_target_trackers(tracker.tracks, IDnum)
                if foot_dis:
                    # show only the tracked ID and mark its feet
                    vis.draw_foot(tracker.tracks, IDnum)

        # stored h values
        h_file = os.path.dirname(output_file)  # result/text/
        with open(h_file + '/ID_h.txt', 'r') as f_hi:
            line_splits = [int(l.split(',')[1]) for l in f_hi.read().splitlines()[1:]]

        # Store results.
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            # feed in just the bbox instead of the track and compute from it
            bbox = track.to_tlwh()
            results.append([frame_idx, track.track_id, bbox[0], bbox[1], bbox[2], bbox[3]])
            if int(track.track_id) == int(IDnum):
                # print("find ID-01")
                if bbox[1] + bbox[3] < seq_info["image_size"][0]:
                    # h per y value of this id
                    target_h.append([track.track_id, bbox[1] + bbox[3], bbox[3]])
                if frame_idx >= 40:  # the start frame is set here
                    # in MOT16-02, it takes 260 frames until the old woman stops
                    endT = 117 * update_ms / 1000
                    # endT = 5
                    vel_py.foot_world(frame_idx, track.track_id, bbox, IDnum,
                                      update_ms, max_frame_idx, endT)
                    # once velocity estimation is done, stop the computation with count = 0
                # look at foot 10
                for i in range(10):
                    if (int(bbox[1] + bbox[3]) > line_splits[i] - 1 and
                            int(bbox[1] + bbox[3]) < line_splits[i] + 1):
                        print(int(bbox[1] + bbox[3]))
                        vis.draw_foot(tracker.tracks, IDnum)

    # Run tracker.
    if display:
        visualizer = visualization.Visualization(seq_info, update_ms=5)
    else:
        visualizer = visualization.NoVisualization(seq_info)

    ##### video saving ###########
    # video_output_dir = os.path.curdir + '/result/video'
    # The video name is seq_info["sequence_name"]:
    # video_filename = os.path.join(video_output_dir, "%s_all_tracking.avi" % (os.path.basename(sequence_dir)))
    # video_filename = os.path.join(video_output_dir, "%s_ID%s_tracking.avi" % (os.path.basename(sequence_dir), IDnum))
    # video_filename = os.path.join(video_output_dir, "%s_ID%s_foot 10.avi" % (os.path.basename(sequence_dir), IDnum))
    # if video_filename is not None:
    #     visualizer.viewer.enable_videowriter(video_filename)
    #############################
    visualizer.run(frame_callback)

    # Store results.
    f = open(output_file, 'w')
    for row in results:
        # [frame_idx, track.track_id, bbox[0], bbox[1], bbox[2], bbox[3]]
        print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1' % (row[0], row[1], row[2], row[3], row[4], row[5]), file=f)
    f.close()
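# The rows written by run() above follow the MOTChallenge text format:
#   frame, id, bb_left, bb_top, bb_width, bb_height, conf, x, y, z
# so a (hypothetical) line such as
#   1,2,1359.10,413.27,120.26,362.77,1,-1,-1,-1
# means: frame 1, track 2, a 120.26x362.77 box with top-left at (1359.10, 413.27),
# confidence fixed to 1, and -1,-1,-1 for the unused 3D world coordinates.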
def get_detect(id_video):
    global control_color
    dq = deque(maxlen=1)
    t_read_video = threading.Thread(target=read_video, args=(dq,))
    t_put_data = threading.Thread(target=put_data)
    t_read_video.start()
    t_put_data.start()
    # myout = save_video(video_reader, "./video.mp4", sz)

    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
    counter = []
    my_track_dict = {}  # save the info of track_id
    track_smooth_dict = {}  # smooth the imshow
    pts = [deque(maxlen=30) for _ in range(9999)]

    # deep_sort
    model_filename = 'model_data/market1501.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    list_file = open('detection_rslt.txt', 'w')
    save_file = mk_dir()
    num = 0
    t1 = time.time()
    while True:
        # avoid the memory error.
        if len(my_track_dict) > 50:
            my_track_dict = {}
            print(len(my_track_dict))
        if dq:
            img = dq.pop()
        else:
            time.sleep(0.05)
            continue
        start_time = time.time()
        num += 1
        if num % 500 == 1:
            cv2.imwrite(save_file + "/_{}.jpg".format(num), img)
        img_h, img_w, img_ch = img.shape
        print(img.shape)

        # 2. keep clean copies so drawn boxes are not carried into cropping or inference
        show_image = img.copy()
        frame = img.copy()

        # the predict of person.
        boxs, confidence, class_names = [], [], []
        out = preson_detect(img)

        # transform the object detection data into tracker input
        for i in range(len(out)):
            # ========my_setting==============
            if out[i, 2] > 0.7:
                # print(out[i])
                left = int(out[i, 3] * img_w)
                top = int(out[i, 4] * img_h)
                p_w = int(out[i, 5] * img_w - out[i, 3] * img_w)
                p_h = int(out[i, 6] * img_h - out[i, 4] * img_h)
                right = left + p_w
                bottom = top + p_h
                # detect the person in setting area.
                point1 = [int((left + right) / 2), bottom]
                my_index = inner_point(point1)
                if my_index:
                    boxs.append([left, top, p_w, p_h])
                    class_names.append("person")
                    confidence.append(out[i, 2])

        # start use the tracker
        features = encoder(frame, boxs)
        # score to 1.0 here.
        detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        i = int(0)
        indexIDs = []

        # setting detect time
        t2 = time.time()
        detect_time = t2 - t1
        # ========my_setting==============
        control_time = 0.2  # run attribute detection once every control_time seconds
        if detect_time > control_time:
            t1 = time.time()

        for det, track in zip(detections, tracker.tracks):
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            # print(track.track_id)
            # draw the boxs of object detection.
            pbox = det.to_tlbr()
            # cv2.rectangle(frame, (int(pbox[0]), int(pbox[1])), (int(pbox[2]), int(pbox[3])), (255, 255, 255), 2)
            my_key = str(int(track.track_id))
            # ========my_setting==============
            # if my_key is new or the interval has elapsed, re-run attribute detection.
            if my_key not in my_track_dict.keys() or detect_time > control_time:
                # print(my_key)
                # print(my_track_dict.keys())
                # the code of processing the person box.
                label_dict = get_labels(img, pbox)
                print("**" * 20, label_dict)
                if type(label_dict) == type(None):
                    continue
                if "coat" not in label_dict.keys():
                    continue
                my_track_dict[my_key] = label_dict

            # draw the attr of person.
            frame = draw_person_attr(frame, my_track_dict[my_key], pbox, control_color)

            indexIDs.append(int(track.track_id))
            counter.append(int(track.track_id))
            bbox = track.to_tlbr()
            color = [int(c) for c in COLORS[indexIDs[i] % len(COLORS)]]

            # define the color of rectangle.
            if my_track_dict[my_key]["coat"] == "Yes":
                color_rect = (0, 255, 0)
            else:
                color_rect = (0, 0, 255)

            # center_loc = [int((bbox[0] + bbox[2]) / 2), int((bbox[1] + bbox[3]) / 2)]
            if my_key not in track_smooth_dict.keys():
                print("---------------------------------------------------->")
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color_rect, 3)
                track_smooth_dict[my_key] = bbox
            else:
                # average the current box with the previous one to smooth the display
                fbox = track_smooth_dict[my_key]
                a = int((bbox[0] + fbox[0]) / 2)
                b = int((bbox[1] + fbox[1]) / 2)
                c = int((bbox[2] + fbox[2]) / 2)
                d = int((bbox[3] + fbox[3]) / 2)
                cv2.rectangle(frame, (a, b), (c, d), color_rect, 3)
                track_smooth_dict[my_key] = bbox

            # draw the boxs of track.
            # cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (color), 3)
            if True:
                cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1] - 50)),
                            0, 5e-3 * 150, color, 2)
                if len(class_names) > 0:
                    class_name = class_names[0]
                    cv2.putText(frame, str(class_names[0]), (int(bbox[0]), int(bbox[1] - 20)),
                                0, 5e-3 * 150, color, 2)
            i += 1

            # throttle the upload rate
            if num % 200 == 1:
                my_result = my_track_dict[my_key]
                pic_name = str(int(time.time())) + "_" + my_key
                # put_data(my_key, my_result, frame)
                q_put_img.append([pic_name, my_result, frame])

        count = len(set(counter))

        # draw the guard line.
        draw_muti(frame)
        # cv2.putText(frame, "Total Pedestrian Counter: " + str(count), (int(20), int(120)), 0, 5e-3 * 200, (0, 255, 0), 2)
        # cv2.putText(frame, "Current Pedestrian Counter: " + str(i), (int(20), int(80)), 0, 5e-3 * 200, (0, 255, 0), 2)

        end_time = time.time()
        my_one_time = (end_time - start_time) * 1000
        print("====={}=====".format(num), my_one_time)

        frame = cv2.resize(frame, (640, 360))
        ret2, jpeg = cv2.imencode('.jpg', frame)
        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + jpeg.tobytes() + b'\r\n\r\n')
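# The generator above emits an MJPEG multipart stream. A hedged sketch of how
# such a generator is typically served follows; the Flask app and route are
# assumptions for illustration and are not shown in the original:
# from flask import Flask, Response
# app = Flask(__name__)
# @app.route('/video_feed/<id_video>')
# def video_feed(id_video):
#     return Response(get_detect(id_video),
#                     mimetype='multipart/x-mixed-replace; boundary=frame')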
def run(video_file, min_confidence, nms_max_overlap, min_detection_height,
        max_cosine_distance, nn_budget, display):
    """Run multi-target tracker on a particular sequence.

    Parameters
    ----------
    video_file : str
        Path to the video file.
    min_confidence : float
        Detection confidence threshold. Disregard all detections that have a
        confidence lower than this value.
    nms_max_overlap : float
        Maximum detection overlap (non-maxima suppression threshold).
    min_detection_height : int
        Detection height threshold. Disregard all detections that have a height
        lower than this value.
    max_cosine_distance : float
        Gating threshold for cosine distance metric (object appearance).
    nn_budget : Optional[int]
        Maximum size of the appearance descriptor gallery. If None, no budget
        is enforced.
    display : bool
        If True, show visualization of intermediate tracking results.

    """
    cfg_file = "yolo3/cfg/yolov3.cfg"
    weight_file = "yolo3/yolov3.weights"
    # weight_file = 'yolo3/backup/MOT17Det/yolov3-mot17det_10000.weights'
    use_cuda = 1
    det_model = create_model(cfg_file, weight_file, use_cuda)

    seq_info = gather_video_info(video_file)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    # just for warming up
    img = cv2.imread('./000001.jpg')
    sized = cv2.resize(img, (det_model.width, det_model.height))
    sized = cv2.cvtColor(sized, cv2.COLOR_BGR2RGB)
    boxes = do_detect(det_model, sized, 0.5, 0.4, use_cuda)

    def frame_callback(vis, frame_idx):
        # print("Processing frame %05d" % frame_idx)
        # global total_frames, total_time
        ret, img = seq_info['video_cap'].read()
        if not ret:
            print('there is no frame!')
            sys.exit(1)

        # time_0 = time.time()
        # Load image and generate detections.
        detections = create_det_from_model(det_model, img, 0.5, 0.4, min_detection_height, use_cuda)
        # detections = create_detections(seq_info["detections"], frame_idx, min_detection_height)
        # if seq_info['groundtruth'] is not None:
        #     gts = create_gts(seq_info['groundtruth'], frame_idx)
        # detections = create_detections(seq_info["detections"], frame_idx,
        #                                seq_info["image_filenames"][frame_idx], encoder, min_detection_height)
        detections = [d for d in detections if d.confidence >= min_confidence]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Update tracker.
        tracker.predict()
        tracker.update(detections)
        # time_1 = time.time()
        # total_time += time_1 - time_0
        # total_frames += 1

        # Update visualization.
        if display:
            # image = cv2.imread(seq_info["image_filenames"][frame_idx], cv2.IMREAD_COLOR)
            # vis.set_image(image.copy())
            vis.set_image(img.copy())
            # vis.draw_detections(detections)
            # vis.draw_detections(gts)
            vis.draw_trackers(tracker.tracks)

        # Store results.
        # NOTE: store from n_init frame (1-based index)
        # for track in tracker.tracks:
        #     # NOTE: the condition is different from that in drawing tracks
        #     if not track.is_confirmed() or track.time_since_update > 1:
        #         continue
        #     # NOTE: store estimated state instead of observation
        #     bbox = track.to_tlwh()
        #     results.append([frame_idx, track.track_id, bbox[0], bbox[1], bbox[2], bbox[3]])

    # Run tracker.
    if display:
        visualizer = visualization.Visualization(seq_info, update_ms=1)
    else:
        visualizer = visualization.NoVisualization(seq_info)
    visualizer.run(frame_callback)
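# Hypothetical invocation of the video-based run() above; the path and the
# threshold values are placeholders chosen to match defaults used elsewhere in
# this file, not values from the original.
# run("videos/demo.avi", min_confidence=0.5, nms_max_overlap=1.0,
#     min_detection_height=0, max_cosine_distance=0.2, nn_budget=100, display=True)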
def get_keypoints_and_id_from_img_without_normalize(self, img):
    # KP ordering of body parts
    NECK = 1
    R_SHOULDER = 2
    R_ELBOW = 3
    R_WRIST = 4
    L_SHOULDER = 5
    L_ELBOW = 6
    L_WRIST = 7
    MID_HIP = 8
    R_HIP = 9
    R_KNEE = 10
    R_ANKLE = 11
    L_HIP = 12
    L_KNEE = 13
    L_ANKLE = 14

    # Define bodyparts to get the selected keypoints
    BODY_PARTS = [NECK, R_SHOULDER, R_ELBOW, R_WRIST, L_SHOULDER, L_ELBOW, L_WRIST,
                  MID_HIP, R_HIP, R_KNEE, R_ANKLE, L_HIP, L_KNEE, L_ANKLE]

    # Set tracker
    max_cosine_distance = 0.2
    nn_budget = 100
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    # Get data points (datum)
    datum = op.Datum()
    datum.cvInputData = img
    self.opWrapper.emplaceAndPop(op.VectorDatum([datum]))

    # Initialize lists
    arr = []
    boxes = []
    list_of_pose_temp = []
    list_of_pose_and_id = []
    try:
        # Get highest and lowest keypoints
        for kp_idx, keypoint in enumerate(datum.poseKeypoints):
            pop_all(arr)
            x_high = 0
            x_low = 9999
            y_high = 0
            y_low = 9999
            for count, x in enumerate(keypoint):
                # Avoid x=0 and y=0, since undiscovered keypoints are reported there.
                # This "if" finds the LOWEST and HIGHEST discovered keypoints.
                if x[0] != 0 and x[1] != 0:
                    if x_high < x[0]:
                        x_high = x[0]
                    if x_low > x[0]:
                        x_low = x[0]
                    if y_high < x[1]:
                        y_high = x[1]
                    if y_low > x[1]:
                        y_low = x[1]
                # Add pose keypoints to a dictionary
                if count in BODY_PARTS:
                    KP = {'x': x[0], 'y': x[1]}
                    # Append dictionary to array
                    arr.append(KP)

            # Find the highest and lowest position of x and y
            # (Used to draw rectangle)
            if y_high - y_low > x_high - x_low:
                height = y_high - y_low
                width = x_high - x_low
            else:
                height = x_high - x_low
                width = y_high - y_low

            # Draw rectangle (get width and height)
            y_high = int(y_high + height / 40)
            y_low = int(y_low - height / 12)
            x_high = int(x_high + width / 5)
            x_low = int(x_low - width / 5)

            # Normalize keypoint
            # (append a copy, since pop_all() above empties `arr` in place and
            # would otherwise erase entries already stored for earlier people)
            list_of_pose_temp.append(arr.copy())

            # Make the box
            boxes.append([x_low, y_low, width, height])

            # Encode the features inside the designated box
            features = self.encoder(datum.cvOutputData, boxes)

            # For a non-empty item add to the detection array
            def nonempty(xywh):
                return xywh[2] != 0 and xywh[3] != 0

            detections = [Detection(bbox, 1.0, feature)
                          for bbox, feature in zip(boxes, features) if nonempty(bbox)]

            # Run non-maxima suppression.
            np_boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            indices = preprocessing.non_max_suppression(np_boxes, self.nms_max_overlap, scores)
            detections = [detections[i] for i in indices]

            # Update tracker.
            tracker.predict()
            tracker.update(detections)

            # Make pose and person ID list
            if kp_idx == len(datum.poseKeypoints) - 1:
                for track_idx, track in enumerate(tracker.tracks):
                    bbox = track.to_tlwh()
                    list_of_pose_and_id.append({
                        "Keypoints": list_of_pose_temp[track_idx],
                        "ID": track.track_id
                    })
        return list_of_pose_and_id
    except Exception as e:
        # Swallow errors (e.g. no people detected) and implicitly return None.
        print(end="")
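# Hedged usage sketch for the method above; `estimator` stands for an instance
# of the enclosing (unshown) class, and the image path is a placeholder. The
# method returns None when nothing is detected, hence the `or []` guard.
# img = cv2.imread('frame.jpg')
# poses = estimator.get_keypoints_and_id_from_img_without_normalize(img) or []
# for pose in poses:
#     print(pose["ID"], len(pose["Keypoints"]))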
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = False

    os.environ["OPENCV_FFMPEG_CAPTURE_OPTIONS"] = "rtsp_transport;udp"
    video_capture = cv2.VideoCapture("rtsp://192.168.4.103:8080/h264.sdp", cv2.CAP_FFMPEG)

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter('output.avi', fourcc, 15, (w, h))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if ret != True:
            break
        # resize only after a successful read, since frame is None on failure
        frame = cv2.resize(frame, (640, 360))
        t1 = time.time()

        # image = Image.fromarray(frame)
        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        boxs = yolo.detect_image(image)
        # print("box_num", len(boxs))
        features = encoder(frame, boxs)

        # score to 1.0 here).
        detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2)

        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        cv2.imshow('', frame)

        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxs) != 0:
                for i in range(0, len(boxs)):
                    list_file.write(str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' +
                                    str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')
            list_file.write('\n')

        fps = (fps + (1. / (time.time() - t1))) / 2
        print("fps= %f" % fps)

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
if cv2.__version__.split(".")[0] == "2":
    frame_count = vcap.get(cv2.cv.CV_CAP_PROP_FRAME_COUNT)
else:
    # opencv 3/4
    frame_count = vcap.get(cv2.CAP_PROP_FRAME_COUNT)

# initialize tracking module
if args.get_tracking:
    tracking_objs = args.tracking_objs.split(",")
    tracker_dict = {}
    tracking_results_dict = {}
    tmp_tracking_results_dict = {}
    for tracking_obj in tracking_objs:
        metric = nn_matching.NearestNeighborDistanceMetric(
            "cosine", args.max_cosine_distance, args.nn_budget)
        tracker_dict[tracking_obj] = Tracker(metric, max_iou_distance=args.max_iou_distance)
        tracking_results_dict[tracking_obj] = []
        tmp_tracking_results_dict[tracking_obj] = {}

# videoname = os.path.splitext(os.path.basename(videofile))[0]
videoname = os.path.basename(videofile)
video_obj_out_path = None
if args.obj_out_dir is not None:  # not saving box json to save time
    video_obj_out_path = os.path.join(args.obj_out_dir, videoname)
    if not os.path.exists(video_obj_out_path):
        os.makedirs(video_obj_out_path)

# 3. read frame one by one
cur_frame = 0
vis_count = 0
frame_stack = []
def main():
    start = time.time()
    first = start

    # Definition of the parameters
    max_cosine_distance = 0.5  # 0.9; gating threshold for the cosine distance
    nn_budget = None
    nms_max_overlap = 0.3  # non-maxima suppression threshold
    counter = []

    # deep_sort
    model_filename = 'model_data/market1501.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    find_objects = ['person', 'fire_extinguisher', 'fireplug', 'car', 'bicycle', 'motorcycle']
    yolo = YOLO()

    for cnt in range(1, 2):
        video_path = "./t1_video/t1_video_%05d" % cnt
        images = os.listdir(video_path)
        images.sort()
        print(images[0])

        trackers = []
        counters = []
        for idx in range(0, 6):
            metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
            trackers.append(Tracker(metric))
            count = []
            counters.append(count)

        tracker_time = 0
        yolo_time = 0
        for image_path in images:
            # image_path = video_path + "/t1_video_%05d_%05d.jpg" % (1, fc)
            t1 = time.time()
            # print(video_path + "/" + image_path)
            frame = cv2.imread(video_path + "/" + image_path)
            image = Image.fromarray(frame[..., ::-1])  # bgr to rgb

            yolo_start = time.time()
            yolo_dict = yolo.detect_image(image)
            yolo_end = time.time()
            yolo_time += (yolo_end - yolo_start)

            for idx in range(0, 6):
                # print(idx)
                tracker = trackers[idx]
                counter = counters[idx]
                boxs = yolo_dict.get(find_objects[idx])
                if boxs is None:
                    continue
                features = encoder(frame, boxs)

                # score to 1.0 here).
                detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]

                # Run non-maxima suppression.
                boxes = np.array([d.tlwh for d in detections])
                scores = np.array([d.confidence for d in detections])
                indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
                detections = [detections[i] for i in indices]

                # Call the tracker
                t_start = time.time()
                tracker.predict()
                tracker.update(detections)
                t_end = time.time()
                tracker_time += (t_end - t_start)

                for track in tracker.tracks:
                    if not track.is_confirmed() or track.time_since_update > 1:
                        continue
                    # boxes.append([track[0], track[1], track[2], track[3]])
                    counter.append(int(track.track_id))

        #######################################
        num_person = len(set(counters[0]))
        num_fire_extinguisher = len(set(counters[1]))
        num_fireplug = len(set(counters[2]))
        num_car = len(set(counters[3]))
        num_bicycle = len(set(counters[4]))
        num_motorcycle = len(set(counters[5]))
        ress.append(present_result(cnt, num_person, num_fire_extinguisher, num_fireplug,
                                   num_car, num_bicycle, num_motorcycle))
        t1_res_cai["track1_results"] = ress
        with open('t1_res_cai.json', 'w') as make_file:
            json.dump(t1_res_cai, make_file, ensure_ascii=False, indent=4)
        #######################################

        for idx in range(0, 6):
            print(len(set(counters[idx])), end=" ")
        end = time.time()
        print(':: total:%.2f yolo:%.2f tracker:%.2f' % ((end - start), yolo_time, tracker_time))
        start = end

    last = time.time()
    print(':: %.2f' % (last - first))
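# Hedged entry-point sketch: main() above mutates the globals `ress` and
# `t1_res_cai`, which are defined elsewhere in the original module, so a guard
# like the following is assumed rather than shown:
# if __name__ == '__main__':
#     ress = []
#     t1_res_cai = {}
#     main()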