def run(sequence_dir, detection_file, output_file, min_confidence,
        nms_max_overlap, min_detection_height, max_cosine_distance,
        nn_budget, display):
    """Run multi-target tracker on a particular sequence.

    Parameters
    ----------
    sequence_dir : str
        Path to the MOTChallenge sequence directory.
    detection_file : str
        Path to the detections file.
    output_file : str
        Path to the tracking output file. This file will contain the tracking
        results on completion.
    min_confidence : float
        Detection confidence threshold. Disregard all detections that have
        a confidence lower than this value.
    nms_max_overlap : float
        Maximum detection overlap (non-maxima suppression threshold).
    min_detection_height : int
        Detection height threshold. Disregard all detections that have
        a height lower than this value.
    max_cosine_distance : float
        Gating threshold for cosine distance metric (object appearance).
    nn_budget : Optional[int]
        Maximum size of the appearance descriptor gallery. If None, no budget
        is enforced.
    display : bool
        If True, show visualization of intermediate tracking results.

    """
    seq_info = gather_sequence_info(sequence_dir, detection_file)
    metric = nn_matching.NearestNeighborDistanceMetric(
        "cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)
    results = []
    # Maps track_id -> wall-clock time at which the track was first confirmed.
    first_seen = {}

    def frame_callback(vis, frame_idx):
        if frame_idx % 100 == 0:
            print("Tracked frame %d" % frame_idx)

        # Load image and generate detections.
        detections = create_detections(
            seq_info["detections"], frame_idx, min_detection_height)
        detections = [d for d in detections if d.confidence >= min_confidence]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(
            boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Update tracker.
        tracker.predict()
        tracker.update(detections)

        # Update visualization.
        if display:
            image = cv2.imread(
                seq_info["image_filenames"][frame_idx], cv2.IMREAD_COLOR)
            vis.set_image(image.copy())
            vis.draw_detections(detections)
            vis.draw_trackers(tracker.tracks, first_seen)

        # Store results.
        new_track_ids = []
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlwh()
            results.append([
                frame_idx, track.track_id, bbox[0], bbox[1], bbox[2], bbox[3]])
            new_track_ids.append(track.track_id)

        # Record the first-seen timestamp for tracks that are new this frame.
        for track_id in new_track_ids:
            if track_id not in first_seen:
                first_seen[track_id] = datetime.now().strftime('%H:%M:%S')

    # Run tracker.
    if display:
        visualizer = visualization.Visualization(seq_info, update_ms=5)
    else:
        visualizer = visualization.NoVisualization(seq_info)
    visualizer.run(frame_callback)

    # Store results.
    with open(output_file, 'w') as f:
        for row in results:
            print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1' % (
                row[0], row[1], row[2], row[3], row[4], row[5]), file=f)
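
# A minimal sketch of how this entry point might be invoked; the sequence,
# detection and output paths below are hypothetical and depend on your local
# MOTChallenge layout.
if __name__ == "__main__":
    run(
        sequence_dir="MOT16/test/MOT16-06",        # hypothetical sequence path
        detection_file="detections/MOT16-06.npy",  # hypothetical detections file
        output_file="results/MOT16-06.txt",
        min_confidence=0.3,
        nms_max_overlap=1.0,
        min_detection_height=0,
        max_cosine_distance=0.2,
        nn_budget=100,
        display=False,
    )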
def main(yolo):
    points = []
    tpro = 0.

    # Definition of the parameters
    max_cosine_distance = 0.9
    nn_budget = None
    nms_max_overlap = 1.0

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric(
        "cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True

    video_capture = cv2.VideoCapture(0)

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter('output.avi', fourcc, 15, (w, h))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            break
        frame = cv2.flip(frame, 1)
        image = Image.fromarray(frame)

        # ------------------------------------------------ DETECT WITH YOLO
        t1 = time.time()
        boxs = yolo.detect_image(image)
        features = encoder(frame, boxs)

        # Detection confidence is fixed to 1.0 here.
        detections = [Detection(bbox, 1.0, feature)
                      for bbox, feature in zip(boxs, features)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(
            boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # ------------------------------------------------ DRAW DETECT BOX
        to_move = []
        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (0, 0, 255), 1)
            to_move.append(
                (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])))

        # Now feed the detected boxes to the correlation trackers.
        # ------------------------------------------------ MOVE
        if to_move:
            # Create one dlib correlation tracker per detected box and seed
            # each with the initial position of its object.
            mover = [dlib.correlation_tracker() for _ in range(len(to_move))]
            for i, rect in enumerate(to_move):
                mover[i].start_track(frame, dlib.rectangle(*rect))

            for _ in range(100):  # START LOOP MOVER
                ret, frame = video_capture.read()
                full_frame_mover = []
                frame = cv2.flip(frame, 1)

                # Feed the new image to each mover and draw its position.
                for i in range(len(mover)):
                    mover[i].update(frame)
                    rect = mover[i].get_position()
                    pt1 = (int(rect.left()), int(rect.top()))
                    pt2 = (int(rect.right()), int(rect.bottom()))
                    cv2.rectangle(frame, pt1, pt2, (255, 255, 255), 3)
                    full_frame_mover.append((pt1, pt2))
                # finished one frame

                # ---------------------------------------- Call the tracker
                tracker.predict()
                tracker.update(detections)

                # ---------------------------------------- DRAW TRACK RECTANGLE
                for track in tracker.tracks:
                    if track.is_confirmed() and track.time_since_update > 1:
                        continue
                    bbox = track.to_tlbr()
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])),
                                  (255, 255, 255), 2)
                    cv2.putText(frame, str(track.track_id),
                                (int(bbox[0]), int(bbox[1]) + 30), 0,
                                5e-3 * 200, (0, 255, 0), 3)
                    dot = (int(bbox[0] + (bbox[2] - bbox[0]) / 2),
                           int(bbox[3] - 10))
                    cv2.circle(frame, dot, 10, (0, 0, 255), -1)
                cv2.imshow('', frame)

                # Continue until the user presses the ESC key.
                if cv2.waitKey(1) == 27:
                    break
            # END LOOP MOVER
            # -------------------------------------------- Call the tracker
            tracker.predict()
            # NOTE: full_frame_mover holds (pt1, pt2) corner tuples rather than
            # Detection objects; tracker.update() expects Detections, so this
            # hand-off only works with a tracker modified to accept raw boxes.
            tracker.update(full_frame_mover)

            # -------------------------------------------- DRAW TRACK RECTANGLE
            for track in tracker.tracks:
                if track.is_confirmed() and track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
                cv2.putText(frame, str(track.track_id),
                            (int(bbox[0]), int(bbox[1]) + 30), 0, 5e-3 * 200,
                            (0, 255, 0), 3)
                dot = (int(bbox[0] + (bbox[2] - bbox[0]) / 2),
                       int(bbox[3] - 10))
                cv2.circle(frame, dot, 10, (0, 0, 255), -1)

        # ------------------------------------------------ GET POINTS from click
        if cv2.waitKey(1) == ord('p'):
            points = get_lines.run(frame, multi=True)
            print(points)
        if points:
            for line in points:
                cv2.line(frame, tuple(line[0:2]), tuple(line[2:4]),
                         (0, 255, 255), 2)  # draw line

        cv2.imshow('', frame)
        print('process time : ', time.time() - tpro)
        tpro = time.time()

        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxs) != 0:
                for i in range(0, len(boxs)):
                    list_file.write(str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' '
                                    + str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')
            list_file.write('\n')

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
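
# The hand-off above seeds one dlib correlation tracker per YOLO detection and
# then lets dlib follow the boxes for up to 100 frames. A minimal,
# self-contained sketch of that pattern (the video path and initial box are
# hypothetical):
import cv2
import dlib

def correlation_track_demo(video_path="input.mp4",
                           initial_box=(50, 50, 150, 150)):
    cap = cv2.VideoCapture(video_path)
    ok, frame = cap.read()
    tracker = dlib.correlation_tracker()
    # Seed the tracker with the object's initial left/top/right/bottom box.
    tracker.start_track(frame, dlib.rectangle(*initial_box))
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        tracker.update(frame)         # follow the object into the new frame
        pos = tracker.get_position()  # dlib.drectangle with float coordinates
        cv2.rectangle(frame, (int(pos.left()), int(pos.top())),
                      (int(pos.right()), int(pos.bottom())),
                      (255, 255, 255), 2)
        cv2.imshow("correlation tracker", frame)
        if cv2.waitKey(1) == 27:  # ESC
            break
    cap.release()
    cv2.destroyAllWindows()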
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1.0

    # Initialize Deep SORT (see the Deep SORT GitHub repo for more information).
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # A nearest-neighbor distance metric that, for each target, returns the
    # closest distance to any sample that has been observed so far.
    metric = nn_matching.NearestNeighborDistanceMetric(
        "cosine", max_cosine_distance, nn_budget)
    # Multi-target tracker.
    tracker = Tracker(metric)

    # Return an identifiable list of physical devices visible to the host runtime.
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        # Enable memory growth for the first physical device.
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    # Select which YOLOv3 variant to use.
    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    # Load pre-trained weights (from open sources, many from public GitHub repos).
    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    # Array containing the class names (from flags).
    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    # Capture video from the camera or a video file (files for our demonstrations).
    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except ValueError:
        vid = cv2.VideoCapture(FLAGS.video)

    # Output video is empty unless requested.
    out = None
    if FLAGS.output:
        # By default VideoCapture returns float instead of int.
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    _, img = vid.read()
    h, w, c = img.shape
    h_numStep = 12  # number of boxes in a column
    w_numStep = 20  # number of boxes in a row

    # Matrix M assigns a category to each grid cell (1 = food area, etc.).
    M = [[1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5],
         [1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5],
         [1, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 8, 8],
         [2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 8, 8, 8, 8],
         [2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8],
         [2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 7, 7],
         [2, 2, 2, 2, 2, 2, 2, 2, 4, 6, 6, 8, 8, 8, 8, 8, 8, 8, 7, 7, 7],
         [2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 8, 8, 8, 8, 8, 7, 7, 7, 7],
         [2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 8, 7, 7, 7, 7, 7],
         [2, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 7, 7, 7, 7, 7],
         [6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7],
         [6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7]]

    # total_time_engage[i][j] stores the total time customers stay in box [i][j].
    total_time_engage = [[0 for i in range(w_numStep + 1)]
                         for j in range(h_numStep + 1)]
    # stationary_time[k][i][j] stores the time customer k is stationary in box [i][j].
    stationary_time = [[[0 for i in range(w_numStep + 1)]
                        for j in range(h_numStep + 1)] for k in range(100000)]

    # Positions of the single customer being tracked individually.
    x_single_tracking = []
    y_single_tracking = []
    # The single customer's tracking ID.
    single_trackingID = 34

    # Current position of each customer.
    max_trackID = 0
    x_trackID = [-1] * 1000000
    y_trackID = [-1] * 1000000

    # File that stores total_time_engage.
    file = 'total_time_engage.txt'

    fps = 0.0
    count = 0
    while True:
        _, img = vid.read()

        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            else:
                break

        # Convert the image from BGR to RGB color space.
        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # Return a tensor with a length-1 batch axis inserted at index 0.
        img_in = tf.expand_dims(img_in, 0)
        # Resize the image to the network input size (e.g. 416x416); the
        # resolution has to be one the model can work with.
        img_in = transform_images(img_in, FLAGS.size)

        # Number of seconds passed since the epoch.
        t1 = time.time()
        time_finish_last_tracking = t1

        boxes, scores, classes, nums = yolo.predict(img_in)
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)

        # Build detections.
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores[0], names, features)]

        # Initialize the color map.
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # Run non-maxima suppression.
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(
            boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Pass detections to the Deep SORT object: predict, then update.
        tracker.predict()
        tracker.update(detections)

        # Draw horizontal grid boxes.
        y_step = int(h / h_numStep)
        y_start = 0
        while True:
            y_end = y_start + y_step
            cv2.rectangle(img, (0, y_start), (int(w), y_end), (0, 0, 0), 1)
            y_start = y_end
            if y_start >= int(h):
                break
        # finish drawing here

        # Draw vertical grid boxes.
        x_step = int(w / w_numStep)
        x_start = 0
        while True:
            x_end = x_start + x_step
            cv2.rectangle(img, (x_start, 0), (x_end, int(h)), (0, 0, 0), 1)
            x_start = x_end
            if x_start >= int(w):
                break
        # finish drawing here
        time_step = time.time() - time_finish_last_tracking

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()  # the corrected/predicted bounding box
            class_name = track.get_class()  # the class name of this object
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]

            # Identify the center of the bounding box.
            x_cent = int(bbox[0] + (bbox[2] - bbox[0]) / 2)
            y_cent = int(bbox[1] + (bbox[3] - bbox[1]) / 2)

            # Draw the detection on the frame.
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) + (len(class_name)
                           + len(str(track.track_id))) * 17, int(bbox[1])),
                          color, -1)
            # Insert objectName and objectID.
            cv2.putText(img, class_name + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                        (255, 255, 255), 2)

            # Update the stationary_time and total_time_engage arrays.
            if class_name == "person":
                x_pos = int(x_cent / x_step)
                y_pos = int(y_cent / y_step)
                if track.track_id > max_trackID:
                    max_trackID = track.track_id
                x_trackID[track.track_id] = y_pos
                y_trackID[track.track_id] = x_pos
                stationary_time[track.track_id][y_pos][x_pos] += time_step
                total_time_engage[y_pos][x_pos] += time_step

            # Track a single person.
            if class_name == "person" and track.track_id == single_trackingID:
                x_single_tracking.append(x_pos)
                y_single_tracking.append(y_pos)

        # Display the area each person is in.
        for track_index in range(max_trackID + 1):
            if x_trackID[track_index] != -1:
                print("customerID " + str(track_index) + ": ["
                      + str(x_trackID[track_index]) + ","
                      + str(y_trackID[track_index]) + "] in "
                      + market_section(M[x_trackID[track_index]]
                                       [y_trackID[track_index]]))

        with open(file, 'w') as filetostore:
            for i in range(h_numStep):
                for j in range(w_numStep):
                    filetostore.write(
                        "{:.2f}".format(total_time_engage[i][j]) + " ")
                filetostore.write("\n")

        # Uncomment below to show the constantly changing YOLO detections on screen.
        # for det in detections:
        #     bbox = det.to_tlbr()
        #     cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
        #                   (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        time_finish_last_tracking = time.time()

        # Print FPS on screen.
        fps = (fps + (1. / (time.time() - t1))) / 2
        cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        cv2.imshow('output', img)

        if FLAGS.output:
            out.write(img)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(converted_boxes) != 0:
                for i in range(0, len(converted_boxes)):
                    list_file.write(
                        str(converted_boxes[i][0]) + ' '
                        + str(converted_boxes[i][1]) + ' '
                        + str(converted_boxes[i][2]) + ' '
                        + str(converted_boxes[i][3]) + ' ')
            list_file.write('\n')

        # Press q to quit.
        if cv2.waitKey(1) == ord('q'):
            break

    # Insert the collected data into the database.

    # Initialize the track arrays.
    track_time = [0] * 10000000
    track_customerID = [0] * 10000000
    track_area = ["" for x in range(10000000)]
    x_single = [0] * 10000000
    y_single = [0] * 10000000

    # Organize the data to be inserted.
    track_index = -1
    for k in range(1000):
        for h in range(h_numStep):
            for w in range(w_numStep):
                if stationary_time[k][h][w] != 0:
                    track_index += 1
                    track_time[track_index] = stationary_time[k][h][w]
                    track_customerID[track_index] = k
                    track_area[track_index] = str(h) + ', ' + str(w)

    x_tmp = -1
    y_tmp = -1
    single_track_index = -1
    for k in range(len(x_single_tracking)):
        if x_single_tracking[k] != x_tmp and y_single_tracking[k] != y_tmp:
            single_track_index += 1
            x_single[single_track_index] = x_single_tracking[k]
            y_single[single_track_index] = y_single_tracking[k]
            x_tmp = x_single[single_track_index]
            y_tmp = y_single[single_track_index]

    single_tracking_areas = ""
    for k in range(single_track_index):
        single_tracking_areas += ('[' + str(x_single[k]) + ','
                                  + str(y_single[k]) + '] , ')

    # Connect and insert the appropriate data into primary_table.
    for k in range(track_index + 1):
        try:
            conn = mariadb.connect(user="******", password="******",
                                   host="localhost", database="trackingDB")
            cur = conn.cursor()
            mySql_insert_query = """INSERT INTO primary_table(trackID, customerID, area)
                                    VALUES (%s, %s, %s) """
            recordTuple = (k, track_customerID[k], track_area[k])
            cur.execute(mySql_insert_query, recordTuple)
            conn.commit()
        except mariadb.Error as error:
            print("Failed to insert record into the primary_table {}".format(error))
        finally:
            if conn.is_connected():
                cur.close()
                conn.close()

    # Connect and insert the appropriate data into the "engaged" table.
    for k in range(track_index + 1):
        try:
            conn = mariadb.connect(user="******", password="******",
                                   host="localhost", database="trackingDB")
            cur = conn.cursor()
            mySql_insert_query = """INSERT INTO engaged(trackID, engagement_time)
                                    VALUES (%s, %s) """
            recordTuple = (k, track_time[k])
            cur.execute(mySql_insert_query, recordTuple)
            conn.commit()
        except mariadb.Error as error:
            print("Failed to insert record into the engaged table {}".format(error))
        finally:
            if conn.is_connected():
                cur.close()
                conn.close()

    # Connect and insert the appropriate data into the "total_areas" table.
    try:
        conn = mariadb.connect(user="******", password="******",
                               host="localhost", database="trackingDB")
        cur = conn.cursor()
        mySql_insert_query = """INSERT INTO total_areas(customerID, all_areas_visited)
                                VALUES (%s, %s) """
        recordTuple = (single_trackingID, single_tracking_areas)
        cur.execute(mySql_insert_query, recordTuple)
        conn.commit()
    except mariadb.Error as error:
        print("Failed to insert record into the total_areas table {}".format(error))
    finally:
        if conn.is_connected():
            cur.close()
            conn.close()

    # Plot the engagement surface.
    fig = plt.figure(1)
    fig.suptitle('Engagement time on different areas', fontsize=20)
    ax = plt.axes(projection='3d')
    # Data for the three-dimensional surface.
    x = np.arange(w_numStep - 1, -1, -1)
    y = np.linspace(0, h_numStep - 1, h_numStep)
    X, Y = np.meshgrid(x, y)
    Z = [[0 for j in range(w_numStep)] for i in range(h_numStep)]
    for i in range(h_numStep):
        for j in range(w_numStep):
            Z[i][j] = total_time_engage[i][j]
    Z = np.array(Z)

    # Plot the surface.
    ax.plot_surface(X, Y, Z, rstride=1, cstride=1,
                    cmap='viridis', edgecolor='none')
    ax.set_xlabel('width')
    ax.set_ylabel('height')
    ax.set_zlabel('time')
    ax.view_init(35, 80)

    # Get the current axes and hide the x and y axis tick marks.
    frame = plt.gca()
    frame.axes.get_xaxis().set_ticks([])
    frame.axes.get_yaxis().set_ticks([])

    # Plot the walking pattern of the single tracked customer.
    fig2 = plt.figure(2)
    fig2_title = ('Walking pattern of a single customer (trackingID = '
                  + str(single_trackingID) + ')')
    fig2.suptitle(fig2_title, fontsize=15)
    plt.plot(x_single_tracking, y_single_tracking, 'ro')
    plt.axis([0, w_numStep, h_numStep, 0])
    frame.axes.get_xaxis().set_ticks([])
    frame.axes.get_yaxis().set_ticks([])

    fig.savefig('engage_level.jpg')
    fig2.savefig('single_tracking.jpg')
    plt.show()

    vid.release()
    if FLAGS.output:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
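
# The tracking loop above calls market_section() to turn a category id from M
# into a human-readable name. That helper is defined elsewhere; a plausible
# sketch is below. Only "1 = food area" is stated in the comments above, so
# the other names are hypothetical placeholders.
def market_section(category_id):
    sections = {1: "food area", 2: "section 2", 3: "section 3",
                4: "section 4", 5: "section 5", 6: "section 6",
                7: "section 7", 8: "section 8"}
    return sections.get(category_id, "unknown area")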
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.2
    nn_budget = None
    nms_max_overlap = 1.0

    output_format = 'mp4'
    video_name = 'bus4_2in_4out.mp4'
    file_path = join('data_files/videos', video_name)
    output_name = 'save_data/out_' + video_name[0:-3] + output_format
    initialize_door_by_yourself = False
    door_array = None

    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric(
        "cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    show_detections = True
    writeVideo_flag = True
    asyncVideo_flag = False

    counter = Counter(counter_in=0, counter_out=0, track_id=0)

    if asyncVideo_flag:
        video_capture = VideoCaptureAsync(file_path)
    else:
        video_capture = cv2.VideoCapture(file_path)

    if asyncVideo_flag:
        video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter(output_name, fourcc, 15, (w, h))
        frame_index = -1

    fps = 0.0
    fps_imutils = imutils.video.FPS().start()

    ret, first_frame = video_capture.read()

    if door_array is None:
        if initialize_door_by_yourself:
            door_array = select_object(first_frame)[0]
            print(door_array)
        else:
            all_doors = read_door_info('data_files/doors_info.csv')
            door_array = all_doors[video_name]

    border_door = door_array[3]
    error_values = []
    truth = get_truth(video_name)

    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            total_count = counter.return_total_count()
            true_total = truth.inside + truth.outside
            err = abs(total_count - true_total) / true_total
            log_res = "in video: {}\n predicted / true\n counter in: {} / {}\n" \
                      " counter out: {} / {}\n total: {} / {}\n error: {}\n" \
                      "______________\n".format(
                          video_name, counter.counter_in, truth.inside,
                          counter.counter_out, truth.outside, total_count,
                          true_total, err)
            with open('log_results.txt', 'w') as file:
                file.write(log_res)
            print(log_res)
            error_values.append(err)
            break

        t1 = time.time()

        image = Image.fromarray(frame[..., ::-1])  # BGR to RGB
        boxes, confidence, classes = yolo.detect_image(image)

        features = encoder(frame, boxes)
        detections = [
            Detection(bbox, confidence, cls, feature)
            for bbox, confidence, cls, feature in zip(
                boxes, confidence, classes, features)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.cls for d in detections])
        indices = preprocessing.non_max_suppression(
            boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker.
        tracker.predict()
        tracker.update(detections)

        cv2.rectangle(frame, (int(door_array[0]), int(door_array[1])),
                      (int(door_array[2]), int(door_array[3])),
                      (23, 158, 21), 2)

        for det in detections:
            bbox = det.to_tlbr()
            if show_detections and len(classes) > 0:
                score = "%.2f" % (det.confidence * 100) + "%"
                rect_head = Rectangle(bbox[0], bbox[1], bbox[2], bbox[3])
                rect_door = Rectangle(int(door_array[0]), int(door_array[1]),
                                      int(door_array[2]), int(door_array[3]))
                intersection = rect_head & rect_door
                if intersection:
                    squares_coeff = (rect_square(*intersection)
                                     / rect_square(*rect_head))
                    cv2.putText(frame,
                                score + " inter: " + str(round(squares_coeff, 3)),
                                (int(bbox[0]), int(bbox[3])), 0,
                                1e-3 * frame.shape[0], (0, 100, 255), 5)
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (255, 0, 0), 3)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()

            # First appearance of the object with id=track.track_id.
            if track.track_id not in counter.people_init or \
                    counter.people_init[track.track_id] == 0:
                counter.obj_initialized(track.track_id)
                rect_head = Rectangle(bbox[0], bbox[1], bbox[2], bbox[3])
                rect_door = Rectangle(door_array[0], door_array[1],
                                      door_array[2], door_array[3])
                res = rect_head & rect_door
                if res:
                    inter_square = rect_square(*res)
                    head_square = rect_square(*rect_head)
                    # Was initialized in the door: probably going in.
                    if (inter_square / head_square) >= 0.8:
                        counter.people_init[track.track_id] = 2
                    # Initialized in the bus: maybe going out.
                    elif (inter_square / head_square) <= 0.4 \
                            or bbox[3] > border_door:
                        counter.people_init[track.track_id] = 1
                else:
                    # res is None: the object is not inside the door contour.
                    counter.people_init[track.track_id] = 1
                counter.people_bbox[track.track_id] = bbox
            counter.cur_bbox[track.track_id] = bbox

            adc = "%.2f" % (track.adc * 100) + "%"  # Average detection confidence
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, "ID: " + str(track.track_id),
                        (int(bbox[0]), int(bbox[1])), 0,
                        1e-3 * frame.shape[0], (0, 255, 0), 5)

            if not show_detections:
                track_cls = track.cls
                cv2.putText(frame, str(track_cls),
                            (int(bbox[0]), int(bbox[3])), 0,
                            1e-3 * frame.shape[0], (0, 255, 0), 1)
                cv2.putText(frame, 'ADC: ' + adc,
                            (int(bbox[0]),
                             int(bbox[3] + 2e-2 * frame.shape[1])),
                            0, 1e-3 * frame.shape[0], (0, 255, 0), 1)

        id_get_lost = [track.track_id for track in tracker.tracks
                       if track.time_since_update >= 25 and track.age >= 29]
        id_inside_tracked = [track.track_id for track in tracker.tracks
                             if track.age > 60]

        for val in counter.people_init.keys():
            # Check the bounding boxes as well.
            cur_c = find_centroid(counter.cur_bbox[val])
            init_c = find_centroid(counter.people_bbox[val])
            vector_person = (cur_c[0] - init_c[0], cur_c[1] - init_c[1])

            if val in id_get_lost and counter.people_init[val] != -1:
                # If the vertical component is negative, the current
                # coordinate is smaller than the initial one, which means the
                # person is moving in the exit direction.
                if vector_person[1] > 70 and counter.people_init[val] == 2:
                    # and counter.people_bbox[val][3] > border_door
                    counter.get_in()
                elif vector_person[1] < -70 and counter.people_init[val] == 1:
                    counter.get_out()

                counter.people_init[val] = -1
                print("person left frame")
print(f"current centroid - init : {cur_c} - {init_c}\n") print(f"vector: {vector_person}\n") del val # elif val in id_inside_tracked and val not in id_get_lost and counter.people_init[val] == 1 \ # and bb_intersection_over_union(counter.cur_bbox[val], door_array) <= 0.3 \ # and vector_person[1] > 0: # and \ # # counter.people_bbox[val][3] > border_door: # counter.get_in() # # counter.people_init[val] = -1 # print(f"person is tracked for a long time") # print(f"current centroid - init : {cur_c} - {init_c}\n") # print(f"vector: {vector_person}\n") # imaggg = cv2.line(frame, find_centroid(counter.cur_bbox[val]), # find_centroid(counter.people_bbox[val]), # (0, 0, 255), 7) # cv2.imshow('frame', imaggg) # cv2.waitKey(0) ins, outs = counter.show_counter() cv2.putText(frame, "in: {}, out: {} ".format(ins, outs), (10, 30), 0, 1e-3 * frame.shape[0], (255, 0, 0), 5) cv2.namedWindow('image', cv2.WINDOW_NORMAL) cv2.resizeWindow('image', 1400, 800) cv2.imshow('image', frame) if writeVideo_flag: # save a frame out.write(frame) frame_index = frame_index + 1 fps_imutils.update() if not asyncVideo_flag: fps = (fps + (1. / (time.time() - t1))) / 2 # print("FPS = %f" % (fps)) # Press Q to stop! if cv2.waitKey(1) & 0xFF == ord('q'): break fps_imutils.stop() print('imutils FPS: {}'.format(fps_imutils.fps())) if asyncVideo_flag: video_capture.stop() else: video_capture.release() if writeVideo_flag: out.release() cv2.destroyAllWindows() mean_error = np.mean(error_values) print("mean error for {} video: {}".format(video_name, mean_error))
def run(sequence_dir, detection_file, output_file, min_confidence,
        nms_max_overlap, min_detection_height, max_cosine_distance,
        nn_budget, display, offset, n_frames, max_iou_distance, max_age,
        n_init, alpha_ds=0.0):
    """Run multi-target tracker on a particular sequence.

    Parameters
    ----------
    sequence_dir : str
        Path to the MOTChallenge sequence directory.
    detection_file : str
        Path to the detections file.
    output_file : str
        Path to the tracking output file. This file will contain the tracking
        results on completion.
    min_confidence : float
        Detection confidence threshold. Disregard all detections that have
        a confidence lower than this value.
    nms_max_overlap : float
        Maximum detection overlap (non-maxima suppression threshold).
    min_detection_height : int
        Detection height threshold. Disregard all detections that have
        a height lower than this value.
    max_cosine_distance : float
        Gating threshold for cosine distance metric (object appearance).
    nn_budget : Optional[int]
        Maximum size of the appearance descriptor gallery. If None, no budget
        is enforced.
    display : bool
        If True, show visualization of intermediate tracking results.
    offset : int
        Index of the first frame to process.
    n_frames : int
        Number of frames to process.
    max_iou_distance : float
        Gating threshold for the tracker's IOU matching.
    max_age : int
        Maximum number of missed updates before a track is deleted.
    n_init : int
        Number of consecutive detections before a track is confirmed.
    alpha_ds : float
        Passed through to this fork's NearestNeighborDistanceMetric in place
        of the metric name.

    """
    seq_info = gather_sequence_info(sequence_dir, detection_file, offset,
                                    n_frames)
    metric = nn_matching.NearestNeighborDistanceMetric(
        alpha_ds, max_cosine_distance, nn_budget)
    tracker = Tracker(metric, max_iou_distance=max_iou_distance,
                      max_age=max_age, n_init=n_init)
    results = []

    def frame_callback(vis, frame_idx):
        # Report progress roughly every tenth of the sequence.
        progress_step = max(1, n_frames // 10)
        if frame_idx % progress_step == 0:
            print("Processing frame {} / {} ".format(frame_idx, n_frames))

        # Load image and generate detections.
        detections = create_detections(
            seq_info["detections"], frame_idx, min_detection_height)
        detections = [d for d in detections if d.confidence >= min_confidence]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(
            boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Update tracker.
        tracker.predict()
        tracker.update(detections)

        # Update visualization.
        if display:
            image = cv2.imread(
                seq_info["image_filenames"][frame_idx], cv2.IMREAD_COLOR)
            vis.set_image(image.copy())
            vis.draw_detections(detections)
            vis.draw_trackers(tracker.tracks)

        # Store results: a MOT-style row plus the track's last appearance feature.
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlwh()
            results.append([
                frame_idx, track.track_id, bbox[0], bbox[1], bbox[2], bbox[3],
                1, -1, -1, -1] + list(track.last_feature()))

    # Run tracker.
    if display:
        visualizer = visualization.Visualization(seq_info, update_ms=5)
    else:
        visualizer = visualization.NoVisualization(seq_info)
    visualizer.run(frame_callback)

    # Store results.
    np.save(output_file, results)
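
# Because this variant stores each result row as
# [frame, id, x, y, w, h, 1, -1, -1, -1, feature...], the array saved with
# np.save can be split back into MOT fields and appearance descriptors. A
# small sketch; the feature dimensionality (e.g. 128 for the mars-small128
# encoder) is an assumption:
import numpy as np

def load_tracking_results(npy_path):
    data = np.load(npy_path)
    mot_rows = data[:, :10]   # frame, track_id, tlwh bbox, MOT padding columns
    features = data[:, 10:]   # per-row appearance descriptors (e.g. 128-d)
    return mot_rows, features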
def human_tracking(frames, yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric(
        "cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    new_frames = []
    for frame in frames:
        image = Image.fromarray(frame)
        boxs = yolo.detect_image(image)
        features = encoder(frame, boxs)

        # Detection confidence is fixed to 1.0 here.
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(boxs, features)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(
            boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker.
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            frame = cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])),
                                  (255, 255, 255), 2)
            frame = cv2.putText(frame, str(track.track_id),
                                (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200,
                                (0, 255, 0), 2)

        for det in detections:
            bbox = det.to_tlbr()
            frame = cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])),
                                  (255, 0, 0), 2)

        new_frames.append(frame)

    # Note: boxs holds the detections from the last processed frame only.
    return new_frames, boxs
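
# A minimal sketch of driving human_tracking() with frames decoded from a
# video file; the helper name and input path are hypothetical.
def track_video(video_path, yolo):
    cap = cv2.VideoCapture(video_path)
    frames = []
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        frames.append(frame)
    cap.release()
    # Returns the annotated frames plus the last frame's detection boxes.
    annotated, last_boxes = human_tracking(frames, yolo)
    return annotated, last_boxes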
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap1 = 1.0

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric(
        "cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True
    OPTICAL = False

    # video_filename = './dataset/people.mp4'
    # video_filename = 'C:/tensorflow1/models/research/object_detection/videos/IMG_1101.MOV'
    video_filename = 'C:/tensorflow1/models/research/object_detection/videos/IMG_1105-diet.mp4'
    video_capture = cv2.VideoCapture(video_filename)

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
        out = cv2.VideoWriter(
            'C:/tensorflow1/models/research/object_detection/videos/output_0419.avi',
            fourcc, 30, (w, h))
        list_file = open('detection.txt', 'w')
        list_file2 = open('tracking.txt', 'w')
        frame_index = -1

    fps = 0.0
    firstflag = 1
    while True:
        ok, frame = video_capture.read()  # frame shape 640*480*3
        # cv2.imwrite("test.png", frame)
        # exit()
        if not ok:
            break
        t1 = time.time()

        image = Image.fromarray(frame)
        boxs = yolo.detect_image(image)  # [x, y, w, h]
        features = encoder(frame, boxs)

        # Detection confidence is fixed to 1.0 here.
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(boxs, features)]

        # Run non-maxima suppression (NMS).
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(
            boxes, nms_max_overlap1, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker.
        tracker.predict()
        tracker.update(detections)

        # Add one more step of optical flow:
        # convert detections to corner-point arrays for the flow estimator.
        n_object = len(detections)
        bboxs = np.empty((n_object, 4, 2), dtype=float)
        i = 0
        for det in detections:
            bbox = det.to_tlbr()  # (min x, min y, max x, max y)
            (xmin, ymin, boxw, boxh) = (int(bbox[0]), int(bbox[1]),
                                        int(bbox[2]) - int(bbox[0]),
                                        int(bbox[3]) - int(bbox[1]))
            bboxs[i, :, :] = np.array([[xmin, ymin],
                                       [xmin + boxw, ymin],
                                       [xmin, ymin + boxh],
                                       [xmin + boxw, ymin + boxh]]).astype(float)
            i = i + 1

        if firstflag:
            oldframe = frame
        else:
            startXs, startYs = getFeatures(
                cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY), bboxs, use_shi=False)
            newXs, newYs = estimateAllTranslation(startXs, startYs,
                                                  oldframe, frame)
            Xs, Ys, newbboxs = applyGeometricTransformation(
                startXs, startYs, newXs, newYs, bboxs)
            oldframe = frame

            # Generate new detections from the flow-adjusted boxes.
            boxs = bbox_transform(newbboxs)
            features = encoder(frame, boxs)
            detections = [
                Detection(bbox, 1.0, feature)
                for bbox, feature in zip(boxs, features)]
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            indices = preprocessing.non_max_suppression(
                boxes, nms_max_overlap1, scores)
            detections = [detections[i] for i in indices]

            # Call the tracker again.
            tracker.predict()
            tracker.update(detections)

        origin_frame = frame.copy()
        draw_count_line(frame)
        draw_people_point_line(frame)
        draw_count_text(frame)

        boxes_tracking = np.array(
            [track.to_tlwh() for track in tracker.tracks])

        # Deep SORT tracker visualization.
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            central_point = Point(
                int((bbox[0] + bbox[2]) / 2),
                int((bbox[1] + bbox[3]) / 2 + (bbox[3] - bbox[1]) / 3))
            # Crop from the original (undrawn) frame.
            crop_img = origin_frame[int(bbox[1]):int(bbox[3]),
                                    int(bbox[0]):int(bbox[2])]
            append_point(central_point, track.track_id, crop_img)
            # detect_face_gender(crop_img)
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, str(track.track_id),
                        (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200,
                        (0, 255, 0), 2)

        # Starting from the first frame, do optical flow for every two
        # consecutive frames.
        if OPTICAL:
            if firstflag:
                n_object = len(detections)
                bboxs = np.empty((n_object, 4, 2), dtype=float)
                i = 0
                for det in detections:
                    bbox = det.to_tlbr()  # (min x, min y, max x, max y)
                    (xmin, ymin, boxw, boxh) = (int(bbox[0]), int(bbox[1]),
                                                int(bbox[2]) - int(bbox[0]),
                                                int(bbox[3]) - int(bbox[1]))
                    bboxs[i, :, :] = np.array(
                        [[xmin, ymin], [xmin + boxw, ymin],
                         [xmin, ymin + boxh],
                         [xmin + boxw, ymin + boxh]]).astype(float)
                    i = i + 1
                startXs, startYs = getFeatures(
                    cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY), bboxs,
                    use_shi=False)
                oldframe = frame
                oldbboxs = bboxs
            else:
                # Add new tracking objects (disabled):
                # new_n_object = len(detections)
                # if new_n_object > n_object:
                #     # Run non-maxima suppression (NMS)
                #     tmp_boxes = np.array([d.tlwh for d in detections])
                #     tmp_scores = np.array([d.confidence for d in detections])
                #     tmp_indices = preprocessing.non_max_suppression(
                #         tmp_boxes, nms_max_overlap2, tmp_scores)
                #     tmp_detections = [detections[i] for i in indices]
                #     if len(tmp_detections) > n_object:
                newXs, newYs = estimateAllTranslation(startXs, startYs,
                                                      oldframe, frame)
                Xs, Ys, newbboxs = applyGeometricTransformation(
                    startXs, startYs, newXs, newYs, oldbboxs)

                # Update coordinates.
                (startXs, startYs) = (Xs, Ys)
                oldframe = frame
                oldbboxs = newbboxs

                # Update feature points as required.
                n_features_left = np.sum(Xs != -1)
                print('# of Features: %d' % n_features_left)
                if n_features_left < 15:
                    print('Generate New Features')
                    startXs, startYs = getFeatures(
                        cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY), newbboxs)

                # Draw a bounding box and visualize feature points for each object.
                for j in range(n_object):
                    (xmin, ymin, boxw, boxh) = cv2.boundingRect(
                        newbboxs[j, :, :].astype(int))
                    cv2.rectangle(frame, (xmin, ymin),
                                  (xmin + boxw, ymin + boxh),
                                  (255, 255, 255), 2)  # BGR color
                    cv2.putText(frame, str(j), (xmin, ymin), 0, 5e-3 * 200,
                                (0, 255, 0), 2)
                    # red color features
                    # for k in range(startXs.shape[0]):
                    #     cv2.circle(frame, (int(startXs[k, j]), int(startYs[k, j])),
                    #                3, (0, 0, 255), thickness=2)

            for det in detections:
                bbox = det.to_tlbr()
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])),
                              (255, 0, 0), 2)  # BGR color

        # cv2.imshow('', frame)

        if writeVideo_flag:
            # save a frame
            out.write(frame)

            # detection
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxs) != 0:
                for i in range(0, len(boxs)):
                    list_file.write(str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' '
                                    + str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')
            list_file.write('\n')

            # tracking
            list_file2.write(str(frame_index) + ' ')
            if len(boxes_tracking) != 0:
                for i in range(0, len(boxes_tracking)):
                    list_file2.write(str(boxes_tracking[i][0]) + ' '
                                     + str(boxes_tracking[i][1]) + ' '
                                     + str(boxes_tracking[i][2]) + ' '
                                     + str(boxes_tracking[i][3]) + ' ')
            list_file2.write('\n')

        firstflag = 0
        fps = (fps + (1. / (time.time() - t1))) / 2
        print("fps= %f" % fps)

        # Press Q to stop!
        # if cv2.waitKey(1) & 0xFF == ord('q'):
        #     break

    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 0.7

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric(
        "cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True
    webcam_flag = False
    resize_flag = True
    resize_size = (800, 450)

    # Some webcam links from EarthCam:
    # https://github.com/Crazycook/Working/blob/master/Webcams.txt
    # https://www.vlcm3u.com/web-cam-live/
    # video_url = 'https://videos3.earthcam.com/fecnetwork/lacitytours1.flv/chunklist_w683585821.m3u8'  # HOLLYWOOD
    # video_url = 'https://videos3.earthcam.com/fecnetwork/9974.flv/chunklist_w1421640637.m3u8'  # NYC
    # video_url = 'https://videos3.earthcam.com/fecnetwork/5775.flv/chunklist_w1803081483.m3u8'  # NYC 2
    # video_url = 'http://181.1.29.189:60001/cgi-bin/snapshot.cgi?chn=0&u=admin'
    # video_url = 'https://videos-3.earthcam.com/fecnetwork/15559.flv/chunklist_w573709200.m3u8'  # NYC 3
    video_url = 'https://hddn01.skylinewebcams.com/live.m3u8?a=97psdt8nv2hsmclta3nuu4di94'

    if webcam_flag:
        video_capture = cv2.VideoCapture(0)
    else:
        video_capture = cv2.VideoCapture()
        video_capture.set(cv2.CAP_PROP_BUFFERSIZE, 2)
        video_capture.open(video_url)

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter('output.avi', fourcc, 15, (w, h))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            break
        t1 = time.time()

        if resize_flag:
            frame = cv2.resize(frame, resize_size,
                               interpolation=cv2.INTER_AREA)

        # image = Image.fromarray(frame)
        image = Image.fromarray(frame[..., ::-1])  # BGR to RGB
        boxs = yolo.detect_image(image)

        if np.array(boxs).size > 0:
            # Each row of boxs carries [x, y, w, h, ...]; only the box part
            # is passed to the encoder.
            features = encoder(frame, np.array(boxs)[:, 0:4].tolist())
            class_names = yolo.class_names

            # Detection confidence is fixed to 1.0 here.
            detections = [
                Detection(bbox, 1.0, feature)
                for bbox, feature in zip(boxs, features)]

            # Run non-maxima suppression.
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            indices = preprocessing.non_max_suppression(
                boxes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]

            # Call the tracker.
            tracker.predict()
            tracker.update(detections)

            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])),
                              (255, 255, 255), 2)
                cv2.putText(frame, str(track.track_id),
                            (int(bbox[0]), int(bbox[1]) - 10), 0,
                            5e-3 * 100, (0, 0, 255), 2)

            for det in detections:
                bbox = det.to_tlbr()
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
                # NOTE: det.label and det.score assume a Detection variant
                # that keeps the YOLO class index and score alongside the box.
                cv2.putText(frame,
                            class_names[int(det.label)] + "("
                            + str(round(det.score, 2)) + ")",
                            (int(bbox[0]), int(bbox[3])), 0, 5e-3 * 90,
                            (255, 0, 0), 2)
                # cv2.putText(frame, str(int(bbox[0])) + "-" + str(int(bbox[3])),
                #             (int(bbox[0]), int(bbox[3])), 0, 5e-3 * 90,
                #             (0, 0, 255), 2)

        cv2.imshow('', frame)

        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxs) != 0:
                for i in range(0, len(boxs)):
                    list_file.write(str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' '
                                    + str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')
            list_file.write('\n')

        fps = (fps + (1. / (time.time() - t1))) / 2
        print("fps= %f" % fps)

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric(
        "cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    tracking = True
    writeVideo_flag = True
    asyncVideo_flag = False

    file_path = 'video.webm'
    if asyncVideo_flag:
        video_capture = VideoCaptureAsync(file_path)
    else:
        video_capture = cv2.VideoCapture(file_path)

    if asyncVideo_flag:
        video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('output_yolov4.avi', fourcc, 30, (w, h))
        frame_index = -1

    fps = 0.0
    fps_imutils = imutils.video.FPS().start()

    model_par, valid_transform = model_init_par()

    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            break
        t1 = time.time()

        image = Image.fromarray(frame[..., ::-1])  # BGR to RGB
        boxes, confidence, classes = yolo.detect_image(image)

        if tracking:
            features = encoder(frame, boxes)
            detections = [
                Detection(bbox, confidence, cls, feature)
                for bbox, confidence, cls, feature in zip(
                    boxes, confidence, classes, features)]
        else:
            detections = [
                Detection_YOLO(bbox, confidence, cls)
                for bbox, confidence, cls in zip(boxes, confidence, classes)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(
            boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        for det in detections:
            bbox = det.to_tlbr()
            score = "%.2f" % round(det.confidence * 100, 2) + "%"
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
            if len(classes) > 0:
                cls = det.cls
                cv2.putText(frame, str(cls) + " " + score,
                            (int(bbox[0]), int(bbox[3])), 0,
                            1e-3 * frame.shape[0], (0, 255, 0), 1)

        if tracking:
            # Call the tracker.
            tracker.predict()
            tracker.update(detections)

            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                # crop_img = frame[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])]
                crop_img = image.crop(
                    [int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])])
                # res_txt = demo_par(model_par, valid_transform, crop_img)
                # draw.rectangle(xy=person_bbox[:-1], outline='red', width=1)
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])),
                              (255, 255, 255), 2)
                cv2.putText(frame, "ID: " + str(track.track_id),
                            (int(bbox[0]), int(bbox[1])), 0,
                            1e-3 * frame.shape[0], (0, 255, 0), 1)
                font = ImageFont.truetype(
                    '/home/sohaibrabbani/PycharmProjects/Strong_Baseline_of_Pedestrian_Attribute_Recognition/arial.ttf',
                    size=10)
                # positive_cnt = 1
                # for txt in res_txt:
                #     if 'personal' in txt:
                #         # draw.text((x1, y1 + 20 * positive_cnt), txt, (255, 0, 0), font=font)
                #         cv2.putText(frame, txt,
                #                     (int(bbox[0]), int(bbox[1]) + 20 * positive_cnt),
                #                     0, 1e-3 * frame.shape[0], (0, 255, 0), 1)
                #         positive_cnt += 1

        cv2.imshow('', frame)

        if writeVideo_flag:  # and not asyncVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        fps_imutils.update()

        if not asyncVideo_flag:
            fps = (fps + (1. / (time.time() - t1))) / 2
            print("FPS = %f" % fps)

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    if asyncVideo_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if writeVideo_flag:
        out.release()

    cv2.destroyAllWindows()
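
# When tracking is disabled, the loop above wraps raw YOLO outputs in
# Detection_YOLO instead of Deep SORT's Detection. A plausible sketch of that
# class (an assumption; the project's own definition may differ):
import numpy as np

class Detection_YOLO(object):
    def __init__(self, tlwh, confidence, cls):
        self.tlwh = np.asarray(tlwh, dtype=float)
        self.confidence = float(confidence)
        self.cls = cls

    def to_tlbr(self):
        # Convert (top-left x, top-left y, width, height) to corner format.
        ret = self.tlwh.copy()
        ret[2:] += ret[:2]
        return ret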
def main():
    min_confidence = 20  # darknet confidences are percentages, hence 20 (%)
    min_detection_height = 0
    nms_max_overlap = 1.0
    max_cosine_distance = 0.2
    nn_budget = 100

    args = parser()
    check_arguments_errors(args)

    encoder = generate_detections.create_box_encoder(
        args.model_file, batch_size=args.batch_size)
    metric = nn_matching.NearestNeighborDistanceMetric(
        "cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)
    results = []

    # yolov4
    # yolo(args=args)
    random.seed(3)  # deterministic bbox colors
    network, class_names, class_colors = darknet.load_network(
        args.config_file, args.data_file, args.weights,
        batch_size=args.batch_size)
    images = load_images(args.input)

    writeVideo_flag = True
    video_path = "./output/output.mp4"
    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        # w = int(video_capture.get(3))
        # h = int(video_capture.get(4))
        width, height = get_image_size(network)
        first_image = cv2.imread(images[0])
        org_h, org_w = first_image.shape[:2]
        print(width, height, org_w, org_h)
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter(video_path, fourcc, 15, (width, height))
        list_file = open(args.output_file, 'w')

    counter = []
    fps = 0.0
    index = 0
    while True:
        # Loop asking for new image paths if no list is given.
        if args.input:
            if index >= len(images):
                break
            image_name = images[index]
        else:
            image_name = input("Enter Image Path: ")
        prev_time = time.time()

        # After darknet detection, bbox = (center_x, center_y, w, h).
        image, detections = image_detection(
            image_name, network, class_names, class_colors, args.thresh)
        # if args.save_labels:
        #     save_annotations(image_name, image, detections, class_names,
        #                      output=args.output)

        boxs = []
        predicted_names = []
        for label, confidence, bbox in detections:
            # To extract features we need bbox = (left, top, w, h).
            center_x, center_y, w, h = bbox
            xmin = int(round(center_x - (w / 2)))
            ymin = int(round(center_y - (h / 2)))
            boxs.append([xmin, ymin, w, h])
            # predict_name = class_names.index(label)
            # predicted_names.append(predict_name)
        features = encoder(image, boxs)

        # Create a Detection for each box. enumerate keeps box_idx aligned
        # with the features computed above even when a box is skipped.
        dets = []
        for box_idx, (label, confidence, bbox) in enumerate(detections):
            if bbox[3] < min_detection_height:
                continue
            if bbox[2] > 0.8 * width:
                print(label, ", ", confidence, bbox)
            # To create a Detection we need bbox = (left, top, w, h).
            center_x, center_y, w, h = bbox
            xmin = int(round(center_x - (w / 2)))
            ymin = int(round(center_y - (h / 2)))
            dets.append(Detection([xmin, ymin, w, h], confidence,
                                  features[box_idx]))

        # Filter out boxes with confidence below the threshold.
        detections = [d for d in dets if d.confidence >= min_confidence]

        # Run non-maxima suppression on the boxes.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(
            boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Update tracker.
        # The Kalman filter propagates every track's state; no tracks exist
        # yet on the first frame.
        tracker.predict()
        # Update matched tracks, and initialize a new track for every
        # unmatched detection.
        tracker.update(detections)

        i = 0
        indexIDs = []
        boxes = []
        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(image, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 1)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            # Darknet resizes the input image to the network size (608, 608),
            # so stored results must be scaled back to the positions in the
            # original image (1920x1080).
            bbox = track.to_tlwh()
            x, y, w, h = bbox
            a_x, a_y, a_w, a_h = x / width, y / height, w / width, h / height
            results.append([
                index + 1, track.track_id, org_w * a_x, org_h * a_y,
                org_w * a_w, org_h * a_h])
            indexIDs.append(int(track.track_id))
            counter.append(int(track.track_id))
            bbox = track.to_tlbr()
            color = [int(c) for c in COLORS[indexIDs[i] % len(COLORS)]]

            cv2.rectangle(image, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 1)
            cv2.putText(image, str(track.track_id),
                        (int(bbox[0]), int(bbox[1])),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)
            # if len(class_names) > 0:
            #     cv2.putText(image, str(class_names[0]),
            #                 (int(bbox[0]), int(bbox[1] - 20)),
            #                 cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)
            # pt1 = int(bbox[0]), int(bbox[1])
            # pt2 = int(bbox[2]), int(bbox[3])
            # cv2.rectangle(image, pt1, pt2, color, 1)
            # if track.track_id is not None:
            #     text_size = cv2.getTextSize(str(track.track_id),
            #                                 cv2.FONT_HERSHEY_PLAIN, 1, 1)
            #     center = pt1[0] + 5, pt1[1] + 5 + text_size[0][1]
            #     pt2 = pt1[0] + 10 + text_size[0][0], pt1[1] + 10 + text_size[0][1]
            #     cv2.rectangle(image, pt1, pt2, color, -1)
            #     cv2.putText(image, str(track.track_id), center,
            #                 cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)
            i += 1

            center = (int((bbox[0] + bbox[2]) / 2),
                      int((bbox[1] + bbox[3]) / 2))
            pts[track.track_id].append(center)
            thickness = 1
            # center point
            cv2.circle(image, center, 1, color, thickness)

            # draw motion path
            for j in range(1, len(pts[track.track_id])):
                if pts[track.track_id][j - 1] is None \
                        or pts[track.track_id][j] is None:
                    continue
                thickness = int(np.sqrt(64 / float(j + 1)) * 2)
                cv2.line(image, pts[track.track_id][j - 1],
                         pts[track.track_id][j], color, 1)
                # cv2.putText(frame, str(class_names[j]),
                #             (int(bbox[0]), int(bbox[1] - 20)),
                #             cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

        # count = len(set(counter))
        # cv2.putText(image, "Total Object Counter: " + str(count),
        #             (int(20), int(120)), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
        #             (0, 255, 0), 1)
        # cv2.putText(image, "Current Object Counter: " + str(i),
        #             (int(20), int(80)), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
        #             (0, 255, 0), 1)
        cv2.putText(image, "FPS: %f" % fps, (int(20), int(40)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
        cv2.namedWindow("YOLO_Deep_SORT", 0)
        cv2.resizeWindow('YOLO_Deep_SORT', 1024, 768)
        cv2.imshow('YOLO_Deep_SORT', image)

        if writeVideo_flag:
            out.write(image)

        fps = (fps + (1. / (time.time() - prev_time))) / 2
        # print(set(counter))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        # fps = int(1. / (time.time() - prev_time))
        print("FPS: {}".format(fps))
        index = index + 1

    if writeVideo_flag:
        for row in results:
            print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1' % (
                row[0], row[1], row[2], row[3], row[4], row[5]),
                file=list_file)
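
# The motion-path drawing above indexes a module-level `pts` history by track
# id. A common way to set that up, consistent with the appends and the
# fading-line loop (an assumption about this script's module-level state):
from collections import defaultdict, deque

pts = defaultdict(lambda: deque(maxlen=64))  # track_id -> recent center points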
class Client:
    """
    The camera client component of the experiment. Responsible for:
    1. running object detection (YOLOv4) and tracking (Deep SORT) and sending
       the results;
    2. reading the images from the dataset and converting them to h264 video;
    3. converting the video to the requested bitrate and sending it to the
       server.
    """

    def __init__(self, id, batch_size=15,
                 dataset_dir='../others/dds/dataset/WildTrack/src/C'):
        # id: string
        self.id = id
        # video batch size (integer)
        self.batch_size = batch_size
        # displacement
        self.displacement_check = {}

        # Deep SORT encoding; the settings are the same for now.
        self.max_cosine_distance = 0.3
        self.nn_budget = None
        self.nms_max_overlap = 1.0

        self.temp_dir = 'temp-cropped'
        os.makedirs(self.temp_dir, exist_ok=True)

        self.dataset_dir = dataset_dir + id

        # Read the total number of files from the server.
        fnames = sorted(os.listdir(dataset_dir + id))
        self.total_frame = len(fnames)
        print("Total number of frames: ", str(self.total_frame))
        print("Simulating the camera with video frame size 15")

        # Initialize the YOLOv4 network.
        network, class_names, class_colors = darknet.load_network(
            './darknet/cfg/yolov4.cfg',
            './darknet/cfg/coco.data',
            './darknet/yolov4.weights',
            batch_size=1)
        self.network = network
        self.class_names = class_names

        # Initialize the Deep SORT network for multi-person tracking.
        model_filename = 'model_data/mars-small128.pb'
        self.encoder = gdet.create_box_encoder(model_filename, batch_size=1)
        self.metric = nn_matching.NearestNeighborDistanceMetric(
            "cosine", self.max_cosine_distance, self.nn_budget)
        self.tracker = Tracker(self.metric)
        print("Camera initiated")

    def first_phase(self, start_id):
        # Read the image batch and run detections.
        end_id = min(self.total_frame, int(start_id) + self.batch_size)
        print(end_id)
        total_obj = 0
        unique_obj_bbox = {}
        displacement_check = self.displacement_check

        for i in range(int(start_id), end_id):
            # print(self.dataset_dir + "/" + f"{str(i).zfill(10)}.png")
            image = cv2.imread(
                self.dataset_dir + "/" + f"{str(i).zfill(10)}.png")
            darknet_image = darknet.make_image(1920, 1080, 3)
            image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            darknet.copy_image_from_bytes(darknet_image, image_rgb.tobytes())

            # detections: list of tuples (class_name, confidence_score, (bbox_info))
            detections = darknet.detect_image(
                self.network, self.class_names, darknet_image, thresh=0.4)
            total_obj = total_obj + len(detections)

            bboxes = [obj[2] for obj in detections]
            confidence = [obj[1] for obj in detections]
            classes = [obj[0] for obj in detections]
            features = self.encoder(image_rgb, bboxes)
            detections = [
                Detection(bbox, confidence, cls, feature)
                for bbox, confidence, cls, feature in zip(
                    bboxes, confidence, classes, features)]

            # Run non-maxima suppression.
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            indices = preprocessing.non_max_suppression(
                boxes, self.nms_max_overlap, scores)
            detections = [detections[i] for i in indices]

            self.tracker.predict()
            self.tracker.update(detections)

            for track in self.tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                if track.track_id not in unique_obj_bbox:
                    unique_obj_bbox[track.track_id] = {
                        'feature': track.features[0].tolist()}
                    unique_obj_bbox[track.track_id]['length'] = 0

                # Find the center point of the tracked object.
                center_point = track.to_tlwh()
                c_x = center_point[0] + (center_point[2]) / 2
                c_y = center_point[1] + (center_point[3]) / 2
                if track.track_id not in displacement_check:
                    displacement_check[track.track_id] = (c_x, c_y)
                else:
                    disp = math.sqrt(
                        (c_x - displacement_check[track.track_id][0]) ** 2
                        + (c_y - displacement_check[track.track_id][1]) ** 2)
                    # print('disp for cam: ', str(track.track_id), " ", str(disp))
                    unique_obj_bbox[track.track_id]['length'] = \
                        unique_obj_bbox[track.track_id]['length'] + disp
                    # Update the center point for the next iteration.
                    displacement_check[track.track_id] = (c_x, c_y)

        self.displacement_check = displacement_check
        return {'total_obj': total_obj, 'unique_obj_bbox': unique_obj_bbox}

    def second_phase(self, bitrate, start_id):
        encoded_vid_path = os.path.join(self.temp_dir, "temp.mp4")
        if not bitrate:
            encoding_result = subprocess.run(
                ["ffmpeg", "-y", "-loglevel", "error",
                 "-start_number", str(start_id),
                 "-i", f"{self.dataset_dir}/%010d.png",
                 "-vcodec", "libx264", "-g", "15", "-keyint_min", "15",
                 "-pix_fmt", "yuv420p",
                 "-frames:v", str(self.batch_size), encoded_vid_path],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                universal_newlines=True)
        else:
            rate = str(bitrate) + "k"
            encoding_result = subprocess.run(
                ["ffmpeg", "-y", "-loglevel", "error",
                 "-start_number", str(start_id),
                 "-i", f"{self.dataset_dir}/%010d.png",
                 "-vcodec", "libx264", "-g", "15", "-keyint_min", "15",
                 "-maxrate", f"{rate}", "-b", f"{rate}", "-bufsize", f"{rate}",
                 "-pix_fmt", "yuv420p",
                 "-frames:v", str(self.batch_size), encoded_vid_path],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                universal_newlines=True)
        return "OK"
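
# A minimal sketch of driving the Client over a dataset in batches; the
# camera id and bitrate below are hypothetical.
if __name__ == "__main__":
    client = Client(id="1", batch_size=15)
    start_id = 0
    while start_id < client.total_frame:
        stats = client.first_phase(start_id)  # detect + track one batch
        print(stats['total_obj'], len(stats['unique_obj_bbox']))
        client.second_phase(bitrate=500, start_id=start_id)  # encode and send
        start_id += client.batch_size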
def run(metadata_fpath, detections_dir, out_fpath, min_confidence, nms_max_overlap, min_detection_height, max_cosine_distance, nn_budget):
    """Run multi-target tracker on a particular sequence.

    Parameters
    ----------
    metadata_fpath : str
        Path to the sequence metadata CSV file (must contain a 'filename' column).
    detections_dir : str
        Path to the directory of per-frame detection files (.deepsort.npy).
    out_fpath : str
        Path to the tracking output file. This file will contain the tracking
        results on completion.
    min_confidence : float
        Detection confidence threshold. Disregard all detections that have
        a confidence lower than this value.
    nms_max_overlap: float
        Maximum detection overlap (non-maxima suppression threshold).
    min_detection_height : int
        Detection height threshold. Disregard all detections that have
        a height lower than this value.
    max_cosine_distance : float
        Gating threshold for cosine distance metric (object appearance).
    nn_budget : Optional[int]
        Maximum size of the appearance descriptor gallery. If None, no budget
        is enforced.

    """
    metadata = pd.read_csv(metadata_fpath)
    fnames = metadata['filename']
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)
    results = []
    for fname in fnames:
        # str.strip('.png') would strip characters, not the suffix; use the stem instead.
        frame_idx = int(Path(fname).stem.split('_')[-1])
        det_fpath = Path(detections_dir, fname + '.deepsort.npy')
        det = np.load(det_fpath)

        # Load image and generate detections.
        detections = create_detections(det, min_detection_height)
        detections = [d for d in detections if d.confidence >= min_confidence]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Update tracker.
        tracker.predict()
        tracker.update(detections)

        # Store results.
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlwh()
            results.append([frame_idx, track.track_id, bbox[0], bbox[1], bbox[2], bbox[3]])

    # Write results in MOTChallenge format.
    with open(out_fpath, 'w') as f:
        for row in results:
            print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1' % (
                row[0], row[1], row[2], row[3], row[4], row[5]), file=f)
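# The two-argument create_detections used above differs from the stock
# deep_sort helper (which also takes a frame index). A minimal sketch, assuming
# each row of the .deepsort.npy file is [x, y, w, h, confidence, feature...],
# i.e. the standard deep_sort layout minus the leading frame/id columns:
from deep_sort.detection import Detection

def create_detections(detection_mat, min_height=0):
    detection_list = []
    for row in detection_mat:
        bbox, confidence, feature = row[:4], row[4], row[5:]
        if bbox[3] < min_height:  # drop detections shorter than min_height
            continue
        detection_list.append(Detection(bbox, confidence, feature))
    return detection_list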
def detect_and_track(file_path, save_path, detection_mode="YOLO3"):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
    # If the video is to be saved, define the output size.
    size = (640, 480)
    save_fps = 24
    # use deep_sort tracker
    model_filename = '../deep_sort/model_data/resources/networks/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)
    write_video_flag = True
    # Assume at most 300 targets per frame and generate 300 random colors.
    colours = np.random.rand(300, 3) * 255
    video_capture = cv2.VideoCapture(file_path)
    if write_video_flag:
        output_video = cv2.VideoWriter(save_path + 'output.mp4',
                                       cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), save_fps, size)
        object_list_file = open(save_path + 'detection.txt', 'w')
        frame_index = -1
    if detection_mode == "YOLO3":
        yolo = YOLO()
    elif detection_mode == "SSD":
        ssd = SSD()
    # 'appear' records how many frames each target has been present;
    # 'number' counts all distinct targets seen so far.
    appear = {}
    number = 0
    while True:
        ret, frame = video_capture.read()
        if not ret:
            break
        frame = cv2.resize(frame, size)
        # Record the time at which processing of this frame starts.
        start_time = time.time()
        if detection_mode == "YOLO3":
            image = Image.fromarray(frame[..., ::-1])
            # boxes are [x, y, w, h] coordinates, detect_scores are the object
            # scores, and origin_boxes are in top-left + bottom-right form.
            boxes, detect_scores, origin_boxes = yolo.detect_image(image)
        elif detection_mode == "SSD":
            rclasses, rscores, rbboxes = ssd.process_image(frame)
            height, width = frame.shape[0], frame.shape[1]
            boxes = []
            # Iterate over every target in the frame (indexed like rclasses).
            for i in range(rclasses.shape[0]):
                # rbboxes originally hold top-left and bottom-right coordinates normalized to [0, 1].
                xmin, ymin = int(rbboxes[i, 1] * width), int(rbboxes[i, 0] * height)
                xmax, ymax = int(rbboxes[i, 3] * width), int(rbboxes[i, 2] * height)
                # Convert to x, y, w, h coordinates.
                x, y, w, h = int(xmin), int(ymin), int(xmax - xmin), int(ymax - ymin)
                if x < 0:
                    w = w + x
                    x = 0
                if y < 0:
                    h = h + y
                    y = 0
                boxes.append([x, y, w, h])
            boxes = np.array(boxes)
        features = encoder(frame, boxes)
        # score to 1.0 here
        detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxes, features)]
        # Non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]
        # Tracker predict and update.
        tracker.predict()
        tracker.update(detections)
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            color = (int(colours[track.track_id % 300, 0]), int(colours[track.track_id % 300, 1]),
                     int(colours[track.track_id % 300, 2]))
            # (int(bbox[0]), int(bbox[1])) is the top-left corner and
            # (int(bbox[2]), int(bbox[3])) is the bottom-right corner.
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, color, 2)
            if track.track_id in appear.keys():
                appear[track.track_id] += 1
            else:
                number += 1
                appear[track.track_id] = 1
        show_fps = 1. / (time.time() - start_time)
        cv2.putText(frame, text="FPS: " + str(int(show_fps)), org=(3, 15),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.50, color=(0, 255, 0), thickness=2)
        cv2.putText(frame, text="number: " + str(number), org=(3, 30),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.50, color=(0, 255, 0), thickness=2)
        cv2.imshow('result', frame)
        if write_video_flag:
            # Save this frame.
            output_video.write(frame)
            # Update the video frame index.
            frame_index = frame_index + 1
            # Write the next frame index to detection.txt.
            object_list_file.write(str(frame_index) + ' ')
            # Write the box coordinates of every target detected in this frame.
            if len(boxes) != 0:
                for i in range(0, len(boxes)):
                    object_list_file.write(
                        str(boxes[i][0]) + ' ' + str(boxes[i][1]) + ' ' +
                        str(boxes[i][2]) + ' ' + str(boxes[i][3]) + ' ')
            object_list_file.write('\n')
        # Press q to quit.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    video_capture.release()
    if write_video_flag:
        output_video.release()
        object_list_file.close()
    cv2.destroyAllWindows()
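# The 'appear' dict above accumulates per-ID frame counts; at save_fps frames
# per second that converts directly to an approximate dwell time per target.
# An illustrative post-processing helper (not in the original code):
def dwell_times(appear, fps=24):
    # track_id -> seconds the target was visible
    return {tid: frames / float(fps) for tid, frames in appear.items()}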
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 2.0
    nn_budget = None
    nms_max_overlap = 3.0

    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    show_detections = True  # show detection boxes in blue while detecting
    writeVideo_flag = True  # record video output
    defaultSkipFrames = 10  # skipped frames between detections

    # set up the door counting arc
    H1 = 245
    W1 = 370
    H2 = 280
    W2 = 480
    H = None
    W = None
    R = 80  # min R is 56

    def solve_quadratic_equation(a, b, c):
        """ax2 + bx + c = 0"""
        delta = b**2 - 4 * a * c
        if delta < 0:
            print("The equation has no solution!")
        elif delta == 0:
            return -b / (2 * a)
        else:
            print("The equation has two distinct solutions!")
            if float((-b - sqrt(delta)) / (2 * a)) > float((-b + sqrt(delta)) / (2 * a)):
                return float((-b - sqrt(delta)) / (2 * a))
            else:
                return float((-b + sqrt(delta)) / (2 * a))

    def setup_door(H1, W1, H2, W2, R):
        # Step 1: find the midpoint I of A = (W1, H1) and B = (W2, H2).
        I1 = (W1 + W2) / 2
        I2 = (H1 + H2) / 2
        # Find the vector AB.
        u1 = W2 - W1
        u2 = H2 - H1
        # AB is the normal vector of the perpendicular bisector d of AB,
        # so d is the line y = -(u1 / u2) * x - c / u2.
        c = -u1 * I1 - u2 * I2  # find c
        # Step 2: find the center O of the circle.
        al = c / u2 + I2
        # Compute D: the distance between I and O.
        fi = acos(sqrt((I1 - W1)**2 + (I2 - H1)**2) / R)
        D = sqrt((I1 - W1)**2 + (I2 - H1)**2) * tan(fi)
        O1 = solve_quadratic_equation((1 + u1**2 / u2**2), 2 * (-I1 + u1 / u2 * al), al**2 - D**2 + I1**2)
        O2 = -u1 / u2 * O1 - c / u2
        # The quadratic has two roots; only the upper one is kept.
        # Step 3: find the points on the circle.
        door_dict = dict()
        for w in range(W1, W2):
            h = O2 + sqrt(R**2 - (w - O1)**2)
            door_dict[w] = round(h)
        return door_dict

    door_dict = setup_door(H1, W1, H2, W2, R)
    totalFrames = 0
    totalIn = 0

    # create an empty list of centroids to count traffic
    pts = [deque(maxlen=30) for _ in range(9999)]

    file_path = 'D:\\video/[Sala Outside][2020-05-28T16-01-39][2020-05-28T18-02-09].mp4'
    video_capture = cv2.VideoCapture(file_path)
    fps_imutils = imutils.video.FPS().start()

    if writeVideo_flag:
        fourcc = cv2.VideoWriter_fourcc(*'MP4V')
        out = cv2.VideoWriter('output_yolov4.mp4', fourcc, 3, (736, 480))

    while True:
        oke, frame = video_capture.read()
        if not oke:
            break
        frame = cv2.resize(frame, (736, 480))
        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb

        # if the frame dimensions are empty, set them
        if W is None or H is None:
            (H, W) = frame.shape[:2]

        # calculate video time
        videotime = video_capture.get(cv2.CAP_PROP_POS_MSEC) / 1000

        # Draw the door line
        for w in range(W1, W2):
            cv2.circle(frame, (w, door_dict[w]), 1, (0, 255, 255), -1)
        cv2.circle(frame, (W1, H1), 4, (0, 0, 255), -1)
        cv2.circle(frame, (W2, H2), 4, (0, 0, 255), -1)

        if totalFrames % defaultSkipFrames == 0:
            t2 = time.time()
            boxes, confidence, classes = yolo.detect_image(image)  # average time: 1.2s
            print(time.time() - t2)
            features = encoder(frame, boxes)
            detections = [
                Detection(bbox, confidence, cls, feature)
                for bbox, confidence, cls, feature in zip(boxes, confidence, classes, features)
            ]

            # Run non-maxima suppression.
boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) classes = np.array([d.cls for d in detections]) indices = preprocessing.non_max_suppression( boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) for det in detections: bbox = det.to_tlbr() if show_detections and len(classes) > 0: det_cls = det.cls score = "%.2f" % (det.confidence * 100) + "%" cv2.putText(frame, str(det_cls) + " " + score, (int(bbox[0]), int(bbox[3]) - 10), 0, 1e-3 * frame.shape[0], (0, 255, 0), 1) cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 1) for track in tracker.tracks: if not track.is_confirmed(): continue bbox = track.to_tlbr() if not_count_staff(frame, int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])): # adc = "%.2f" % (track.adc * 100) + "%" # Average detection confidence cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 255, 255), 2) cv2.putText(frame, "STAFF", (int(bbox[0]), int(bbox[1]) - 10), 0, 1e-3 * frame.shape[0], (0, 0, 255), 1) continue else: # adc = "%.2f" % (track.adc * 100) + "%" # Average detection confidence cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2) cv2.putText(frame, "ID: " + str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 1e-3 * frame.shape[0], (0, 255, 0), 1) x = [c[0] for c in pts[track.track_id]] y = [c[1] for c in pts[track.track_id]] centroid_x = int(((bbox[0]) + (bbox[2])) / 2) centroid_y = int(((bbox[1]) + (bbox[3])) / 2) if not track.Counted and centroid_x in range(W1, W2): if centroid_y < np.mean(y) and door_dict[ centroid_x] > centroid_y and np.max(x) - np.min( x) > 20: totalIn += 1 track.Counted = True print(track.track_id, track.Counted) cv2.circle(frame, (centroid_x, centroid_y), 4, (0, 255, 0), -1) pts[track.track_id].append((centroid_x, centroid_y)) info = [("Time", "{:.4f}".format(videotime)), ("In", totalIn)] # loop over the info tuples and draw them on our frame for (i, (k, v)) in enumerate(info): text = "{}: {}".format(k, v) cv2.putText(frame, text, (W - 150, ((i * 20) + 20)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2) if writeVideo_flag: # save a frame out.write(frame) if show_detections: cv2.imshow('People counter', frame) # Press Q to stop! if cv2.waitKey(1) & 0xFF == ord('q'): break else: # Call the tracker tracker.predict() tracker.update(detections) fps_imutils.update() totalFrames += 1 fps_imutils.stop() print('imutils FPS: {}'.format(fps_imutils.fps())) if writeVideo_flag: out.release() video_capture.release() cv2.destroyAllWindows()
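# The counting rule above fires when a centroid is moving upward in the image
# (its y is below the recent mean), has crossed above the door arc, and has
# shown enough horizontal travel to filter out jitter. A standalone
# restatement of that predicate (hypothetical helper, mirroring the inline logic):
import numpy as np

def crossed_door(track_xs, track_ys, cx, cy, door_dict, min_travel=20):
    # moving up: current y is smaller than the recent mean y (image coords)
    moving_up = cy < np.mean(track_ys)
    # above the arc: the door curve at this x lies below the centroid
    above_arc = door_dict.get(cx, -float('inf')) > cy
    # enough horizontal motion to reject jitter
    travelled = np.max(track_xs) - np.min(track_xs) > min_travel
    return moving_up and above_arc and travelled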
class YoloV4DeepsortServer(BaseDetectionServer): def __init__(self, weightPath="/yolov4_sb_best.weights", configPath="/yolov4_sb.cfg", metaPath="/voc_sb.data", deepsort_modelPath="/mars_sb_14.pb", image_size=640, nms_conf_thresh=0.4, nms_iou_thresh=0.5,max_cosine_distance = 0.6, nn_budget = 50, nms_max_overlap = 1.0): try: import darknet from deep_sort.tracker import Tracker from deep_sort import nn_matching from tools import generate_detections as gdet from deep_sort import preprocessing from deep_sort.detection import Detection as deep_detection import os except ImportError: raise self.image_size = image_size project_dir = os.path.dirname(__file__) configPath = os.path.join(project_dir, configPath) weightPath = os.path.join(project_dir, weightPath) metaPath = os.path.join(project_dir, metaPath) deepsort_modelPath = os.path.join(project_dir, deepsort_modelPath) self.network, self.class_names, self.class_colors = darknet.load_network(configPath, metaPath, weightPath, batch_size=1) self.nms_max_overlap = nms_max_overlap #DarknetImage self.darknet = darknet ww = 640 hh = 480 self.darknet_image = darknet.make_image(ww, hh, 3) # deep_sort self.preprocessing = preprocessing self.deep_detection = deep_detection self.encoder = gdet.create_box_encoder(deepsort_modelPath, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) self.tracker = Tracker(metric) BaseDetectionServer.__init__(self) @staticmethod def citation_notice(): return "YoloV4 Inference and Feature Extractor by Ya Xiong(Bill)\n" \ "Maintained by Robert Belshaw ([email protected])" @function_timer.interval_logger(interval=10) def get_detector_results(self, request): """ Args: request (GetDetectorResultsRequest): Returns: GetDetectorResultsResponse """ try: import cv2 except ImportError: raise if self.currently_busy.is_set(): return GetDetectorResultsResponse(status=ServiceStatus(BUSY=True)) self.currently_busy.set() detections_msg = Detections() try: frame = ros_numpy.numpify(request.image) original_shape = frame.shape frame = cv2.resize(frame, (self.image_size, int(self.image_size*0.75))) self.darknet.copy_image_from_bytes(self.darknet_image, frame.tobytes()) detections_yolo = self.darknet.detect_image(self.network, self.class_names, self.darknet_image, thresh=0.7) boxs = [] confidences = [] class_name= [] for detection in detections_yolo: if detection[0] != "ripe": # only track ripe berry whose id is 0 x1, y1, x2, y2 = self._convertBack(detection[2][0], \ detection[2][1], \ detection[2][2], \ detection[2][3]) x1 = x1*original_shape[1]/frame.shape[1] x2 = x2*original_shape[1]/frame.shape[1] y1 = y1*original_shape[0]/frame.shape[0] y2 = y2*original_shape[0]/frame.shape[0] x1 = max(min(original_shape[1]-1, x1), 1) x2 = max(min(original_shape[1]-1, x2), 1) y1 = max(min(original_shape[0]-1, y1), 1) y2 = max(min(original_shape[0]-1, y2), 1) detections_msg.objects.append(Detection(roi=RegionOfInterest(x1=x1, y1=y1, x2=x2, y2=y2), seg_roi=SegmentOfInterest(x=[], y=[]), id=self._new_id(), track_id=-1, confidence=float(detection[1])/100, class_name="unripe")) continue confidences.append(float(detection[1])/100) class_name.append(detection[0]) bounds = detection[2] xCoord = int(bounds[0] - bounds[2] / 2) yCoord = int(bounds[1] - bounds[3] / 2) boxs.append([xCoord, yCoord, int(bounds[2]), int(bounds[3])]) features = self.encoder(frame, boxs) detections = [self.deep_detection(bbox, confidence, feature) for bbox, confidence, feature in zip(boxs, confidences, features)] # Run non-maxima 
            # suppression.
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            indices = self.preprocessing.non_max_suppression(boxes, self.nms_max_overlap, scores)
            detections = [detections[i] for i in indices]

            # Call the tracker
            self.tracker.predict()
            self.tracker.update(detections)

            for track in self.tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    track_id = 0
                    continue
                track_id = int(track.track_id)
                bbox = track.to_tlbr()
                x1, y1, x2, y2 = bbox[0], bbox[1], bbox[2], bbox[3]
                # rescale from the resized inference frame back to the original image
                x1 = x1 * original_shape[1] / frame.shape[1]
                x2 = x2 * original_shape[1] / frame.shape[1]
                y1 = y1 * original_shape[0] / frame.shape[0]
                y2 = y2 * original_shape[0] / frame.shape[0]
                # clamp to the image bounds
                x1 = max(min(original_shape[1] - 1, x1), 1)
                x2 = max(min(original_shape[1] - 1, x2), 1)
                y1 = max(min(original_shape[0] - 1, y1), 1)
                y2 = max(min(original_shape[0] - 1, y2), 1)
                roi = RegionOfInterest(x1=x1, y1=y1, x2=x2, y2=y2)
                detections_msg.objects.append(Detection(roi=roi, seg_roi=SegmentOfInterest(x=[], y=[]),
                                                        id=self._new_id(), track_id=track_id,
                                                        confidence=0.99, class_name="Ripe Strawberry"))
            self.currently_busy.clear()
        except Exception as e:
            print("YoloV4DeepsortServer error: ", e)
            return GetDetectorResultsResponse(status=ServiceStatus(ERROR=True), results=detections_msg)
        return GetDetectorResultsResponse(status=ServiceStatus(OKAY=True), results=detections_msg)

    def _convertBack(self, x, y, w, h):
        xmin = int(round(x - (w / 2)))
        xmax = int(round(x + (w / 2)))
        ymin = int(round(y - (h / 2)))
        ymax = int(round(y + (h / 2)))
        return xmin, ymin, xmax, ymax
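# Darknet returns boxes as (cx, cy, w, h); the server above converts them to
# corner form for ROS messages (via _convertBack) and to top-left form for
# Deep SORT. A small illustrative helper pair showing both conversions
# (assumption: plain tuples, no ROS types):
def center_to_tlbr(cx, cy, w, h):
    # corner form: top-left and bottom-right
    return cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2

def center_to_tlwh(cx, cy, w, h):
    # top-left plus width/height, the format Deep SORT's encoder expects
    return cx - w / 2, cy - h / 2, w, h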
def detect_video_with_deepsort(yolo, video_path, rot_number, output_path="", deepsort_model_filename=None):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # Open the video.
    vid = cv2.VideoCapture(video_path)
    if not vid.isOpened():
        raise IOError("Couldn't open webcam or video")

    # Save the recording with the same properties as the original stream.
    video_FourCC = int(vid.get(cv2.CAP_PROP_FOURCC))
    video_fps = vid.get(cv2.CAP_PROP_FPS)
    video_size = (int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)), int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    isOutput = True if output_path != "" else False
    if isOutput:
        print("!!! TYPE:", type(output_path), type(video_FourCC), type(video_fps), type(video_size))
        out = cv2.VideoWriter(output_path, video_FourCC, video_fps, video_size)
    accum_time = 0
    curr_fps = 0
    fps = "FPS: ??"
    prev_time = timer()

    # Load deep_sort.
    encoder = gdet.create_box_encoder(deepsort_model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    frame_index = 0
    while True:
        return_value, frame_bgr = vid.read()
        frame_index += 1
        if frame_bgr is None:
            break
        # Detection uses the RGB frame because the model was trained on RGB
        # images; deep_sort uses the BGR frame because that is what the
        # original code expects.
        # 1. OpenCV reads frames as BGR, so convert to RGB for detection.
        frame_rgb = cv2.cvtColor(frame_bgr.copy(), cv2.COLOR_BGR2RGB)
        # 2. Rotate the image.
        frame_rgb = np.rot90(frame_rgb, rot_number)
        # 3. YOLO detection; the output boxes are tlbr.
        frame_rgb_Image = Image.fromarray(frame_rgb)
        out_boxes_tlbr, out_scores, out_classes = yolo.get_detect_boxes(frame_rgb_Image)
        # 4. Convert the detector's tlbr boxes to tlwh boxes.
        out_boxes_tlwh = []
        out_boxes_tlbr_1 = copy.deepcopy(out_boxes_tlbr)  # a list of lists must be deep-copied
        if len(out_boxes_tlbr_1) != 0:
            for bbox in out_boxes_tlbr_1:
                bbox[2:] -= bbox[:2]
                out_boxes_tlwh.append(bbox)
                # print('out_boxes:', out_boxes[i])

        features = encoder(frame_bgr, out_boxes_tlwh)
        # score to 1.0 here).
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(out_boxes_tlwh, features)
        ]

        # 5. Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # 6. Deep SORT tracking.
        tracker.predict()
        tracker.update(detections)

        # index += 1
        # "track while" -- in fact this was not useful:
        # if index >= 20 and len(out_boxes_tlwh) != 0:
        #     track_while(encoder, tracker, vid, nms_max_overlap, out_boxes_tlwh, detections)
        #     index = 0

        # 7. Draw the Deep SORT tracking boxes.
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            # Convert tlwh to tlbr.
            bbox = track.to_tlbr()
            # Fetch the track trajectory.
            track.update_trajectory()
            trajectorys = track.trajectory
            # Draw as points:
            # for trajectory in trajectorys:
            #     cv2.circle(frame_bgr, trajectory, 1, (0, 0, 213), -1)
            # Draw as lines:
            for i in range(0, len(trajectorys), 2):
                try:
                    start_point = trajectorys[i]
                    end_point = trajectorys[i + 1]
                except Exception:
                    # The last index overflowed.
                    end_point = start_point
                cv2.line(frame_bgr, start_point, end_point, (0, 255, 255), 2)  # yellow, 2 pixels wide
            # Draw the box and label.
            cv2.rectangle(frame_bgr, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 0, 255), 2)
            cv2.putText(frame_bgr, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2)

        # 8. Draw the detection boxes.
        detections = out_boxes_tlbr
        for bbox in detections:
            cv2.rectangle(frame_bgr, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 255, 0), 1)

        # Time one frame.
        curr_time = timer()
        exec_time = curr_time - prev_time
        prev_time = curr_time
        accum_time = accum_time + exec_time  # accum_time is the accumulated total time
        curr_fps = curr_fps + 1
        # Display speed information: once a full second has accumulated,
        # publish the fps count and reset it.
        if accum_time > 1:
            accum_time = accum_time - 1
            curr_fps = curr_fps + 2
            fps = "FPS: " + str(curr_fps)
            curr_fps = 0
        cv2.putText(frame_bgr, text=fps, org=(3, 15), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=0.50, color=(0, 255, 0), thickness=2)

        # Save the recording.
        if isOutput:
            out.write(frame_bgr)
        # Show the image.
        cv2.imshow('', frame_bgr)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    yolo.close_session()
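# A hypothetical driver for detect_video_with_deepsort; the paths are
# placeholders, YOLO stands for the detector class assumed by the function,
# and rot_number counts the 90-degree rotations applied via np.rot90:
if __name__ == '__main__':
    yolo = YOLO()
    detect_video_with_deepsort(yolo, 'input.mp4', rot_number=0, output_path='out.avi',
                               deepsort_model_filename='model_data/mars-small128.pb')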
def main(yolo):
    start = time.time()
    # Definition of the parameters
    max_cosine_distance = 0.5  # gating threshold for the cosine distance
    nn_budget = None
    nms_max_overlap = 0.3  # non-maxima suppression threshold

    counter = []
    # deep_sort
    model_filename = 'model_data/market1501.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True
    # video_path = "./output/output.avi"
    video_capture = cv2.VideoCapture(args["input"])

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter(
            './output/' + args["input"][43:57] + "_" + args["class"] + '_output.avi',
            fourcc, 15, (w, h))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    count = 0
    class_name = ''

    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            break
        t1 = time.time()

        # image = Image.fromarray(frame)
        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        boxs, class_names = yolo.detect_image(image)
        features = encoder(frame, boxs)
        # score to 1.0 here).
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(boxs, features)
        ]
        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        i = int(0)
        indexIDs = []
        c = []
        boxes = []
        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            # boxes.append([track[0], track[1], track[2], track[3]])
            indexIDs.append(int(track.track_id))
            counter.append(int(track.track_id))
            bbox = track.to_tlbr()
            color = [int(c) for c in COLORS[indexIDs[i] % len(COLORS)]]

            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (color), 3)
            cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1] - 50)), 0, 5e-3 * 150, (color), 2)
            if len(class_names) > 0:
                class_name = class_names[0]
                cv2.putText(frame, str(class_names[0]), (int(bbox[0]), int(bbox[1] - 20)), 0, 5e-3 * 150, (color), 2)

            i += 1
            # bbox_center_point(x, y)
            center = (int(((bbox[0]) + (bbox[2])) / 2), int(((bbox[1]) + (bbox[3])) / 2))
            # track_id[center]
            pts[track.track_id].append(center)
            thickness = 5
            # center point
            cv2.circle(frame, (center), 1, color, thickness)

            # draw motion path
            for j in range(1, len(pts[track.track_id])):
                if pts[track.track_id][j - 1] is None or pts[track.track_id][j] is None:
                    continue
                thickness = int(np.sqrt(64 / float(j + 1)) * 2)
                cv2.line(frame, (pts[track.track_id][j - 1]), (pts[track.track_id][j]), (color), thickness)
                # cv2.putText(frame, str(class_names[j]), (int(bbox[0]), int(bbox[1] - 20)), 0, 5e-3 * 150, (255, 255, 255), 2)

        count = len(set(counter))
        cv2.putText(frame, "Total Object Counter: " + str(count), (int(20), int(120)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "Current Object Counter: " + str(i), (int(20), int(80)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "FPS: %f" % (fps), (int(20), int(40)), 0, 5e-3 * 200, (0, 255, 0), 3)
        cv2.namedWindow("YOLO3_Deep_SORT", 0)
        cv2.resizeWindow('YOLO3_Deep_SORT', 1024, 768)
        cv2.imshow('YOLO3_Deep_SORT', frame)

        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxs) != 0:
                for i in range(0, len(boxs)):
                    list_file.write(
                        str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' +
                        str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')
            list_file.write('\n')

        fps = (fps + (1. / (time.time() - t1))) / 2
        # print(set(counter))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    print(" ")
    print("[Finish]")
    end = time.time()

    # The original check `len(pts[track.track_id]) != None` was always true;
    # report based on whether anything was actually counted instead.
    if count != 0:
        print(args["input"][43:57] + ": " + str(count) + " " + str(class_name) + ' Found')
    else:
        print("[No Found]")

    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
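# The totals above recompute len(set(counter)) every frame over an
# ever-growing list; an equivalent constant-memory variant keeps a single set
# (illustrative sketch, not in the original code):
seen_ids = set()

def update_total(track_ids):
    # track_ids: the confirmed track ids seen in the current frame
    seen_ids.update(track_ids)
    return len(seen_ids)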
def system(self, video_path, output_path, input_size=320, show=False,
           CLASSES='tiny_yolo/data/coco.names', score_threshold=0.3, iou_threshold=0.45,
           rectangle_colors='', Track_only=[], display_tm=False, realTime=True):
    # arducam_utils = ArducamUtils(0)

    # Definition of the deep sort parameters
    max_cosine_distance = 0.7
    nn_budget = None

    # initialize deep sort object
    model_filename = 'model_data/mars-small128.pb'  # deep sort tensorflow pretrained model
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    times, times_2 = [], []  # parameters for finding fps

    if video_path:
        vid = cv2.VideoCapture(video_path)  # detect on video
    else:
        print("\n\n\nSelected device 0")
        vid = cv2.VideoCapture(0, cv2.CAP_V4L2)  # detect from webcam
        # vid.set(cv2.CAP_PROP_CONVERT_RGB, arducam_utils.convert2rgb)
        vid.set(cv2.CAP_PROP_FPS, 2)

    # by default VideoCapture returns float instead of int
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # fps = int(vid.get(cv2.CAP_PROP_FPS))
    codec = cv2.VideoWriter_fourcc(*'MPEG')
    # defining the video writer; output_path must be .avi
    out = cv2.VideoWriter(output_path, codec, 30, (width, height))

    NUM_CLASS = self.read_class_names(CLASSES)  # reading coco classes as a key/value mapping
    num_classes = len(NUM_CLASS)
    key_list = list(NUM_CLASS.keys())
    val_list = list(NUM_CLASS.values())

    # calculating parameters for the image processing function
    loop_check, original_frame = vid.read()
    if not loop_check:
        print("\n\nCouldn't read the video")
        return False

    # colors for detection
    hsv_tuples = [(1.0 * x / num_classes, 1., 1.) for x in range(num_classes)]
    detection_colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
    detection_colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), detection_colors))
    # random.seed(0)
    random.shuffle(detection_colors)  # to shuffle shades of the same color
    # random.seed(None)

    newTime = 0
    prevTime = 0
    dummy_time = 1
    t3 = 0
    playsound('system_ready.wav')

    # loop over the video
    while True:
        # loop_check is a bool indicating whether the frame was read correctly
        loop_check, original_frame = vid.read()
        # cv2.imshow("org", original_frame)
        if not loop_check:
            return True

        prevTime = newTime
        newTime = time.time()
        t1 = time.time()
        bboxes = self.Yolo.predict(original_frame)
        t2 = time.time()

        # extract bboxes to boxes (x, y, width, height), scores and names
        boxes, scores, names = [], [], []
        # tracking
        for bbox in bboxes:  # loop to separate the bounding boxes in the frame
            if len(Track_only) != 0 and NUM_CLASS[int(bbox[5])] in Track_only or len(Track_only) == 0:
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                scoreVal = bbox[4]
                class_id = int(bbox[5])
                boxes.append([x1, y1, x2, y2])
                scores.append(scoreVal)
                label = NUM_CLASS[class_id]
                names.append(label)
                # self.image = cv2.rectangle(original_frame, (x1, y1), (x2, y2), (255, 0, 0), 2)

        # Obtain all the detections for the given frame.
        boxes = np.array(boxes)
        names = np.array(names)
        scores = np.array(scores)
        features = np.array(encoder(original_frame, boxes))
        # create deep sort detection objects
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(boxes, scores, names, features)
        ]

        if realTime:
            tracked_bboxes = time_to_contact(original_frame, tracker.matchedBoxes, newTime, prevTime,
                                             key_list, val_list, display_tm=display_tm)
        else:
            tracked_bboxes = time_to_contact(original_frame, tracker.matchedBoxes, dummy_time,
                                             dummy_time - 0.01666666666, key_list, val_list,
                                             display_tm=display_tm)

        # Pass detections to the deepsort object and obtain the track information.
        tracker.predict()
        tracker.update(detections)

        # draw detection on frame
        self.image = self.draw_bbox(original_frame, tracked_bboxes, detection_colors, NUM_CLASS, tracking=True)

        # calculating fps
        t3 = time.time()
        times.append(t2 - t1)
        times_2.append(t3 - t1)
        times = times[-20:]
        times_2 = times_2[-20:]
        ms = sum(times) / len(times) * 1000
        fps = 1000 / ms
        fps2 = 1000 / (sum(times_2) / len(times_2) * 1000)
        print("Time: {:.2f}ms, Detection FPS: {:.1f}, total FPS: {:.1f}".format(ms, fps, fps2))

        if output_path != '':
            out.write(self.image)
        # The original guard here was `if False:`, which made the display
        # unreachable; honor the `show` parameter instead.
        if show:
            cv2.imshow('Tracked', self.image)
            if cv2.waitKey(25) & 0xFF == ord("q"):
                cv2.destroyAllWindows()
                break
    cv2.destroyAllWindows()
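# The FPS readout above averages the last 20 per-frame latencies; the same
# rolling-window estimate in isolation (illustrative sketch):
def rolling_fps(times, window=20):
    recent = times[-window:]
    if not recent:
        return 0.0
    return 1.0 / (sum(recent) / len(recent))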
def main(yolo): start = time.time() max_cosine_distance = 0.5 nn_budget = None nms_max_overlap = 0.3 counter = [] #deep_sort model_filename = 'model_data/market1501.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) find_objects = ['person'] metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) writeVideo_flag = True video_capture = cv2.VideoCapture(args["input"]) if writeVideo_flag: # Define the codec and create VideoWriter object w = int(video_capture.get(3)) h = int(video_capture.get(4)) fourcc = cv2.VideoWriter_fourcc(*'MJPG') out = cv2.VideoWriter('./output/output.avi', fourcc, 15, (w, h)) list_file = open('detection_rslt.txt', 'w') frame_index = -1 fps = 0.0 while True: ret, frame = video_capture.read() # frame shape 640*480*3 if ret != True: break t1 = time.time() classIDs = [] #image = Image.fromarray(frame) image = Image.fromarray(frame[..., ::-1]) #bgr to rgb boxs, confidence, class_names = yolo.detect_image(image) features = encoder(frame, boxs) # score to 1.0 here). detections = [ Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features) ] # Run non-maxima suppression. boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) i = int(0) indexIDs = [] c = [] boxes = [] center2 = [] co_info = [] x_l = [] y_l = [] s_close_pair = [] for det in detections: bbox = det.to_tlbr() cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2) #print(class_names) #print(class_names[p]) for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue #boxes.append([track[0], track[1], track[2], track[3]]) indexIDs.append(int(track.track_id)) counter.append(int(track.track_id)) bbox = track.to_tlbr() color = [int(c) for c in COLORS[indexIDs[i] % len(COLORS)]] #print(frame_index) list_file.write(str(frame_index) + ',') list_file.write(str(track.track_id) + ',') cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (color), 3) b0 = str(bbox[0] ) #.split('.')[0] + '.' + str(bbox[0]).split('.')[0][:1] b1 = str(bbox[1] ) #.split('.')[0] + '.' + str(bbox[1]).split('.')[0][:1] b2 = str(bbox[2] - bbox[0] ) #.split('.')[0] + '.' 
            # (continuation of the commented-out decimal formatting above:
            #  + str(bbox[3]).split('.')[0][:1])
            b3 = str(bbox[3] - bbox[1])
            list_file.write(str(b0) + ',' + str(b1) + ',' + str(b2) + ',' + str(b3))
            # print(str(track.track_id))
            list_file.write('\n')
            # list_file.write(str(track.track_id) + ',')
            cv2.putText(frame, "ID:" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 50)), 0, 5e-3 * 150, (color), 2)
            if len(class_names) > 0:
                class_name = class_names[0]
                cv2.putText(frame, str(class_names[0]), (int(bbox[0]), int(bbox[1] - 20)), 0, 5e-3 * 150, (color), 2)

            i += 1
            # bbox_center_point(x, y)
            center = (int(((bbox[0]) + (bbox[2])) / 2), int(((bbox[1]) + (bbox[3])) / 2))
            # track_id[center]
            pts[track.track_id].append(center)
            thickness = 5

            # draw distance lines
            (w, h) = (bbox[2], bbox[3])
            center2.append(center)
            co_info.append([w, h, center2])
            # print(center2)

            # calculateDistance; the original loops reused `i` and `j`,
            # clobbering the track counter above, and compared each centroid
            # with itself -- use separate indices and start q at p + 1.
            if len(center2) > 2:
                for p in range(len(center2)):
                    for q in range(p + 1, len(center2)):
                        # g = isclose(co_info[p], co_info[q])
                        # D = dist.euclidean((center2[p]), (center2[q]))
                        x1 = center2[p][0]
                        y1 = center2[p][1]
                        x2 = center2[q][0]
                        y2 = center2[q][1]
                        dis = calculateDistance(x1, y1, x2, y2)
                        if dis < 200:
                            # print(dis)
                            cv2.line(frame, (center2[p]), (center2[q]), (0, 128, 255), 2)
                        if dis < 100:
                            # x_l.append(center2[p])
                            cv2.line(frame, (center2[p]), (center2[q]), (0, 0, 255), 5)
                            # cv2.putText(frame, "KEEP DISTANCE", (int(960), int(1060)), 0, 5e-3 * 200, (0, 0, 255), 2)

            # center point
            cv2.circle(frame, (center), 1, color, thickness)

            # draw motion path
            for j in range(1, len(pts[track.track_id])):
                if pts[track.track_id][j - 1] is None or pts[track.track_id][j] is None:
                    continue
                thickness = int(np.sqrt(64 / float(j + 1)) * 2)
                # cv2.line(frame, (pts[track.track_id][j - 1]), (pts[track.track_id][j]), (color), thickness)

        count = len(set(counter))
        cv2.putText(frame, "Total Pedestrian Counter: " + str(count), (int(20), int(120)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "Current Pedestrian Counter: " + str(i), (int(20), int(80)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "FPS: %f" % (fps * 2), (int(20), int(40)), 0, 5e-3 * 200, (0, 255, 0), 3)
        cv2.namedWindow("YOLO3_Deep_SORT", 0)
        cv2.resizeWindow('YOLO3_Deep_SORT', 1024, 768)
        cv2.imshow('YOLO3_Deep_SORT', frame)

        if writeVideo_flag:
            # save a frame (the original wrote each frame twice; once is enough)
            out.write(frame)
            frame_index = frame_index + 1

        fps = (fps + (1. / (time.time() - t1))) / 2

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    print(" ")
    print("[Finish]")
    end = time.time()

    # The original check `len(pts[track.track_id]) != None` was always true;
    # report based on whether anything was actually counted instead.
    if counter:
        count = len(set(counter))
        print(args["input"][43:57] + ": " + str(count) + " " + str(class_name) + ' Found')
    else:
        print("[No Found]")

    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
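# calculateDistance above is assumed to be the plain Euclidean distance
# between two pixel centroids (the commented-out dist.euclidean call suggests
# as much); a minimal definition:
import math

def calculateDistance(x1, y1, x2, y2):
    return math.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)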
def main(_argv): cam1 = open('cam1.txt', 'w') cam2 = open('cam2.txt', 'w') cam3 = open('cam3.txt', 'w') cam4 = open('cam4.txt', 'w') # Definition of the parameters max_cosine_distance = 0.4 nn_budget = None nms_max_overlap = 1.0 previous = [] current = [] # initialize deep sort model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) # calculate cosine distance metric metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) # initialize tracker tracker = Tracker(metric) # load configuration for object detector config = ConfigProto() config.gpu_options.allow_growth = True session = InteractiveSession(config=config) STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS) input_size = FLAGS.size video_path = FLAGS.video # load tflite model if flag is set if FLAGS.framework == 'tflite': interpreter = tf.lite.Interpreter(model_path=FLAGS.weights) interpreter.allocate_tensors() input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() print(input_details) print(output_details) # otherwise load standard tensorflow saved model else: saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING]) infer = saved_model_loaded.signatures['serving_default'] # begin video capture try: vid = cv2.VideoCapture(int(video_path)) except: vid = cv2.VideoCapture(video_path) out = None # get video ready to save locally if flag is set if FLAGS.output: # by default VideoCapture returns float instead of int width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = int(vid.get(cv2.CAP_PROP_FPS)) codec = cv2.VideoWriter_fourcc(*FLAGS.output_format) out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height)) frame_num = 0 # while video is running while True: return_value, frame = vid.read() if return_value: frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) image = Image.fromarray(frame) else: print('Video has ended or failed, try a different video format!') break frame_num += 1 # print('Frame #: ', frame_num) frame_size = frame.shape[:2] image_data = cv2.resize(frame, (input_size, input_size)) image_data = image_data / 255. 
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        start_time = time.time()

        # run detections on tflite if flag is set
        if FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred = [interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))]
            # run detections using yolov3 if flag is set
            if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
                boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
        else:
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score)

        # convert data to numpy arrays and slice out unused elements
        num_objects = valid_detections.numpy()[0]
        bboxes = boxes.numpy()[0]
        bboxes = bboxes[0:int(num_objects)]
        scores = scores.numpy()[0]
        scores = scores[0:int(num_objects)]
        classes = classes.numpy()[0]
        classes = classes[0:int(num_objects)]

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)

        # store all predictions in one parameter for simplicity when calling functions
        pred_bbox = [bboxes, scores, classes, num_objects]

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        allowed_classes = list(class_names.values())

        # custom allowed classes (uncomment line below to customize tracker for only people)
        # allowed_classes = ['person']

        # loop through objects and use class index to get class name,
        # allowing only classes in the allowed_classes list
        names = []
        deleted_indx = []
        for i in range(num_objects):
            class_indx = int(classes[i])
            class_name = class_names[class_indx]
            if class_name not in allowed_classes:
                deleted_indx.append(i)
            else:
                names.append(class_name)
        names = np.array(names)
        count = len(names)
        if FLAGS.count:
            cv2.putText(frame, "Objects being tracked: {}".format(count), (5, 35),
                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2)
            print("Objects being tracked: {}".format(count))

        utils.save_files(cam1, cam2, cam3, cam4, count)

        # delete detections that are not in allowed_classes
        bboxes = np.delete(bboxes, deleted_indx, axis=0)
        scores = np.delete(scores, deleted_indx, axis=0)

        # encode yolo detections and feed to tracker
        features = encoder(frame, bboxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(bboxes, scores, names, features)
        ]

        # initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)
        # update tracks
        current = []
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            # The original reset `current` inside this loop, so only the last
            # track survived the comparison below; collect every live track instead.
            current.append(class_name + "-" + str(track.track_id))

            # draw bbox on screen
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1)
            cv2.putText(frame, class_name + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2)

            # if the info flag is enabled, print details about each track
            if FLAGS.info:
                print("Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}".format(
                    str(track.track_id), class_name,
                    (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))))

        # report tracks that were present in the previous frame but not in this one
        if len(previous) > 0:
            for prev in previous:
                if prev not in current:
                    print(prev)
        previous = current

        # calculate frames per second of running detections
        fps = 1.0 / (time.time() - start_time)
        print("FPS: %.2f" % fps)
        result = cv2.cvtColor(np.asarray(frame), cv2.COLOR_RGB2BGR)

        if not FLAGS.dont_show:
            cv2.imshow("Output Video", result)

        # if output flag is set, save video file
        if FLAGS.output:
            out.write(result)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cv2.destroyAllWindows()
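# The previous/current bookkeeping above only reports tracks that vanished
# between frames; running the same diff both ways also yields the newly
# entered ones (illustrative sketch):
def track_diff(previous, current):
    exited = [t for t in previous if t not in current]
    entered = [t for t in current if t not in previous]
    return entered, exited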
def Object_tracking(YoloV3, video_path, output_path, input_size=416, show=False, CLASSES=YOLO_COCO_CLASSES, score_threshold=0.3, iou_threshold=0.45, rectangle_colors='', Track_only=[]): # Definition of the parameters max_cosine_distance = 0.7 nn_budget = None #initialize deep sort object model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) times = [] if video_path: vid = cv2.VideoCapture(video_path) # detect on video else: vid = cv2.VideoCapture(0) # detect from webcam # by default VideoCapture returns float instead of int width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = int(vid.get(cv2.CAP_PROP_FPS)) codec = cv2.VideoWriter_fourcc(*'XVID') out = cv2.VideoWriter(output_path, codec, fps, (width, height)) # output_path must be .mp4 NUM_CLASS = read_class_names(CLASSES) key_list = list(NUM_CLASS.keys()) val_list = list(NUM_CLASS.values()) while True: _, img = vid.read() try: original_image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB) except: break image_data = image_preprocess(np.copy(original_image), [input_size, input_size]) image_data = tf.expand_dims(image_data, 0) t1 = time.time() pred_bbox = YoloV3.predict(image_data) t2 = time.time() times.append(t2 - t1) times = times[-20:] pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox] pred_bbox = tf.concat(pred_bbox, axis=0) bboxes = postprocess_boxes(pred_bbox, original_image, input_size, score_threshold) bboxes = nms(bboxes, iou_threshold, method='nms') # extract bboxes to boxes (x, y, width, height), scores and names boxes, scores, names = [], [], [] for bbox in bboxes: if len(Track_only) != 0 and NUM_CLASS[int( bbox[5])] in Track_only or len(Track_only) == 0: boxes.append([ bbox[0].astype(int), bbox[1].astype(int), bbox[2].astype(int) - bbox[0].astype(int), bbox[3].astype(int) - bbox[1].astype(int) ]) scores.append(bbox[4]) names.append(NUM_CLASS[int(bbox[5])]) # Obtain all the detections for the given frame. boxes = np.array(boxes) names = np.array(names) scores = np.array(scores) features = np.array(encoder(original_image, boxes)) detections = [ Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip( boxes, scores, names, features) ] # Pass detections to the deepsort object and obtain the track information. 
tracker.predict() tracker.update(detections) # Obtain info from the tracks tracked_bboxes = [] for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 5: continue bbox = track.to_tlbr() # Get the corrected/predicted bounding box class_name = track.get_class( ) #Get the class name of particular object tracking_id = track.track_id # Get the ID for the particular track index = key_list[val_list.index( class_name)] # Get predicted object index by object name tracked_bboxes.append( bbox.tolist() + [tracking_id, index] ) # Structure data, that we could use it with our draw_bbox function ms = sum(times) / len(times) * 1000 fps = 1000 / ms # draw detection on frame image = draw_bbox(original_image, tracked_bboxes, CLASSES=CLASSES, tracking=True) image = cv2.putText(image, "Time: {:.1f} FPS".format(fps), (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2) # draw original yolo detection #image = draw_bbox(image, bboxes, CLASSES=CLASSES, show_label=False, rectangle_colors=rectangle_colors, tracking=True) #print("Time: {:.2f}ms, {:.1f} FPS".format(ms, fps)) if output_path != '': out.write(image) if show: cv2.imshow('output', image) if cv2.waitKey(25) & 0xFF == ord("q"): cv2.destroyAllWindows() break cv2.destroyAllWindows()
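# Each entry of tracked_bboxes above is structured as
# [x1, y1, x2, y2, track_id, class_index]; a small helper unpacking that
# structure back into named fields (illustrative sketch):
def unpack_tracked_bbox(entry):
    *tlbr, track_id, class_index = entry
    return {'tlbr': tuple(tlbr), 'track_id': int(track_id), 'class_index': int(class_index)}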
def main():
    global frame, frame_index, out, list_file, track, count
    start = time.time()

    # Parameter definitions
    max_cosine_distance = 0.5  # gating threshold for the cosine distance (was 0.9)
    nn_budget = None
    nms_max_overlap = 0.3  # non-maxima suppression threshold

    # whether to save the recognition results
    write_video_flag = True
    counter = []

    # load our serialized model from disk
    # print("[INFO] loading model...")
    net = cv2.dnn.readNetFromCaffe(args["prototxt"], args["model"])

    # deep_sort
    model_filename = 'model_data/market1501.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    video_capture = cv2.VideoCapture(args["input"])
    obj_count_txt_filename = 'counter.txt'
    count_file = open(obj_count_txt_filename, 'a')
    count_file.write('\n')

    if write_video_flag:
        # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        # DIVX, XVID, MJPG, X264, WMV1, WMV2. (XVID is preferable; MJPG results
        # in large files; X264 gives very small files.)
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter(
            os.path.join('video', str(args['input'].split('.')[0][-7:]) + '_out.avi'),
            fourcc, 20, (w, h))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    # frame-rate counter
    fps = 0.0
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            print("Can't receive frame (stream end?). Exiting ...")
            break
        time1 = time.time()

        # frame = imutils.resize(frame, width=800)
        (h, w) = frame.shape[:2]
        blob = cv2.dnn.blobFromImage(cv2.resize(frame, (300, 300)), 0.007843, (300, 300), 127.5)

        # predictions / detection
        time2 = time.time()
        net.setInput(blob)
        detections = net.forward()
        # detections.shape
        # >>> (1, 1, n, 7)
        # e.g. (1, 1, 2, 7)
        # [[[[0. 9. 0.42181703 0.4647404 0.610577
        #     0.6360997 0.8479532]
        #    [0. 15. 0.8989926 0.21603307 0.42735672
        #     0.58441484 0.8699994]]]]
        boxs = []
        class_names = []
        for i in np.arange(0, detections.shape[2]):
            confidence = detections[0, 0, i, 2]
            # greater than the minimum confidence
            if confidence > args["confidence"]:
                idx = int(detections[0, 0, i, 1])
                class_name = CLASSES[idx]
                # filter classes
                if class_name in NEED_CLASSES:
                    class_names.append(class_name)
                    box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
                    # convert to integer coordinates
                    (startX, startY, endX, endY) = box.astype("int")
                    startX = 0 if startX < 0 else startX
                    startY = 0 if startY < 0 else startY
                    boxs.append([startX, startY, endX - startX, endY - startY])
        print(boxs, class_names)
        time3 = time.time()
        print('detect cost is', time3 - time2)

        # feature extraction
        features = encoder(frame, boxs)
        # score to 1.0 here).
        detections = [
            Detection(bbox, class_name, 1.0, feature)
            for bbox, class_name, feature in zip(boxs, class_names, features)
        ]
        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]
        time4 = time.time()
        print('features extract is', time4 - time3)

        # Call the tracker
        tracker.predict()
        tracker.update(detections)
        time5 = time.time()
        print('update tracker cost:', time5 - time4)

        i = 0  # tracker index
        indexIDs = []
        for track in tracker.tracks:
            # todo: and / or
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            # boxes.append([track[0], track[1], track[2], track[3]])
            indexIDs.append(track.track_id)
            counter.append(track.track_id)
            bbox = track.to_tlbr()
            start_x, start_y, end_x, end_y = bbox.astype('int')
            color = COLORS[indexIDs[i] % len(COLORS)].tolist()
            if not track.flag and track.class_name == 'person':
                track.flag = handle_face_car('person', start_x, start_y, end_x, end_y)
            else:
                track.flag = handle_face_car(track.class_name, start_x, start_y, end_x, end_y, not track.flag)

            # draw the tracking box and id label
            cv2.rectangle(frame, (start_x, start_y), (end_x, end_y), color, 3)
            cv2.putText(frame, track.class_name + str(track.track_id), (int(bbox[0]), int(bbox[1] - 40)), 0, 0.75, color, 2)
            i += 1

            # draw motion path
            center = int(((bbox[0]) + (bbox[2])) / 2), int(((bbox[1]) + (bbox[3])) / 2)
            pts[track.track_id].append(center)
            thickness = 5
            cv2.circle(frame, center, 1, color, thickness)
            for j in range(1, len(pts[track.track_id])):
                if pts[track.track_id][j - 1] is None or pts[track.track_id][j] is None:
                    continue
                thickness = int(np.sqrt(64 / (j + 1.0)) * 2)
                cv2.line(frame, (pts[track.track_id][j - 1]), (pts[track.track_id][j]), color, thickness)
        time6 = time.time()
        print('handle tracker cost:', time6 - time5)

        # draw white detection boxes (disabled)
        # for det in detections:
        #     bbox = det.to_tlbr()
        #     cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)

        count = len(set(counter))
        cv2.putText(frame, "Total Object Counter: " + str(count), (20, 120), 0, 0.75, (0, 255, 0), 2)
        cv2.putText(frame, "Current Object Counter: " + str(i), (20, 80), 0, 0.75, (0, 255, 0), 2)
        cv2.putText(frame, "FPS: %f" % fps, (20, 40), 0, 1.0, (0, 255, 0), 2)
        # time7 = time.time()
        # print('Draw Rectangle and Text cost:', time7 - time6)
        cv2.namedWindow("SSD_Deep_SORT", 0)
        cv2.resizeWindow('SSD_Deep_SORT', 1024, 768)
        cv2.imshow('SSD_Deep_SORT', frame)

        if write_video_flag:
            # save a frame
            out.write(frame)
            frame_index += 1
            list_file.write(str(frame_index) + ' ')
            if len(boxs) != 0:
                for i in range(0, len(boxs)):
                    list_file.write(
                        str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' +
                        str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')
            list_file.write('\n')

        fps = (fps + (1. / (time.time() - time1))) / 2
        # print(set(counter))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    print("[Finish]")
    end = time.time()
    # if len(pts[track.track_id]):
    #     print(str(args["input"]) + ": " + str(count) + ' target Found')
    #     count_file.write(str("[VIDEO]: " + args["input"]) + " " + (str(count)) + " " +
    #                      "[MODEL]: MobileNetSSD" + " " + "[TIME]:" + (str('%.2f' % (end - start))))
    # else:
    #     print("[No Found]")
    video_capture.release()
    count_file.write('\n')
    count_file.close()
    if write_video_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
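# The (1, 1, N, 7) SSD output shown above packs one row per candidate as
# [image_id, class_id, confidence, xmin, ymin, xmax, ymax], with corners
# normalized to [0, 1]. A minimal decoder mirroring the inline loop
# (illustrative sketch; w and h are the frame dimensions):
import numpy as np

def decode_ssd(detections, w, h, conf_thresh=0.5):
    out = []
    for i in range(detections.shape[2]):
        conf = detections[0, 0, i, 2]
        if conf > conf_thresh:
            cls = int(detections[0, 0, i, 1])
            x1, y1, x2, y2 = (detections[0, 0, i, 3:7] * np.array([w, h, w, h])).astype("int")
            out.append((cls, float(conf), (x1, y1, x2 - x1, y2 - y1)))  # tlwh box
    return out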
def main(yolo):
    print('Using {} model'.format(yolo))

    # Definition of the parameters
    max_cosine_distance = 0.2
    nn_budget = None
    nms_max_overlap = 0.4

    # deep_sort
    model_filename = 'model_data/models/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)  # used to extract appearance features
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric, max_age=100)

    output_frames = []
    output_rectangles = []
    output_areas = []
    output_wh_ratio = []

    is_vis = True
    out_dir = 'videos/output/'
    print('The output folder is', out_dir)
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    all_frames = []
    for video in args.videos:
        loadvideo = LoadVideo(video)
        video_capture, frame_rate, w, h = loadvideo.get_VideoLabels()
        while True:
            ret, frame = video_capture.read()
            if not ret:
                video_capture.release()
                break
            all_frames.append(frame)

    frame_nums = len(all_frames)
    tracking_path = out_dir + 'tracking' + '.avi'
    combined_path = out_dir + 'allVideos' + '.avi'
    if is_vis:
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter(tracking_path, fourcc, frame_rate, (w, h))
        out2 = cv2.VideoWriter(combined_path, fourcc, frame_rate, (w, h))
        # Combine all videos
        for frame in all_frames:
            out2.write(frame)
        out2.release()

    # Initialize (truncate) the tracking file
    filename = out_dir + 'tracking.txt'
    open(filename, 'w')

    fps = 0.0
    frame_cnt = 0
    t1 = time.time()

    track_cnt = dict()
    images_by_id = dict()
    ids_per_frame = []
    for frame in all_frames:
        image = Image.fromarray(frame[..., ::-1])  # BGR to RGB
        boxs = yolo.detect_image(image)            # n * [topleft_x, topleft_y, w, h]
        features = encoder(frame, boxs)            # n * 128
        detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]  # length = n
        text_scale, text_thickness, line_thickness = get_FrameLabels(frame)

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.delete_overlap_box(boxes, nms_max_overlap, scores)  # preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]  # length = len(indices)

        # Call the tracker
        tracker.predict()
        tracker.update(detections)
        tmp_ids = []
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            area = (int(bbox[2]) - int(bbox[0])) * (int(bbox[3]) - int(bbox[1]))
            if bbox[0] >= 0 and bbox[1] >= 0 and bbox[3] < h and bbox[2] < w:
                tmp_ids.append(track.track_id)
                if track.track_id not in track_cnt:
                    track_cnt[track.track_id] = [[frame_cnt, int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]), area]]
                    images_by_id[track.track_id] = [frame[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])]]
                else:
                    track_cnt[track.track_id].append([frame_cnt, int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]), area])
                    images_by_id[track.track_id].append(frame[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])])
            cv2_addBox(track.track_id, frame, int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]), line_thickness, text_thickness, text_scale)
            write_results(filename, 'mot', frame_cnt + 1, str(track.track_id), int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]), w, h)
        ids_per_frame.append(set(tmp_ids))

        # save a frame
        if is_vis:
            out.write(frame)
        t2 = time.time()

        frame_cnt += 1
        print(frame_cnt, '/', frame_nums)

    if is_vis:
        out.release()
    print('Tracking finished in {} seconds'.format(int(time.time() - t1)))
    print('Tracked video : {}'.format(tracking_path))
    print('Combined video : {}'.format(combined_path))

    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3"
    reid = REID()
    threshold = 320
    exist_ids = set()
    final_fuse_id = dict()

    print('Total IDs = ', len(images_by_id))
    feats = dict()
    for i in images_by_id:
        print('ID number {} -> Number of frames {}'.format(i, len(images_by_id[i])))
        feats[i] = reid._features(images_by_id[i])  # reid._features(images_by_id[i][:min(len(images_by_id[i]), 100)])

    ids_per_frame2 = copy.deepcopy(ids_per_frame)
    for f in ids_per_frame:
        if f:
            if len(exist_ids) == 0:
                for i in f:
                    final_fuse_id[i] = [i]
                exist_ids |= f
            else:
                new_ids = f - exist_ids
                for nid in new_ids:
                    dis = []
                    if len(images_by_id[nid]) < 10:
                        exist_ids.add(nid)
                        continue
                    unpickable = []
                    for i in f:
                        for key, item in final_fuse_id.items():
                            if i in item:
                                unpickable += final_fuse_id[key]
                    print('exist_ids {} unpickable {}'.format(exist_ids, unpickable))
                    for oid in (exist_ids - set(unpickable)) & set(final_fuse_id.keys()):
                        tmp = np.mean(reid.compute_distance(feats[nid], feats[oid]))
                        print('nid {}, oid {}, tmp {}'.format(nid, oid, tmp))
                        dis.append([oid, tmp])
                    exist_ids.add(nid)
                    if not dis:
                        final_fuse_id[nid] = [nid]
                        continue
                    dis.sort(key=operator.itemgetter(1))
                    if dis[0][1] < threshold:
                        combined_id = dis[0][0]
                        images_by_id[combined_id] += images_by_id[nid]
                        final_fuse_id[combined_id].append(nid)
                    else:
                        final_fuse_id[nid] = [nid]
    print('Final ids and their sub-ids:', final_fuse_id)
    print('MOT took {} seconds'.format(int(time.time() - t1)))
    t2 = time.time()

    # To generate a separate video for each person, set 'is_vis' to True
    is_vis = False
    if is_vis:
        print('Writing videos for each ID...')
        output_dir = 'videos/output/tracklets/'
        if not os.path.exists(output_dir):
            os.mkdir(output_dir)
        loadvideo = LoadVideo(combined_path)
        video_capture, frame_rate, w, h = loadvideo.get_VideoLabels()
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        for idx in final_fuse_id:
            tracking_path = os.path.join(output_dir, str(idx) + '.avi')
            out = cv2.VideoWriter(tracking_path, fourcc, frame_rate, (w, h))
            for i in final_fuse_id[idx]:
                for f in track_cnt[i]:
                    video_capture.set(cv2.CAP_PROP_POS_FRAMES, f[0])
                    _, frame = video_capture.read()
                    text_scale, text_thickness, line_thickness = get_FrameLabels(frame)
                    cv2_addBox(idx, frame, f[1], f[2], f[3], f[4], line_thickness, text_thickness, text_scale)
                    out.write(frame)
            out.release()
        video_capture.release()

    # Generate a single video with complete MOT/ReID
    if args.all:
        loadvideo = LoadVideo(combined_path)
        video_capture, frame_rate, w, h = loadvideo.get_VideoLabels()
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        complete_path = out_dir + '/Complete.avi'
        out = cv2.VideoWriter(complete_path, fourcc, frame_rate, (w, h))

        for frame in range(len(all_frames)):
            frame2 = all_frames[frame]
            video_capture.set(cv2.CAP_PROP_POS_FRAMES, frame)
            _, frame2 = video_capture.read()
            for idx in final_fuse_id:
                for i in final_fuse_id[idx]:
                    for f in track_cnt[i]:
                        if frame == f[0]:
                            text_scale, text_thickness, line_thickness = get_FrameLabels(frame2)
                            cv2_addBox(idx, frame2, f[1], f[2], f[3], f[4], line_thickness, text_thickness, text_scale)
            out.write(frame2)
        out.release()
        video_capture.release()
        os.remove(combined_path)
        print('\nWriting videos took {} seconds'.format(int(time.time() - t2)))
        print('Final video at {}'.format(complete_path))
    print('Total: {} seconds'.format(int(time.time() - t1)))
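# The fusion pass above merges a newly appeared track ID into an existing
# identity when the mean appearance distance between their image galleries
# falls below `threshold`. A minimal, self-contained sketch of that merge
# rule, assuming each ID maps to an (n_i, d) feature matrix; the plain
# pairwise Euclidean mean below stands in for REID.compute_distance, so the
# threshold value is illustrative only, not the author's calibrated 320.
import numpy as np

def fuse_ids(feats, threshold):
    """feats: dict id -> (n_i, d) ndarray. Returns dict main_id -> [sub_ids]."""
    fused = {}
    for nid in sorted(feats):
        best_id, best_dist = None, float('inf')
        for oid in fused:
            # mean pairwise Euclidean distance between the two galleries
            diff = feats[nid][:, None, :] - feats[oid][None, :, :]
            dist = np.linalg.norm(diff, axis=-1).mean()
            if dist < best_dist:
                best_id, best_dist = oid, dist
        if best_id is not None and best_dist < threshold:
            fused[best_id].append(nid)  # merge into the closest existing identity
        else:
            fused[nid] = [nid]          # otherwise start a new identity
    return fused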
def main():
    start = time.time()
    counter = []
    writeVideo_flag = False
    fps = 0.0

    filename_path = os.path.join(result_path, 'submission.txt')
    list_video, list_ids = load_list_video(list_video_path, id_path)
    result_file = open(filename_path, 'w')

    max_cosine_distance = 0.8
    nn_budget = 100
    nms_max_overlap = 1.0
    display = True

    for video in list_video:
        path = os.path.join(video_path, video)
        ROI = load_roi(zones_path, video)
        vis = visualization.Visualization(img_shape=(960, 1280, 3), update_ms=2000)
        metric = nn_matching.NearestNeighborDistanceMetric(
            "cosine", max_cosine_distance, nn_budget)
        tracker = Tracker(metric)
        results = []
        print("Processing video: ", video)
        video_capture = cv2.VideoCapture(path)
        pause_display = False
        frame_num = 0
        while True:
            start = time.time()
            video_capture.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
            ret, frame = video_capture.read()
            if not ret:
                break
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
            result = []
            t1 = time.time()

            img = letterbox(frame, new_shape=img_size)[0]
            img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
            img = np.ascontiguousarray(img)
            dets = run_detect(model, img, device, frame)
            detections_all = []
            for det in dets:
                feature = gdet.HOG_feature(frame, det[:4])
                detections_all.append(Detection(det[:4], det[4], feature, det[-1]))

            img = frame.copy()
            min_confidence = 0.4
            detections = [d for d in detections_all if d.confidence >= min_confidence]

            # Run non-maxima suppression.
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            indices = preprocessing.non_max_suppression(
                boxes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]

            # Update tracker.
            tracker.predict()
            tracker.update(detections)

            if display:
                vis.set_image(frame.copy())
                vis.draw_detections(detections)
                vis.draw_trackers(tracker.tracks)
                res = vis.return_img()
                draw_roi(ROI, res)
                cv2.imshow('frame', res)
            print('frame_num', frame_num)
            if not pause_display:
                key = cv2.waitKey(2)
                if key == ord('q'):
                    break
                if key == ord(' '):
                    pause_display = not pause_display
                frame_num += 1
            else:
                key = cv2.waitKey(0)
                if key == ord('q'):
                    break
                if key == ord(' '):
                    pause_display = not pause_display
        print(" ")
        print("[Finish]")
        video_capture.release()

    if writeVideo_flag:
        out.release()
    result_file.close()
    cv2.destroyAllWindows()
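# gdet.HOG_feature above supplies the appearance descriptor in place of the
# usual CNN encoder. A sketch of what such a helper might look like using
# OpenCV's built-in HOGDescriptor; the top-left/width/height box convention,
# resize target, and L2 normalisation are assumptions for illustration, not
# the author's implementation.
import cv2
import numpy as np

def hog_feature(frame, tlwh):
    # Crop the detection, clamp to the image, and resize to the default
    # 64x128 HOG window before computing the descriptor.
    x, y, w, h = (int(v) for v in tlwh)
    x, y = max(x, 0), max(y, 0)
    crop = frame[y:y + h, x:x + w]
    gray = cv2.cvtColor(cv2.resize(crop, (64, 128)), cv2.COLOR_BGR2GRAY)
    feat = cv2.HOGDescriptor().compute(gray).flatten()
    return feat / (np.linalg.norm(feat) + 1e-8)  # normalise for the cosine metric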
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1.0

    # Initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    try:
        vid = cv2.VideoCapture(int(FLAGS.video))  # webcam index
    except ValueError:
        vid = cv2.VideoCapture(FLAGS.video)       # video file path

    out = None
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    count = 0
    while True:
        _, img = vid.read()
        if img is None:
            break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)
        detections = [Detection(bbox, score, class_name, feature)
                      for bbox, score, class_name, feature
                      in zip(converted_boxes, scores[0], names, features)]

        # Initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # Run non-maxima suppression (this fork's version also takes the class array).
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            submit()  # helper defined elsewhere in this script
            bbox = track.to_tlbr()
            class_name = track.get_class()
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 1)
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])),
                          color, -1)
            # If any player name/height is unset, a generic label can be drawn instead:
            # cv2.putText(img, class_name + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2)

            if class_name + str(track.track_id) == "Player1":
                cv2.putText(img, name_1, (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2)
                s_height0 = ((int(bbox[3]) - int(bbox[1])) / height_1) * 1.15
                new_height_player1 = int(int(bbox[3]) - int(s_height0))
                cv2.line(img, (int(bbox[0]), new_height_player1), (int(bbox[2]), new_height_player1), (0, 255, 0), 2)
            if class_name + str(track.track_id) == "Player2":
                cv2.putText(img, name_2, (int(bbox[0]), int(bbox[1])), 0, 0.75, (255, 255, 255), 2)
                s_height1 = ((int(bbox[3]) - int(bbox[1])) / height_2) * 1.15
                new_height_player2 = int(int(bbox[3]) - int(s_height1))
                cv2.line(img, (int(bbox[0]), new_height_player2), (int(bbox[2]), new_height_player2), (0, 255, 0), 2)
            if class_name + str(track.track_id) == "Player3":
                cv2.putText(img, name_3, (int(bbox[0]), int(bbox[1])), 0, 0.75, (255, 255, 255), 2)
                s_height2 = ((int(bbox[3]) - int(bbox[1])) / height_3) * 1.15
                new_height_player3 = int(int(bbox[3]) - int(s_height2))
                cv2.line(img, (int(bbox[0]), new_height_player3), (int(bbox[2]), new_height_player3), (0, 255, 0), 2)
            if class_name + str(track.track_id) == "Player4":
                cv2.putText(img, name_4, (int(bbox[0]), int(bbox[1])), 0, 0.75, (255, 255, 255), 2)
                s_height3 = ((int(bbox[3]) - int(bbox[1])) / height_4) * 1.15
                new_height_player4 = int(int(bbox[3]) - int(s_height3))
                cv2.line(img, (int(bbox[0]), new_height_player4), (int(bbox[2]), new_height_player4), (0, 255, 0), 2)
            if class_name + str(track.track_id) not in {'Player1', 'Player2', 'Player3', 'Player4'}:
                label7.configure(text=class_name + str(track.track_id))
                if class_name + str(track.track_id) == "Player" + str(track.track_id):
                    cv2.putText(img, name_5, (int(bbox[0]), int(bbox[1])), 0, 0.75, (255, 255, 255), 2)
                    s_height4 = ((int(bbox[3]) - int(bbox[1])) / height_5) * 1.15
                    new_height_player5 = int(int(bbox[3]) - int(s_height4))
                    cv2.line(img, (int(bbox[0]), new_height_player5), (int(bbox[2]), new_height_player5), (0, 255, 0), 2)

        # Uncomment below if you want the raw, constantly changing YOLO
        # detections to be shown on screen:
        # for det in detections:
        #     bbox = det.to_tlbr()
        #     cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        # Print FPS on screen
        fps = (fps + (1. / (time.time() - t1))) / 2
        cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        cv2.imshow('output', img)

        if FLAGS.output:
            out.write(img)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(converted_boxes) != 0:
                for i in range(0, len(converted_boxes)):
                    list_file.write(str(converted_boxes[i][0]) + ' ' + str(converted_boxes[i][1]) + ' ' +
                                    str(converted_boxes[i][2]) + ' ' + str(converted_boxes[i][3]) + ' ')
            list_file.write('\n')

        # press q to quit
        if cv2.waitKey(1) == ord('q'):
            break

    vid.release()
    if FLAGS.output:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
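# The near-identical Player blocks above differ only in the overlay name and
# the reference height used to place the green marker line. A data-driven
# sketch of the same drawing logic; `player_info`, mapping a track label such
# as "Player1" to (display_name, height), is a hypothetical stand-in for the
# name_* / height_* globals used above.
def draw_player_height(img, bbox, label, player_info):
    if label not in player_info:
        return
    name, height = player_info[label]
    cv2.putText(img, name, (int(bbox[0]), int(bbox[1]) - 10), 0, 0.75, (255, 255, 255), 2)
    # Scale the box height by the player's real height to place the marker line.
    s_height = ((int(bbox[3]) - int(bbox[1])) / height) * 1.15
    y = int(int(bbox[3]) - int(s_height))
    cv2.line(img, (int(bbox[0]), y), (int(bbox[2]), y), (0, 255, 0), 2)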
if cv2.__version__.split(".")[0] == "2":
    frame_count = vcap.get(cv2.cv.CV_CAP_PROP_FRAME_COUNT)
else:
    # opencv 3/4
    frame_count = vcap.get(cv2.CAP_PROP_FRAME_COUNT)

# initialize tracking module
if args.get_tracking:
    tracking_objs = args.tracking_objs.split(",")
    tracker_dict = {}
    tracking_results_dict = {}
    tmp_tracking_results_dict = {}
    for tracking_obj in tracking_objs:
        metric = nn_matching.NearestNeighborDistanceMetric(
            "cosine", args.max_cosine_distance, args.nn_budget)
        tracker_dict[tracking_obj] = Tracker(
            metric, max_iou_distance=args.max_iou_distance)
        tracking_results_dict[tracking_obj] = []
        tmp_tracking_results_dict[tracking_obj] = {}

# videoname = os.path.splitext(os.path.basename(videofile))[0]
videoname = os.path.basename(videofile)
if args.out_dir is not None:  # not saving box json to save time
    video_out_path = os.path.join(args.out_dir, videoname)
    if not os.path.exists(video_out_path):
        os.makedirs(video_out_path)

# for box features, saving them to disk if needed
if args.get_box_feat:
    feat_out_path = os.path.join(args.box_feat_path, videoname)
    if not os.path.exists(feat_out_path):
        os.makedirs(feat_out_path)
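# The block above builds one independent Tracker per tracked class. A sketch
# of the per-frame update such a dict of trackers implies; `detections_by_obj`,
# mapping a class name to its deep_sort Detection list for the current frame,
# is a hypothetical stand-in for this script's actual detection plumbing.
def step_trackers(tracker_dict, detections_by_obj):
    for tracking_obj, tracker in tracker_dict.items():
        tracker.predict()
        # Classes absent from this frame still need predict() so their
        # tracks age; they simply get an empty detection list.
        tracker.update(detections_by_obj.get(tracking_obj, []))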
def __init__(self, max_age=120, max_cosine_distance=0.4):
    nn_budget = None
    metric = nn_matching.NearestNeighborDistanceMetric(
        "cosine", max_cosine_distance, nn_budget)
    self.tracker = Tracker(metric, max_age=max_age)
    self.encoder = load_encoder()
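# The constructor above is all this wrapper shows. A hypothetical companion
# method sketching how such a wrapper is typically driven once per frame;
# it follows deep_sort's Detection(tlwh, confidence, feature) convention and
# is not the author's actual API.
def track(self, frame, boxes, scores):
    features = self.encoder(frame, boxes)
    detections = [Detection(bbox, score, feature)
                  for bbox, score, feature in zip(boxes, scores, features)]
    self.tracker.predict()
    self.tracker.update(detections)
    # Return confirmed, recently updated tracks as (id, tlbr) pairs.
    return [(t.track_id, t.to_tlbr()) for t in self.tracker.tracks
            if t.is_confirmed() and t.time_since_update <= 1]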
parser.add_argument('--model_feature', type=str, default='model_data/market1501.pb',
                    help='target tracking model file.')
ARGS = parser.parse_args()

box_size = 2      # bounding-box line width
font_scale = 0.4  # label font scale

if __name__ == '__main__':
    # Deep SORT tracker
    encoder = generate_detections.create_box_encoder(ARGS.model_feature, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", ARGS.min_score, None)
    tracker = Tracker(metric)
    # Load the detection model
    mrcnn = MRCNN(ARGS.model_file, ARGS.input_size, ARGS.min_score)
    # Open the input video
    video = cv2.VideoCapture(ARGS.video_file)
    # Set up the output video writer
    # (note: 'XVID' with an .mp4 container may depend on the local FFmpeg build)
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    fps = video.get(cv2.CAP_PROP_FPS)
    size = (int(video.get(cv2.CAP_PROP_FRAME_WIDTH)), int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    video_out = cv2.VideoWriter(out_path + "/outputVideo.mp4", fourcc, fps, size)
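# The setup above stops before the read loop. A sketch of the per-frame loop
# it implies; `mrcnn.detect(frame)` returning (tlwh boxes, scores) is a
# hypothetical API, not necessarily what this MRCNN class exposes.
    while True:
        ret, frame = video.read()
        if not ret:
            break
        boxes, scores = mrcnn.detect(frame)  # hypothetical detector call
        features = encoder(frame, boxes)
        detections = [Detection(bbox, score, feature)
                      for bbox, score, feature in zip(boxes, scores, features)]
        tracker.predict()
        tracker.update(detections)
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            x1, y1, x2, y2 = (int(v) for v in track.to_tlbr())
            cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 255, 255), box_size)
            cv2.putText(frame, str(track.track_id), (x1, y1), 0, font_scale, (0, 255, 0), 2)
        video_out.write(frame)
    video.release()
    video_out.release()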
def run(sequence_dir, detection_file, output_dir, min_confidence,
        nms_max_overlap, min_detection_height, max_cosine_distance,
        nn_budget, display):
    """Run multi-target tracker on a particular sequence.

    Parameters
    ----------
    sequence_dir : str
        Path to the MOTChallenge sequence directory.
    detection_file : str
        Path to the detections file.
    output_dir : str
        Path to the output directory. One tracking result file per sequence
        is written here on completion.
    min_confidence : float
        Detection confidence threshold. Disregard all detections that have
        a confidence lower than this value.
    nms_max_overlap: float
        Maximum detection overlap (non-maxima suppression threshold).
    min_detection_height : int
        Detection height threshold. Disregard all detections that have
        a height lower than this value.
    max_cosine_distance : float
        Gating threshold for cosine distance metric (object appearance).
    nn_budget : Optional[int]
        Maximum size of the appearance descriptor gallery. If None, no budget
        is enforced.
    display : bool
        If True, show visualization of intermediate tracking results.

    """
    seq_info = gather_sequence_info(sequence_dir, detection_file)
    metric = nn_matching.NearestNeighborDistanceMetric(
        "cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)
    results = []

    def frame_callback(vis, frame_idx):
        print("Processing frame %05d" % frame_idx)

        # Load image and generate detections.
        detections = create_detections(
            seq_info["detections"], frame_idx, min_detection_height)
        detections = [d for d in detections if d.confidence >= min_confidence]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(
            boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Update tracker.
        tracker.predict()
        tracker.update(detections)

        # Update visualization.
        if display:
            image = cv2.imread(
                seq_info["image_filenames"][frame_idx], cv2.IMREAD_COLOR)
            vis.set_image(image.copy())
            vis.draw_detections(detections)
            vis.draw_trackers(tracker.tracks)

        # Store results.
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlwh()
            # Both box groups in the output row carry the track box;
            # deep_sort's Track exposes no raw detection box at this point.
            results.append([
                frame_idx, track.track_id,
                bbox[0], bbox[1], bbox[2], bbox[3],
                bbox[0], bbox[1], bbox[2], bbox[3]])

    # Run tracker.
    if display:
        visualizer = visualization.Visualization(seq_info, update_ms=5)
    else:
        visualizer = visualization.NoVisualization(seq_info)
    visualizer.run(frame_callback)

    # Store results.
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    output_file = os.path.join(
        output_dir, os.path.split(sequence_dir)[-1] + '.txt')
    f = open(output_file, 'w')
    for row in results:
        print('%d,%d,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1' % (
            row[0], row[1], row[2], row[3], row[4], row[5],
            row[6], row[7], row[8], row[9]), file=f)
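# Example invocation of run() on a single MOTChallenge sequence. The paths
# are illustrative placeholders and the parameter values mirror common
# deep_sort defaults rather than anything mandated by this script.
if __name__ == "__main__":
    run(sequence_dir="MOT16/train/MOT16-02",
        detection_file="resources/detections/MOT16-02.npy",
        output_dir="results",
        min_confidence=0.3,
        nms_max_overlap=1.0,
        min_detection_height=0,
        max_cosine_distance=0.2,
        nn_budget=100,
        display=False)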
def main(yolo, read_type):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    # Generate a video object
    video_dir = './model_data/demo2.wmv'
    video = video_open(read_type, video_dir)
    video_capture = video.generate_video()
    fps = 0
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            break
        t1 = time.time()

        # Step 1: run YOLOv3 detection to get object locations and info.
        image = Image.fromarray(frame)
        time3 = time.time()
        boxs = yolo.detect_image(image)
        time4 = time.time()
        print('detect cost is', time4 - time3)

        time3 = time.time()
        features = encoder(frame, boxs)
        # Detection score is hard-coded to 1.0 here.
        detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]
        time4 = time.time()
        print('features extract is', time4 - time3)

        # Step 2: track each object across frames, using the Kalman filter
        # to refine positions.
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            # Skip tracks that are unconfirmed or have not been updated recently.
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2)

        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        cv2.imshow('', frame)

        fps = (fps + (1. / (time.time() - t1))) / 2
        print("fps= %f" % fps)

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    cv2.destroyAllWindows()
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True

    video_capture = cv2.VideoCapture(0)

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter('output.avi', fourcc, 15, (w, h))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            break
        t1 = time.time()

        image = Image.fromarray(frame)
        boxs = yolo.detect_image(image)
        features = encoder(frame, boxs)

        # Detection score is hard-coded to 1.0 here.
        detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            # Skip tracks that are unconfirmed or have not been updated recently.
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2)

        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        cv2.imshow('', frame)

        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxs) != 0:
                for i in range(0, len(boxs)):
                    list_file.write(str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' +
                                    str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')
            list_file.write('\n')

        fps = (fps + (1. / (time.time() - t1))) / 2
        print("fps= %f" % fps)

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()