def run(self):
    tracker = KCF.kcftracker(False, False, False, False)
    while True:
        if self.detecting.value is True:
            continue
        elif self.initracker.value is True:
            print('initing')
            frame = self.image_in[:].copy()
            tracker.init(self.boundingbox[:], frame)
            self.detecting.value = False
            self.initracker.value = False
            self.tracking.value = True
        elif self.tracking.value is True:
            #print('tracking')
            frame = self.image_in[:].copy()
            tracker_box = tracker.update(frame)
            tracker_box = list(map(int, tracker_box))
            box = [tracker_box[1], tracker_box[0],
                   tracker_box[1] + tracker_box[3],
                   tracker_box[0] + tracker_box[2]]
            roi_gray = cv2.cvtColor(frame[box[0]:box[2], box[1]:box[3]],
                                    cv2.COLOR_BGR2HSV)
            self.xyz_ang[:] = calc_xyz_ang(roi_gray, box)
            self.flag.value += 1
            if self.flag.value > 20:
                self.flag.value = 0
                self.detecting.value = True
                self.initracker.value = False
                self.tracking.value = False
def run(self):
    # create kcftracker instance
    tracker = KCF.kcftracker(False, False, False, False)  # hog, fixed_window, multiscale, lab
    while True:
        # detector process is running, tracker process blocks
        if self.detecting.value is True:
            continue
        # successfully got a bounding box from the detector
        elif self.initracker.value is True:
            print('initing')
            frame = self.image_in[:].copy()
            tracker.init(self.boundingbox[:], frame)
            self.detecting.value = False
            self.initracker.value = False
            self.tracking.value = True
        # start tracking
        elif self.tracking.value is True:
            print('tracking')
            frame = self.image_in[:].copy()
            tracker_box = tracker.update(frame)  # [xmin, ymin, w, h]
            tracker_box = list(map(int, tracker_box))
            # transform format
            box = [tracker_box[1], tracker_box[0],
                   tracker_box[1] + tracker_box[3],
                   tracker_box[0] + tracker_box[2]]
            self.flag.value += 1
            if self.flag.value > 20:
                self.flag.value = 0
                self.detecting.value = True
                self.initracker.value = False
                self.tracking.value = False
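# A minimal, self-contained sketch of the KCF wrapper API that the examples in this section rely
# on. It assumes the compiled KCF module from https://github.com/uoip/KCFcpp-py-wrapper is
# importable; the constructor flags (hog, fixed_window, multiscale, lab), the
# init([x, y, w, h], frame) call and the [x, y, w, h] list returned by update(frame) follow the
# usage shown above, not an authoritative API reference. The function name and arguments here are
# illustrative only.
import cv2
import KCF  # compiled KCFcpp-py-wrapper module (assumed importable)

def track_first_n_frames(video_path, init_box, n=100):
    """Initialise a KCF tracker on the first frame and follow init_box for up to n frames."""
    cap = cv2.VideoCapture(video_path)
    ok, frame = cap.read()
    if not ok:
        cap.release()
        return []
    tracker = KCF.kcftracker(False, True, False, False)  # hog, fixed_window, multiscale, lab
    tracker.init(list(init_box), frame)  # init_box is [x, y, w, h] in pixels
    boxes = []
    for _ in range(n):
        ok, frame = cap.read()
        if not ok:
            break
        x, y, w, h = map(int, tracker.update(frame))  # update returns [x, y, w, h]
        boxes.append((x, y, w, h))
    cap.release()
    return boxes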
def prepare_tracker(self):
    """ prepares KCF tracker """
    sys.path.append(os.getcwd() + '/rod/kcf')
    import KCF
    self._tracker = KCF.kcftracker(False, True, False, False)
    self._tracker_counter = 0
    self._track = False
def __init__(self, tracker_type, bbox, img, keep_height_ratio=1.):
    """ Wrapper class for various visual trackers.

    Args:
        tracker_type (str): name of the tracker. either the ones provided by
            opencv-contrib or KCF2 for a different implementation for KCF
            (requires https://github.com/uoip/KCFcpp-py-wrapper)
        bbox (tuple): box to initialize the tracker (x1, y1, x2, y2)
        img (numpy.ndarray): image to initialize the tracker
        keep_height_ratio (float, optional): float between 0.0 and 1.0 that
            determines the ratio of height of the object to track to the
            total height of the object for visual tracking.
    """
    if tracker_type == 'KCF2' and not KCF:
        tracker_type = 'KCF'
        if not VisTracker.kcf2_warning_printed:
            print("[warning] KCF2 not available, falling back to KCF. "
                  "please see README.md for further details")
            VisTracker.kcf2_warning_printed = True
    self.tracker_type = tracker_type
    self.keep_height_ratio = keep_height_ratio

    if tracker_type == 'CSRT':
        self.vis_tracker = cv.TrackerCSRT_create()
    elif tracker_type == 'BOOSTING':
        self.vis_tracker = cv.TrackerBoosting_create()
    elif tracker_type == 'MIL':
        self.vis_tracker = cv.TrackerMIL_create()
    elif tracker_type == 'KCF':
        self.vis_tracker = cv.TrackerKCF_create()
    elif tracker_type == 'KCF2':
        self.vis_tracker = KCF.kcftracker(False, True, False, False)  # hog, fixed_window, multiscale, lab
    elif tracker_type == 'TLD':
        self.vis_tracker = cv.TrackerTLD_create()
    elif tracker_type == 'MEDIANFLOW':
        self.vis_tracker = cv.TrackerMedianFlow_create()
    elif tracker_type == 'GOTURN':
        self.vis_tracker = cv.TrackerGOTURN_create()
    elif tracker_type == 'NONE':
        # dummy tracker that does nothing but fail
        self.vis_tracker = None
        self.ok = False
        return
    else:
        raise ValueError("Unknown tracker type '{}'".format(tracker_type))

    y_max = img.shape[0] - 1
    x_max = img.shape[1] - 1
    # bbox = list(bbox)

    if self.tracker_type == 'KCF2':
        self.vis_tracker.init(bbox, img)
        self.ok = True
    else:
        self.ok = self.vis_tracker.init(img, tuple(bbox))
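# A hedged usage sketch for the VisTracker wrapper defined above. Only the constructor behaviour
# shown in that snippet is exercised: the (x1, y1, x2, y2) box format, the .ok flag and the
# fallback from 'KCF2' to OpenCV's built-in 'KCF' when the compiled module is missing. The
# wrapper's update method is not part of this excerpt, so it is not called; make_tracker is an
# illustrative helper name, not part of the original code.
import numpy as np

def make_tracker(img, box_xyxy, prefer_kcf2=True):
    """Try the C++ KCF2 backend first and fall back to OpenCV's built-in KCF."""
    tracker_type = 'KCF2' if prefer_kcf2 else 'KCF'
    vt = VisTracker(tracker_type, box_xyxy, img, keep_height_ratio=1.0)
    if not vt.ok:
        # a failed init (or the 'NONE' dummy) leaves vt.ok False; callers should re-detect
        return None
    return vt

# example call with a synthetic frame and an 80x50 box whose top-left corner is at (100, 60)
frame = np.zeros((480, 640, 3), dtype=np.uint8)
tracker = make_tracker(frame, (100, 60, 150, 140))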
def detection(model,config): # Tracker if config.USE_TRACKER: import sys sys.path.append(os.getcwd()+'/stuff/kcf') import KCF tracker = KCF.kcftracker(False, True, False, False) tracker_counter = 0 track = False print("> Building Graph") # tf Session Config tf_config = model.tf_config detection_graph = model.detection_graph category_index = model.category_index with detection_graph.as_default(): with tf.Session(graph=detection_graph,config=tf_config) as sess: # start Videostream vs = WebcamVideoStream(config.VIDEO_INPUT,config.WIDTH,config.HEIGHT).start() # Define Input and Ouput tensors tensor_dict = model.get_tensordict(['num_detections', 'detection_boxes', 'detection_scores','detection_classes', 'detection_masks']) image_tensor = detection_graph.get_tensor_by_name('image_tensor:0') # Mask Transformations if 'detection_masks' in tensor_dict: # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size. detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0]) detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0]) real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32) detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1]) detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1]) detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks( detection_masks, detection_boxes, vs.real_height, vs.real_width) detection_masks_reframed = tf.cast(tf.greater(detection_masks_reframed, 0.5), tf.uint8) # Follow the convention by adding back the batch dimension tensor_dict['detection_masks'] = tf.expand_dims(detection_masks_reframed, 0) if config.SPLIT_MODEL: score_out = detection_graph.get_tensor_by_name('Postprocessor/convert_scores:0') expand_out = detection_graph.get_tensor_by_name('Postprocessor/ExpandDims_1:0') score_in = detection_graph.get_tensor_by_name('Postprocessor/convert_scores_1:0') expand_in = detection_graph.get_tensor_by_name('Postprocessor/ExpandDims_1_1:0') # Threading score = model.score expand = model.expand gpu_worker = SessionWorker("GPU",detection_graph,tf_config) cpu_worker = SessionWorker("CPU",detection_graph,tf_config) gpu_opts = [score_out, expand_out] cpu_opts = [tensor_dict['detection_boxes'], tensor_dict['detection_scores'], tensor_dict['detection_classes'], tensor_dict['num_detections']] gpu_counter = 0 cpu_counter = 0 fps = FPS(config.FPS_INTERVAL).start() print('> Starting Detection') while vs.isActive(): # Detection if not (config.USE_TRACKER and track): if config.SPLIT_MODEL: # split model in seperate gpu and cpu session threads masks = None # No Mask Detection possible yet if gpu_worker.is_sess_empty(): # read video frame, expand dimensions and convert to rgb frame = vs.read() # put new queue gpu_feeds = {image_tensor: vs.expanded()} if config.VISUALIZE: gpu_extras = frame # for visualization frame else: gpu_extras = None gpu_worker.put_sess_queue(gpu_opts,gpu_feeds,gpu_extras) g = gpu_worker.get_result_queue() if g is None: # gpu thread has no output queue. ok skip, let's check cpu thread. gpu_counter += 1 else: # gpu thread has output queue. gpu_counter = 0 score,expand,frame = g["results"][0],g["results"][1],g["extras"] if cpu_worker.is_sess_empty(): # When cpu thread has no next queue, put new queue. # else, drop gpu queue. 
cpu_feeds = {score_in: score, expand_in: expand} cpu_extras = frame cpu_worker.put_sess_queue(cpu_opts,cpu_feeds,cpu_extras) c = cpu_worker.get_result_queue() if c is None: # cpu thread has no output queue. ok, nothing to do. continue cpu_counter += 1 continue # If CPU RESULT has not been set yet, no fps update else: cpu_counter = 0 boxes, scores, classes, num, frame = c["results"][0],c["results"][1],c["results"][2],c["results"][3],c["extras"] else: # default session frame = vs.read() output_dict = sess.run(tensor_dict, feed_dict={image_tensor: vs.expanded()}) num = output_dict['num_detections'][0] classes = output_dict['detection_classes'][0] boxes = output_dict['detection_boxes'][0] scores = output_dict['detection_scores'][0] if 'detection_masks' in output_dict: masks = output_dict['detection_masks'][0] else: masks = None # reformat detection num = int(num) boxes = np.squeeze(boxes) classes = np.squeeze(classes).astype(np.uint8) scores = np.squeeze(scores) # Visualization vis = vis_detection(frame, boxes, classes, scores, masks, category_index, fps.fps_local(), config.VISUALIZE, config.DET_INTERVAL, config.DET_TH, config.MAX_FRAMES, fps._glob_numFrames, config.OD_MODEL_NAME) if not vis: break # Activate Tracker if config.USE_TRACKER and num <= config.NUM_TRACKERS: tracker_frame = frame track = True first_track = True # Tracking else: frame = vs.read() if first_track: trackers = [] tracker_boxes = boxes for box in boxes[~np.all(boxes == 0, axis=1)]: tracker.init(conv_detect2track(box,vs.real_width, vs.real_height), tracker_frame) trackers.append(tracker) first_track = False for idx,tracker in enumerate(trackers): tracker_box = tracker.update(frame) tracker_boxes[idx,:] = conv_track2detect(tracker_box, vs.real_width, vs.real_height) vis = vis_detection(frame, tracker_boxes, classes, scores, masks, category_index, fps.fps_local(), config.VISUALIZE, config.DET_INTERVAL, config.DET_TH, config.MAX_FRAMES, fps._glob_numFrames, config.OD_MODEL_NAME) if not vis: break tracker_counter += 1 #tracker_frame = frame if tracker_counter >= config.TRACKER_FRAMES: track = False tracker_counter = 0 fps.update() # End everything vs.stop() fps.stop() if config.SPLIT_MODEL: gpu_worker.stop() cpu_worker.stop()
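# conv_detect2track and conv_track2detect are called in the pipeline above but are not defined in
# this excerpt. The sketch below is one plausible implementation, assuming the detector emits
# normalized [ymin, xmin, ymax, xmax] boxes (the TensorFlow Object Detection API convention) and
# the KCF wrapper expects pixel [x, y, w, h] boxes, as the tracker.init()/update() calls above
# suggest; treat it as an assumption, not the original helpers.
import numpy as np

def conv_detect2track(box, width, height):
    """Normalized [ymin, xmin, ymax, xmax] -> pixel [x, y, w, h] for tracker.init()."""
    ymin, xmin, ymax, xmax = box
    x = int(xmin * width)
    y = int(ymin * height)
    w = int((xmax - xmin) * width)
    h = int((ymax - ymin) * height)
    return [x, y, w, h]

def conv_track2detect(box, width, height):
    """Pixel [x, y, w, h] from tracker.update() -> normalized [ymin, xmin, ymax, xmax]."""
    x, y, w, h = box
    return np.array([y / float(height), x / float(width),
                     (y + h) / float(height), (x + w) / float(width)])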
def detection(detection_graph, category_index, score, expand): print("Building Graph") # Session Config: allow seperate GPU/CPU adressing and limit memory allocation config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=log_device) config.gpu_options.allow_growth = allow_memory_growth with detection_graph.as_default(): with tf.Session(graph=detection_graph, config=config) as sess: # Define Input and Ouput tensors image_tensor = detection_graph.get_tensor_by_name('image_tensor:0') detection_boxes = detection_graph.get_tensor_by_name( 'detection_boxes:0') detection_scores = detection_graph.get_tensor_by_name( 'detection_scores:0') detection_classes = detection_graph.get_tensor_by_name( 'detection_classes:0') num_detections = detection_graph.get_tensor_by_name( 'num_detections:0') if split_model: score_out = detection_graph.get_tensor_by_name( 'Postprocessor/convert_scores:0') expand_out = detection_graph.get_tensor_by_name( 'Postprocessor/ExpandDims_1:0') score_in = detection_graph.get_tensor_by_name( 'Postprocessor/convert_scores_1:0') expand_in = detection_graph.get_tensor_by_name( 'Postprocessor/ExpandDims_1_1:0') # Threading gpu_worker = SessionWorker("GPU", detection_graph, config) cpu_worker = SessionWorker("CPU", detection_graph, config) gpu_opts = [score_out, expand_out] cpu_opts = [ detection_boxes, detection_scores, detection_classes, num_detections ] gpu_counter = 0 cpu_counter = 0 # Start Video Stream, FPS calculation and Tracker fps = FPS2(fps_interval).start() video_stream = WebcamVideoStream(video_input, width, height).start() #tracker = create_tracker(tracker_type) tracker = KCF.kcftracker(False, True, False, False) real_width = video_stream.real_width real_height = video_stream.real_height tracker_counter = 0 track = False print("Press 'q' to Exit") print('Starting Detection') while video_stream.isActive(): # Detection if not (use_tracker and track): if split_model: # split model in seperate gpu and cpu session threads if gpu_worker.is_sess_empty(): # read video frame, expand dimensions and convert to rgb frame = video_stream.read() frame_expanded = np.expand_dims(cv2.cvtColor( frame, cv2.COLOR_BGR2RGB), axis=0) # put new queue gpu_feeds = {image_tensor: frame_expanded} if visualize: gpu_extras = frame # for visualization frame else: gpu_extras = None gpu_worker.put_sess_queue(gpu_opts, gpu_feeds, gpu_extras) g = gpu_worker.get_result_queue() if g is None: # gpu thread has no output queue. ok skip, let's check cpu thread. gpu_counter += 1 else: # gpu thread has output queue. gpu_counter = 0 score, expand, frame = g["results"][0], g[ "results"][1], g["extras"] if cpu_worker.is_sess_empty(): # When cpu thread has no next queue, put new queue. # else, drop gpu queue. cpu_feeds = { score_in: score, expand_in: expand } cpu_extras = frame cpu_worker.put_sess_queue( cpu_opts, cpu_feeds, cpu_extras) c = cpu_worker.get_result_queue() if c is None: # cpu thread has no output queue. ok, nothing to do. 
continue cpu_counter += 1 time.sleep(0.005) continue # If CPU RESULT has not been set yet, no fps update else: cpu_counter = 0 boxes, scores, classes, num, frame = c["results"][ 0], c["results"][1], c["results"][2], c[ "results"][3], c["extras"] else: # default session frame = video_stream.read() frame_expanded = np.expand_dims(cv2.cvtColor( frame, cv2.COLOR_BGR2RGB), axis=0) (boxes, scores, classes, num) = sess.run( [ detection_boxes, detection_scores, detection_classes, num_detections ], feed_dict={image_tensor: frame_expanded}) # reformat detection num = int(num) boxes = np.squeeze(boxes) classes = np.squeeze(classes).astype(np.int32) scores = np.squeeze(scores) # visualize detection vis = visualize_detection(frame, boxes, classes, scores, category_index, fps) if not vis: break # Activate Tracker if use_tracker and num <= num_trackers: tracker_frame = frame track = True first_track = True # Tracking else: frame = video_stream.read() if first_track: trackers = [] tracker_boxes = boxes for box in boxes[~np.all(boxes == 0, axis=1)]: tracker.init( conv_detect2track(box, real_width, real_height), tracker_frame) trackers.append(tracker) first_track = False #print ("A: {}".format(boxes[~np.all(boxes == 0, axis=1)])) i = 0 for tracker in trackers: tracker_box = tracker.update(frame) #print ("B: {}".format(tracker_box)) tracker_boxes[i, :] = conv_track2detect( tracker_box, real_width, real_height) i += 1 #p1 = (tracker_box[0], tracker_box[1]) #p2 = (tracker_box[0] + tracker_box[2], tracker_box[1] + tracker_box[3]) #cv2.rectangle(frame, p1, p2, (255,0,0), 2) #cv2.imshow('object_detection', frame) #print ("C: {}".format(tracker_boxes[~np.all(tracker_boxes == 0, axis=1)])) vis = visualize_detection(frame, tracker_boxes, classes, scores, category_index, fps) if not vis: break tracker_counter += 1 #tracker_frame = frame if tracker_counter >= tracker_frames: track = False tracker_counter = 0 fps.update() # End everything if split_model: gpu_worker.stop() cpu_worker.stop() fps.stop() video_stream.stop() cv2.destroyAllWindows() print('[INFO] elapsed time (total): {:.2f}'.format(fps.elapsed())) print('[INFO] approx. FPS: {:.2f}'.format(fps.fps()))
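# The two detection() pipelines above interleave a slow detector with cheap KCF updates. The
# skeleton below isolates that control flow with a stub detector so the alternation is easier to
# follow: run the detector, track its boxes for tracker_frames frames, then detect again. The
# frame budget and the [x, y, w, h] tracker box format mirror the code above; detect_boxes() is a
# placeholder for the TensorFlow session calls and is not part of the original code.
import cv2
import KCF  # compiled KCFcpp-py-wrapper module (assumed importable)

def detect_boxes(frame):
    """Placeholder for the expensive detector; returns pixel [x, y, w, h] boxes."""
    return [[50, 50, 120, 200]]

def detect_then_track(video_path, tracker_frames=20):
    cap = cv2.VideoCapture(video_path)
    trackers, tracked_for = [], 0
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        if not trackers:
            # detection phase: start one tracker per detected box
            for box in detect_boxes(frame):
                t = KCF.kcftracker(False, True, False, False)  # hog, fixed_window, multiscale, lab
                t.init(box, frame)
                trackers.append(t)
            tracked_for = 0
        else:
            # tracking phase: cheap per-frame updates
            for t in trackers:
                x, y, w, h = map(int, t.update(frame))
                cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
            tracked_for += 1
            if tracked_for >= tracker_frames:
                trackers = []  # force a fresh detection on the next frame
        cv2.imshow('detect+track', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()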
if __name__ == '__main__':

    if len(sys.argv) == 1:
        cap = cv2.VideoCapture(0)
    elif len(sys.argv) == 2:
        if sys.argv[1].isdigit():  # True if sys.argv[1] is str of a nonnegative integer
            cap = cv2.VideoCapture(int(sys.argv[1]))
        else:
            cap = cv2.VideoCapture(sys.argv[1])
            inteval = 30
    else:
        assert 0, "too many arguments"

    tracker = KCF.kcftracker(True, True, True, True, 0)  # hog, fixed_window, multiscale, lab, threshold

    cv2.namedWindow('tracking')
    cv2.setMouseCallback('tracking', draw_boundingbox)

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        if selectingObject:
            cv2.rectangle(frame, (ix, iy), (cx, cy), (0, 255, 255), 1)
        elif initTracking:
            cv2.rectangle(frame, (ix, iy), (ix + w, iy + h), (0, 255, 255), 2)
            tracker.init(frame, (ix, iy, w, h))
    l = np.array(l)
    return [mx.nd.array(l)]


a = True
kcf = False
font = cv2.FONT_HERSHEY_SIMPLEX
while a:
    if kcf:
        ret1, framekcf = cap.read()
        height, width = framekcf.shape[:2]
        for objecta in objects:
            # a new tracker is created and initialised on the current frame for every object
            tracker = KCF.kcftracker(False, True, False, False)  # hog, fixed_window, multiscale, lab
            tracker.init([objecta.xmin, objecta.ymin,
                          objecta.xmax - objecta.xmin,
                          objecta.ymax - objecta.ymin], framekcf)
            boundingbox = tracker.update(framekcf)
            boundingbox = list(map(int, boundingbox))  # list() so indexing also works on Python 3
            cv2.rectangle(framekcf, (boundingbox[0], boundingbox[1]),
                          (boundingbox[0] + boundingbox[2],
                           boundingbox[1] + boundingbox[3]), (0, 255, 0), 3)
            cv2.putText(framekcf, objecta.label, (boundingbox[0], boundingbox[1]),
                        font, 1, (0, 255, 0), 2, cv2.LINE_AA)
        cv2.imshow("img", framekcf)
        kcf = False
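# In the loop above a fresh kcftracker is created and initialised for every object on each pass,
# so update() runs on essentially the frame it was just initialised with. A common alternative,
# sketched here under the same [x, y, w, h] box convention, keeps one tracker per detected object
# alive across frames and only re-initialises when new detections arrive. TrackedObject and
# refresh are illustrative names, not part of the original code.
import KCF  # compiled KCFcpp-py-wrapper module (assumed importable)

class TrackedObject(object):
    def __init__(self, label, box, frame):
        self.label = label
        self.tracker = KCF.kcftracker(False, True, False, False)  # hog, fixed_window, multiscale, lab
        self.tracker.init(list(box), frame)
        self.box = list(box)

    def step(self, frame):
        """Advance the tracker by one frame and return the updated [x, y, w, h] box."""
        self.box = list(map(int, self.tracker.update(frame)))
        return self.box

def refresh(tracked, detections, frame):
    """Replace the active trackers whenever the detector produces new (label, [x, y, w, h]) pairs."""
    tracked[:] = [TrackedObject(label, box, frame) for label, box in detections]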
def detection(detection_graph, category_index, score, expand): print("Building Graph") # Session Config: allow seperate GPU/CPU adressing and limit memory allocation config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=log_device) config.gpu_options.allow_growth = allow_memory_growth cur_frames = 0 with detection_graph.as_default(): with tf.Session(graph=detection_graph, config=config) as sess: # Define Input and Ouput tensors image_tensor = detection_graph.get_tensor_by_name('image_tensor:0') detection_boxes = detection_graph.get_tensor_by_name( 'detection_boxes:0') detection_scores = detection_graph.get_tensor_by_name( 'detection_scores:0') detection_classes = detection_graph.get_tensor_by_name( 'detection_classes:0') num_detections = detection_graph.get_tensor_by_name( 'num_detections:0') if split_model: score_out = detection_graph.get_tensor_by_name( 'Postprocessor/convert_scores:0') expand_out = detection_graph.get_tensor_by_name( 'Postprocessor/ExpandDims_1:0') score_in = detection_graph.get_tensor_by_name( 'Postprocessor/convert_scores_1:0') expand_in = detection_graph.get_tensor_by_name( 'Postprocessor/ExpandDims_1_1:0') # Threading gpu_worker = SessionWorker("GPU", detection_graph, config) cpu_worker = SessionWorker("CPU", detection_graph, config) gpu_opts = [score_out, expand_out] cpu_opts = [ detection_boxes, detection_scores, detection_classes, num_detections ] gpu_counter = 0 cpu_counter = 0 # Start Video Stream and FPS calculation fps = FPS2(fps_interval).start() #video_stream = WebcamVideoStream(video_input,width,height).start() cap = cv2.VideoCapture(video_input) cur_frames = 0 print("Press 'q' to Exit") print('Starting Detection') kcf = False box_to_color_map = collections.defaultdict(str) box_to_display_str_map = collections.defaultdict(list) tracker = KCF.kcftracker(False, True, False, False) count = 0 ret1, image = cap.read() im_height, im_width = image.shape[:2] im_height /= 3 im_width /= 3 print im_height, im_width fourcc = cv2.VideoWriter_fourcc(*'XVID') out = cv2.VideoWriter('output.avi', fourcc, 30.0, (im_width, im_height)) while True: count += 1 if count == 500: break if kcf and use_kcf: start = timer() ret1, image = cap.read() image = cv2.resize(image, (im_width, im_height)) image_expanded = np.expand_dims(cv2.cvtColor( image, cv2.COLOR_BGR2RGB), axis=0) for box, color in box_to_color_map.items(): label = box_to_display_str_map[box] ymin, xmin, ymax, xmax = box xmin = (int)(xmin * im_width) ymin = (int)(ymin * im_height) xmax = (int)(xmax * im_width) ymax = (int)(ymax * im_height) tracker.init([xmin, ymin, xmax - xmin, ymax - ymin], preframe) boundingbox = tracker.update(image) boundingbox = map(int, boundingbox) cv2.rectangle(image, (boundingbox[0], boundingbox[1]), (boundingbox[0] + boundingbox[2], boundingbox[1] + boundingbox[3]), (0, 255, 255), 2) cv2.putText( image, label[0], (boundingbox[0], boundingbox[1] + boundingbox[3]), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (77, 255, 9), 2) #print 'hello' if vis_text: cv2.putText(image, "fps: {}".format(fps.fps_local()), (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (77, 255, 9), 2) out.write(image) cv2.imshow('object_detection', image) cv2.waitKey(1) fps.update() kcf = False time_elapsed = timer() - start print( "Detection time in frame using KCF: {:.4f} sec fps {:.4f}" .format(time_elapsed, 1 / time_elapsed)) else: start = timer() kcf = True # actual Detection if split_model: # split model in seperate gpu and cpu session threads if gpu_worker.is_sess_empty(): # read video frame, expand dimensions and convert 
to rgb #image = video_stream.read() ret1, image = cap.read() preframe = image image_expanded = np.expand_dims(cv2.cvtColor( image, cv2.COLOR_BGR2RGB), axis=0) # put new queue gpu_feeds = {image_tensor: image_expanded} if visualize: gpu_extras = image # for visualization frame else: gpu_extras = None gpu_worker.put_sess_queue(gpu_opts, gpu_feeds, gpu_extras) g = gpu_worker.get_result_queue() if g is None: # gpu thread has no output queue. ok skip, let's check cpu thread. gpu_counter += 1 else: # gpu thread has output queue. gpu_counter = 0 score, expand, image = g["results"][0], g[ "results"][1], g["extras"] if cpu_worker.is_sess_empty(): # When cpu thread has no next queue, put new queue. # else, drop gpu queue. cpu_feeds = { score_in: score, expand_in: expand } cpu_extras = image cpu_worker.put_sess_queue( cpu_opts, cpu_feeds, cpu_extras) c = cpu_worker.get_result_queue() if c is None: # cpu thread has no output queue. ok, nothing to do. continue cpu_counter += 1 time.sleep(0.005) continue else: cpu_counter = 0 boxes, scores, classes, num, image = c["results"][ 0], c["results"][1], c["results"][2], c[ "results"][3], c["extras"] else: # default session #image = video_stream.read() ret1, image = cap.read() image = cv2.resize(image, (im_width, im_height)) preframe = image image_expanded = np.expand_dims(cv2.cvtColor( image, cv2.COLOR_BGR2RGB), axis=0) (boxes, scores, classes, num) = sess.run( [ detection_boxes, detection_scores, detection_classes, num_detections ], feed_dict={image_tensor: image_expanded}) # Visualization of the results of a detection. if visualize: box_to_color_map, box_to_display_str_map = vis_util.visualize_boxes_and_labels_on_image_array( image, np.squeeze(boxes), np.squeeze(classes).astype(np.int32), np.squeeze(scores), category_index, use_normalized_coordinates=True, line_thickness=8) for box, color in box_to_color_map.items(): label = box_to_display_str_map[box] ymin, xmin, ymax, xmax = box xmin = (int)(xmin * im_width) ymin = (int)(ymin * im_height) xmax = (int)(xmax * im_width) ymax = (int)(ymax * im_height) cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (0, 255, 255), 2) cv2.putText(image, label[0], (xmin, ymax), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (77, 255, 9), 2) if vis_text: cv2.putText(image, "fps: {}".format(fps.fps_local()), (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (77, 255, 9), 2) out.write(image) cv2.imshow('object_detection', image) # Exit Option if cv2.waitKey(1) & 0xFF == ord('q'): break time_elapsed = timer() - start print( "Detection time in frame using SSD: {:.4f} sec fps {:.4f}" .format(time_elapsed, 1 / time_elapsed)) else: cur_frames += 1 # Exit after max frames if no visualization for box, score, _class in zip(np.squeeze(boxes), np.squeeze(scores), np.squeeze(classes)): if cur_frames % det_interval == 0 and score > det_th: label = category_index[_class]['name'] print("label: {}\nscore: {}\nbox: {}".format( label, score, box)) if cur_frames >= max_frames: break fps.update() # End everything if split_model: gpu_worker.stop() cpu_worker.stop() fps.stop() out.release() cap.release() #video_stream.stop() cv2.destroyAllWindows() print('[INFO] elapsed time (total): {:.2f}'.format(fps.elapsed())) print('[INFO] approx. FPS: {:.2f}'.format(fps.fps()))
if __name__ == '__main__':

    if len(sys.argv) == 1:
        cap = cv2.VideoCapture(0)
    elif len(sys.argv) == 2:
        if sys.argv[1].isdigit():  # True if sys.argv[1] is str of a nonnegative integer
            cap = cv2.VideoCapture(int(sys.argv[1]))
        else:
            cap = cv2.VideoCapture(sys.argv[1])
            inteval = 30
    else:
        assert 0, "too many arguments"

    tracker = KCF.kcftracker(False, True, False, False)  # hog, fixed_window, multiscale, lab

    cv2.namedWindow('tracking')
    cv2.setMouseCallback('tracking', draw_boundingbox)

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        if selectingObject:
            cv2.rectangle(frame, (ix, iy), (cx, cy), (0, 255, 255), 1)
        elif initTracking:
            cv2.rectangle(frame, (ix, iy), (ix + w, iy + h), (0, 255, 255), 2)
            tracker.init([ix, iy, w, h], frame)
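# The two __main__ blocks above depend on a draw_boundingbox mouse callback and several globals
# (selectingObject, initTracking, ix, iy, cx, cy, w, h) that sit outside this excerpt. The
# condensed demo below fills those pieces in so the select-then-track loop is runnable end to end;
# it follows the init([x, y, w, h], frame) variant used directly above and uses simplified global
# names of its own.
import cv2
import KCF  # compiled KCFcpp-py-wrapper module (assumed importable)

selecting, tracking = False, False
ix = iy = cx = cy = 0

def draw_boundingbox(event, x, y, flags, param):
    global selecting, tracking, ix, iy, cx, cy
    if event == cv2.EVENT_LBUTTONDOWN:
        selecting, tracking = True, False
        ix, iy, cx, cy = x, y, x, y
    elif event == cv2.EVENT_MOUSEMOVE and selecting:
        cx, cy = x, y
    elif event == cv2.EVENT_LBUTTONUP:
        selecting = False
        cx, cy = x, y

cap = cv2.VideoCapture(0)
tracker = KCF.kcftracker(False, True, False, False)  # hog, fixed_window, multiscale, lab
cv2.namedWindow('tracking')
cv2.setMouseCallback('tracking', draw_boundingbox)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    if selecting:
        # rubber-band rectangle while the user drags out a box
        cv2.rectangle(frame, (ix, iy), (cx, cy), (0, 255, 255), 1)
    elif not tracking and cx > ix and cy > iy:
        tracker.init([ix, iy, cx - ix, cy - iy], frame)
        tracking = True
    elif tracking:
        x, y, w, h = map(int, tracker.update(frame))
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 255), 2)
    cv2.imshow('tracking', frame)
    if cv2.waitKey(30) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()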
def main(): # Ouput settings. DRAW = True SAVE_TO_FILE = False SAVE_FRAMES = False font = cv2.FONT_HERSHEY_DUPLEX font_scale = 0.8 thickness = 2 # Tracking settings. HOG = True FIXEDWINDOW = True MULTISCALE = True LAB = False fe = FeatureExtractor('trinet/checkpoint/checkpoint.ckpt-25000') gallery = Gallery() PATH_TOP = 'C:/E/Matlab/Object Tracking/dataset' cams = ['cam2', 'cam3'] queried_pids = dict() trackers = dict() outputs = dict() fdetects = dict() camera_matrices = dict() for cam in cams: queried_pids[cam] = [] trackers[cam] = [] fdetects[cam] = open(os.path.join(PATH_TOP, cam, 'detect.txt'), 'r') camera_matrices[cam] = read_camera_matrix( os.path.join(PATH_TOP, cam, 'camera_matrix_our.txt')) if DRAW: cv2.namedWindow(str(cam), 0) if SAVE_TO_FILE: outputs[cam] = open(os.path.join(PATH_TOP, cam, 'track23.txt'), 'w') start_count = cv2.getCPUTickCount() num_frames = len(os.listdir(os.path.join(PATH_TOP, 'cam1', 'img'))) # Main loop. for i in range(1, num_frames + 1): print('\n#frame %d' % i) for cam in cams: print('') print(' ', cam) print('') frame = cv2.imread( os.path.join(PATH_TOP, cam, 'img', '%04d.jpg' % i)) # Update tracking positions and delete some bad trackers. j = 0 while j < len(trackers[cam]): trackers[cam][j].update(frame) if trackers[cam][j].out_of_sight: print("Delete tracker %d due to out of sight" % trackers[cam][j].pid) del trackers[cam][j] elif trackers[cam][j].occluded_so_long: print("Delete tracker %d due to occluded so long" % trackers[cam][j].pid) del trackers[cam][j] else: j = j + 1 # Add new trackers every 10 frames, of course including the first frame. if i % 10 == time_to_detect[cam]: # Note we have not deleted the model-drifted trackers. # Sometimes good trackers are considered as model drift, # due to the imperfect criterion. # So if we delete these trackers and re-add them, the tracking result # may look consistent. # First, delete model-drifted trackers. j = 0 while j < len(trackers[cam]): if trackers[cam][j].model_drift: print("Delete tracker %d due to model drift" % trackers[cam][j].pid) del trackers[cam][j] else: j = j + 1 # Then, add new trackers. # Read detection results of current frame. # Locate current frame. while fdetects[cam].readline() != ('#frame %d\n' % i): pass num_detect = int(fdetects[cam].readline().split()[0]) detect_pos = np.zeros((num_detect, 4), dtype=np.int32) for j in range(num_detect): line = fdetects[cam].readline() splits = line.split() tmp = [int(k) for k in splits] # Detection bounding boxes are in the form of (x1, y1, x2, y2). detect_pos[j, :] = [ tmp[0], tmp[1], tmp[2] - tmp[0], tmp[3] - tmp[1] ] # Put tracking results together. track_pos = np.zeros((len(trackers[cam]), 4)) for j in range(len(trackers[cam])): track_pos[j, :] = trackers[cam][j].get_roi() # Determine which detection boxes are used to initialize new trackers. indices = detection_query(detect_pos, track_pos) if len(indices) > 0: # Person re-identification. feature_list = [] blacklist = [] for j in range(len(indices)): x, y, w, h = detect_pos[indices[j], :] person_img = frame[y:y + h, x:x + w, :] person_feature = fe.feature(adjust_image(person_img)) feature_list.append(person_feature) # Get blacklist for this person. 
one_blacklist = get_blacklist(x + w / 2, y + h, cam, trackers, camera_matrices) blacklist.append(one_blacklist) features_p = np.vstack(feature_list) gallery_pids = gallery.get_pids() pids_this_cam = [tracker.pid for tracker in trackers[cam]] queried_pids[cam] = gallery.query(features_p, cam, i, pids_this_cam, blacklist) # Initialize new trackers with the queried pids. # frame_b = frame.copy() # new_persons = False for j in range(len(indices)): tracker = KCF.kcftracker(queried_pids[cam][j], HOG, FIXEDWINDOW, MULTISCALE, LAB) tracker.init(list(detect_pos[indices[j], :]), frame) trackers[cam].append(tracker) if queried_pids[cam][j] in gallery_pids: print('---------------- Resume tracker %d' % queried_pids[cam][j]) else: print('---------------- Add tracker %d' % queried_pids[cam][j]) # new_persons = True # x, y, w, h = detect_pos[indices[j], :] # cv2.rectangle(frame_b, (x, y), (x + w, y + h), get_color(cam, -1, queried_pids), thickness, 8) # cv2.putText(frame_b, str(queried_pids[cam][j]), (x, y), font, font_scale, get_color(cam, -1, queried_pids), thickness) # if new_persons: # cv2.imwrite(os.path.join(PATH_TOP, 'cam1', 'gallery', '%d.jpg' % i), frame_b) # Draw and save trackers positions to file. if SAVE_TO_FILE: outputs[cam].write('#frame\t%d\n' % i) outputs[cam].write('%d\n' % len(trackers[cam])) for j in range(len(trackers[cam])): x, y, w, h = trackers[cam][j].get_roi() if SAVE_TO_FILE: outputs[cam].write('%d\t%d\t%d\t%d\t%d\n' % (trackers[cam][j].pid, x, y, w, h)) if DRAW: cv2.rectangle( frame, (x, y), (x + w, y + h), get_color(cam, trackers[cam][j].pid, queried_pids), thickness, 8) cv2.putText( frame, str(trackers[cam][j].pid), (x, y), font, font_scale, get_color(cam, trackers[cam][j].pid, queried_pids), thickness) if DRAW: cv2.imshow(str(cam), frame) cv2.waitKey(0) if i == num_frames: cv2.waitKey(0) if SAVE_FRAMES: # Save frames with tracking results. cv2.imwrite( os.path.join(PATH_TOP, cam, 'img_tracking_reid', '%04d.jpg' % i), frame) # Release resources. fe.close() for cam in cams: fdetects[cam].close() if SAVE_TO_FILE: for cam in cams: outputs[cam].close() elapsed_time_s = float(cv2.getCPUTickCount() - start_count) / cv2.getTickFrequency() fps = num_frames / elapsed_time_s print('%f fps' % fps)
def prepare_model(self, input_type): """ prepares Object_Detection model input_type: must be 'image' or 'video' """ assert input_type in ['image', 'video' ], "only 'image' or 'video' input possible" super(ObjectDetectionModel, self).prepare_model() self.input_type = input_type # Tracker if self.config.USE_TRACKER: sys.path.append(os.getcwd() + '/rod/kcf') import KCF self._tracker = KCF.kcftracker(False, True, False, False) self._tracker_counter = 0 self._track = False print("> Building Graph") with self.detection_graph.as_default(): with tf.Session(graph=self.detection_graph, config=self._tf_config) as self._sess: # Input Configuration if self.input_type is 'video': self._input_stream = VideoStream( self.config.VIDEO_INPUT, self.config.WIDTH, self.config.HEIGHT).start() height = self._input_stream.real_height width = self._input_stream.real_width elif self.input_type is 'image': self._input_stream = ImageStream( self.config.IMAGE_PATH, self.config.LIMIT_IMAGES, (self.config.WIDTH, self.config.HEIGHT)).start() height = self.config.HEIGHT width = self.config.WIDTH # Timeliner for image detection if self.config.WRITE_TIMELINE: self._run_options = tf.RunOptions( trace_level=tf.RunOptions.FULL_TRACE) self._run_metadata = tf.RunMetadata() self.timeliner = TimeLiner() # Define Input and Ouput tensors self._tensor_dict = self.get_tensordict([ 'num_detections', 'detection_boxes', 'detection_scores', 'detection_classes', 'detection_masks' ]) self._image_tensor = self.detection_graph.get_tensor_by_name( 'image_tensor:0') # Mask Transformations if 'detection_masks' in self._tensor_dict: # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size. detection_boxes = tf.squeeze( self._tensor_dict['detection_boxes'], [0]) detection_masks = tf.squeeze( self._tensor_dict['detection_masks'], [0]) real_num_detection = tf.cast( self._tensor_dict['num_detections'][0], tf.int32) detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1]) detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1]) detection_masks_reframed = reframe_box_masks_to_image_masks( detection_masks, detection_boxes, height, width) detection_masks_reframed = tf.cast( tf.greater(detection_masks_reframed, 0.5), tf.uint8) self._tensor_dict['detection_masks'] = tf.expand_dims( detection_masks_reframed, 0) if self.config.SPLIT_MODEL: self._score_out = self.detection_graph.get_tensor_by_name( '{}:0'.format(self.config.SPLIT_NODES[0])) self._expand_out = self.detection_graph.get_tensor_by_name( '{}:0'.format(self.config.SPLIT_NODES[1])) self._score_in = self.detection_graph.get_tensor_by_name( '{}_1:0'.format(self.config.SPLIT_NODES[0])) self._expand_in = self.detection_graph.get_tensor_by_name( '{}_1:0'.format(self.config.SPLIT_NODES[1])) # Threading self._gpu_worker = SessionWorker("GPU", self.detection_graph, self._tf_config) self._cpu_worker = SessionWorker("CPU", self.detection_graph, self._tf_config) self._gpu_opts = [self._score_out, self._expand_out] self._cpu_opts = [ self._tensor_dict['detection_boxes'], self._tensor_dict['detection_scores'], self._tensor_dict['detection_classes'], self._tensor_dict['num_detections'] ] return self
def main(): print("Running...") flag_track2 = 0 count = 0 counttrack = 0 prev_y_pixel = 0 prev_x_pixel = 0 tetaperpixel = 0.994837 / 400.0 tracker = KCF.kcftracker(True, False, True, False) # hog, fixed_window, multiscale, lab counttrack2 = 0 prev_distance2 = 0 # grab one frame at first to compare for background substraction frame, timestamp = freenect.sync_get_video() frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) frame_resized = imutils.resize(frame, width=min(400, frame.shape[1])) frame_resized_grayscale = cv2.cvtColor(frame_resized, cv2.COLOR_BGR2GRAY) print(frame_resized_grayscale.shape) # initialize centroid center = [[frame_resized.shape[1] / 2, frame_resized.shape[0] / 2]] center_fix = [] # defining min cuoff area #min_area=(480/400)*frame_resized.shape[1] min_area = (0.01) * frame_resized.shape[1] print(frame_resized.shape) # (300,400,3) boxcolor = (0, 255, 0) timeout = 0 #variable for counting time elapsed key = '' temp = 1 # save video countsave = 0 while key != 113: # for 'q' key # start timer timer = cv2.getTickCount() starttime = time.time() previous_frame = frame_resized_grayscale # retrieve new RGB frame image frame, timestamp = freenect.sync_get_video() frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) frame_resized = imutils.resize(frame, width=min(400, frame.shape[1])) frame_resized_grayscale = cv2.cvtColor(frame_resized, cv2.COLOR_BGR2GRAY) #temp=background_subtraction(previous_frame, frame_resized_grayscale, min_area) # retrieve depth map depth, timestamp = freenect.sync_get_depth() depth = imutils.resize(depth, width=min(400, depth.shape[1])) print(depth.shape) depth2 = np.copy(depth) # orig = image.copy() if temp == 1: if (flag_track2 == 0): frame_processed, center_fix, pick2 = detect_people( frame_resized, center, frame_resized, boxcolor) if (len(center_fix) > 0): i = 0 for b in center_fix: #print(b) #print("Point "+str(i)+": "+str(b[0])+" "+str(b[1])) x_pixel = b[1] y_pixel = b[0] print("x1:" + str(x_pixel) + "y1:" + str(y_pixel)) rawDisparity = depth[(int)(x_pixel), (int)(y_pixel)] print("raw:" + str(rawDisparity)) distance = 1 / (-0.00307 * rawDisparity + 3.33) if (distance < 0): distance = 0.5 print("Distance : " + str(distance)) cv2.putText( frame_resized, "distance: {:.2f}".format(distance), (10, (frame_resized.shape[0] - (i + 1) * 25) - 50), font, 0.65, (0, 0, 255), 3) cv2.putText( frame_resized, "Point " + str(i) + ": " + str(b[0]) + " " + str(b[1]), (10, frame_resized.shape[0] - (i + 1) * 25), font, 0.65, (0, 0, 255), 3) i = i + 1 y_pix, x_pix = center_fix[0] endtime = time.time() #nucleo.write(("8,"+str(x_person)+","+str(y_person)).encode()) # send x_person and y_person if ((abs(prev_x_pixel - x_pix)) < 50 and (abs(prev_y_pixel - y_pix)) < 50): timeout = timeout + (endtime - starttime) if (timeout > 5): flag_track2 = 1 boxcolor = (255, 0, 0) else: timeout = 0 boxcolor = (0, 255, 0) prev_y_pixel, prev_x_pixel = y_pix, x_pix # DEBUGGING # #print("Teta: " + str(teta) + "Distance: " + str(distance)) print("Timeout: " + str(timeout)) #print ("Distance : " + str(distance)) elif (len(center_fix) <= 0): timeout = 0 boxcolor = (0, 255, 0) elif (flag_track2 == 1): if (counttrack2 == 0): iA, iB, iC, iD = pick2[0] # Draw new bounding box from body to only head figures tracker.init([iA, iB, iC - iA, iD - iB], frame_resized) counttrack2 = counttrack2 + 1 elif (counttrack2 == 1): print(pick2[0]) print("iA:" + str(iA) + "iB:" + str(iB) + "iC:" + str(iC) + "iD:" + str(iD)) boundingbox = tracker.update( frame_resized) #frame had better be contiguous boundingbox = list(map(int, 
boundingbox)) cv2.rectangle(frame_resized, (boundingbox[0], boundingbox[1]), (boundingbox[0] + boundingbox[2], boundingbox[1] + boundingbox[3]), (255, 0, 0), 3) #GENERAL ASSUMPTION SINGLE PERSON TRACKING # start tracking... x_track = ((boundingbox[2]) / 2.0) + boundingbox[0] y_track = ((boundingbox[3]) / 2.0) + boundingbox[1] print("x:" + str(x_track) + "y:" + str(y_track)) x_center = (frame_resized.shape[1] + 1) / 2 y_center = (frame_resized.shape[0] + 1) / 2 print(x_center, y_center) # compute teta asumsi distance lurus rawDisparity2 = depth2[(int)(y_track), (int)(x_track)] print("raw2:" + str(rawDisparity2)) distance2 = 1 / (-0.00307 * rawDisparity2 + 3.33) if (distance2 < 0): distance2 = prev_distance2 prev_distance2 = distance2 #realx = (x_track-x_center)+(distance/30.0) #teta = math.atan(realx/distance) # if distance is tangensial #teta = math.asin((0.026458333*(x_track-x_center)/distance)) # if distance is euclidean teta = (x_track - x_center) * tetaperpixel print("Teta: " + str(teta)) print("Distance2 : " + str(distance2)) cv2.putText(frame_resized, "distance: {:.2f}".format(distance2), (10, (frame_resized.shape[0] - (i + 1) * 25) - 50), font, 0.65, (0, 0, 255), 3) cv2.putText( frame_resized, "Point " + str(0) + ": " + str(x_track) + " " + str(y_track), (10, frame_resized.shape[0] - (i + 1) * 25), font, 0.65, (0, 0, 255), 3) # send the teta and distance #nucleo.flush() #if(teta<0.0): #flag= nucleo.write(("7,"+format(teta,'1.2f')+","+format(distance2,'1.3f')).encode()) #elif(teta>0.0): #flag= nucleo.write(("7,"+format(teta,'1.3f')+","+format(distance2,'1.3f')).encode()) #print("WRITEIN1" + str(flag)) print("Peak: " + str(tracker.getpeakvalue())) if (tracker.getpeakvalue() < 0.6): counttrack2 = 0 flag_track2 = 0 #nucleo.flush() #nucleo.write("8,,,,,,,,,,,,".encode()) print("WRITEOUT") #frame_resized = cv2.flip(frame_resized, 0) cv2.imshow("Detected Human", frame_resized) cv2.imshow("Depth", frame_convert2.pretty_depth_cv(depth)) #cv2.imshow("Depth2", frame_convert2.pretty_depth_cv(depth2)) # cv2.imshow("Original", frame) else: count = count + 1 print("Number of frame skipped in the video= " + str(count)) # compute the fps fps = cv2.getTickFrequency() / (cv2.getTickCount() - timer) print("FPS: " + str(fps)) #outframe = open("/home/ubuntu/Progress\ TA/Integrasi/rgb640/%d.jpg" % countsave, 'wb+') cv2.imwrite('%d.jpg' % countsave, frame_resized) # Save image... countsave = countsave + 1 key = cv2.waitKey(5) cv2.destroyAllWindows() freenect.sync_stop() nucleo.close() print("\nFINISH")
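# The Kinect-based follower above repeats its raw-disparity-to-distance conversion and its
# pixel-to-angle mapping inline. The helpers below only factor out those two formulas with the
# same constants used above (distance = 1 / (-0.00307 * raw + 3.33), an often-quoted Kinect v1
# approximation, and 0.994837 rad of horizontal field of view over a 400-pixel-wide frame); the
# function names are illustrative, not part of the original code.

def disparity_to_metres(raw_disparity, fallback=0.5):
    """Approximate metric distance from a raw Kinect v1 disparity value."""
    distance = 1.0 / (-0.00307 * raw_disparity + 3.33)
    return distance if distance > 0 else fallback

def pixel_to_angle(x_pixel, frame_width=400, fov_rad=0.994837):
    """Horizontal angle (radians) of a pixel column relative to the image centre."""
    x_center = (frame_width + 1) / 2.0
    return (x_pixel - x_center) * (fov_rad / frame_width)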
def get_frame(self, countif): global previous_frame global min_area global center global count global boxcolor global hog global frame_processed global timeout global font global center_fix global flag_track2 global prev_x_pixel global prev_y_pixel global counttrack2 global tetaperpixel global nucleo global distance global distance2 global teta global i global b global flag global pick2 global tracker global prev_distance2 def detect_people(frame, center, frame_out, bboxcolor=(0, 255, 0)): """ detect humans using HOG descriptor Args: frame: Returns: processed frame, center of every bb box """ centerxd = [] (rects, weights) = hog.detectMultiScale(frame, winStride=(8, 8), padding=(16, 16), scale=1.06) rects = non_max_suppression(rects, probs=None, overlapThresh=0.65) for (x, y, w, h) in rects: cv2.rectangle(frame_out, (x, y), (x + w, y + h), (0, 0, 255), 2) # apply non-maxima suppression to the bounding boxes using a # fairly large overlap threshold to try to maintain overlapping # boxes that are still people rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects]) pick = non_max_suppression(rects, probs=None, overlapThresh=0.65) idx = 0 # draw the final bounding boxes for (xA, yA, xB, yB) in pick: cv2.rectangle(frame_out, (xA, yA), (xB, yB), bboxcolor, 2) cv2.putText(frame_out, 'Person ' + str(idx), (xA, yA - 10), 0, 0.3, bboxcolor) idx = idx + 1 # calculate the center of the object centerxd.append([(xA + xB) / 2, (yA + yB) / 2]) return (frame, centerxd, pick) def background_subtraction(previous_frame, frame_resized_grayscale, min_area): """ This function returns 1 for the frames in which the area after subtraction with previous frame is greater than minimum area defined. Thus expensive computation of human detection face detection and face recognition is not done on all the frames. Only the frames undergoing significant amount of change (which is controlled min_area) are processed for detection and recognition. 
""" frameDelta = cv2.absdiff(previous_frame, frame_resized_grayscale) thresh = cv2.threshold(frameDelta, 25, 255, cv2.THRESH_BINARY)[1] thresh = cv2.dilate(thresh, None, iterations=2) im2, cnts, hierarchy = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) temp = 0 for c in cnts: # if the contour is too small, ignore it if cv2.contourArea(c) > min_area: temp = 1 return temp def encodex(x_pixel): if (x_pixel < 130): x_norm = 1 elif (x_pixel > 230): x_norm = 2 else: x_norm = 0 return x_norm if (countif < 1): # setup the serial port nucleo = serial.Serial() nucleo.port = '/dev/ttyACM0' nucleo.baud = 115200 nucleo.close() nucleo.open() nucleo.flush() time.sleep(2) print("connected to: " + nucleo.portstr) print("Running...") subject_label = 1 font = cv2.FONT_HERSHEY_SIMPLEX tracker = KCF.kcftracker( True, False, True, False) # hog, fixed_window, multiscale, lab # initialize the HOG descriptor/person detector hog = cv2.HOGDescriptor() hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector()) time.sleep(1) frame = np.zeros((480, 640, 3), np.uint8) flag_track2 = 0 count = 0 counttrack2 = 0 prev_y_pixel = 0 prev_x_pixel = 0 tetaperpixel = 0.994837 / 400.0 prev_distance2 = 0 # grab one frame at first to compare for background substraction frame, timestamp = freenect.sync_get_video() #time.sleep(5) frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) frame_resized = imutils.resize(frame, width=min(400, frame.shape[1])) frame_resized_grayscale = cv2.cvtColor(frame_resized, cv2.COLOR_BGR2GRAY) # initialize centroid center = [[frame_resized.shape[1] / 2, frame_resized.shape[0] / 2]] center_fix = [] # defining min cuoff area #min_area=(480/400)*frame_resized.shape[1] min_area = (0.01) * frame_resized.shape[1] boxcolor = (0, 255, 0) timeout = 0 #variable for counting time elapsed temp = 1 previous_frame = frame_resized_grayscale # retrieve new RGB frame image # Frame generation for Browser streaming with Flask... self.outframe = open("stream.jpg", 'wb+') cv2.imwrite("stream.jpg", frame) # Save image... 
return self.outframe.read() else: # start timer timer = cv2.getTickCount() starttime = time.time() frame, timestamp = freenect.sync_get_video() frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) frame_resized = imutils.resize(frame, width=min(400, frame.shape[1])) frame_resized_grayscale = cv2.cvtColor(frame_resized, cv2.COLOR_BGR2GRAY) temp = background_subtraction(previous_frame, frame_resized_grayscale, min_area) # retrieve depth map depth, timestamp = freenect.sync_get_depth() depth = imutils.resize(depth, width=min(400, depth.shape[1])) depth2 = np.copy(depth) # orig = image.copy() if temp == 1: if (flag_track2 == 0): frame_processed, center_fix, pick2 = detect_people( frame_resized_grayscale, center, frame_resized, boxcolor) if (len(center_fix) > 0): i = 0 for b in center_fix: #print(b) #print("Point "+str(i)+": "+str(b[0])+" "+str(b[1])) x_pixel = b[1] y_pixel = b[0] rawDisparity = depth[(int)(x_pixel), (int)(y_pixel)] print("raw:" + str(rawDisparity)) distance = 1 / (-0.00307 * rawDisparity + 3.33) if (distance < 0): distance = 0.5 print("Distance : " + str(distance)) cv2.putText(frame_resized, "distance: {:.2f}".format(distance), (10, (frame_resized.shape[0] - (i + 1) * 25) - 50), font, 0.65, (0, 0, 255), 3) cv2.putText( frame_resized, "Point " + str(i) + ": " + str(b[0]) + " " + str(b[1]), (10, frame_resized.shape[0] - (i + 1) * 25), font, 0.65, (0, 0, 255), 3) i = i + 1 y_pix, x_pix = center_fix[0] endtime = time.time() #nucleo.write(("8,"+str(x_person)+","+str(y_person)).encode()) # send x_person and y_person if ((abs(prev_x_pixel - x_pix)) < 50 and (abs(prev_y_pixel - y_pix)) < 50): timeout = timeout + (endtime - starttime) if (timeout > 5): flag_track2 = 1 boxcolor = (255, 0, 0) else: nucleo.flush() nucleo.write("8,,,,,,,,,,,,".encode()) timeout = 0 boxcolor = (0, 255, 0) prev_y_pixel, prev_x_pixel = y_pix, x_pix # DEBUGGING # #print("Teta: " + str(teta) + "Distance: " + str(distance)) print("Timeout: " + str(timeout)) #print ("Distance : " + str(distance)) elif (len(center_fix) <= 0): timeout = 0 boxcolor = (0, 255, 0) nucleo.flush() nucleo.write("8,,,,,,,,,,,,".encode()) elif (flag_track2 == 1): if (counttrack2 == 0): iA, iB, iC, iD = pick2[0] tracker.init([iA, iB, iC - iA, iD - iB], frame_resized) counttrack2 = counttrack2 + 1 elif (counttrack2 == 1): print(pick2[0]) #print("iA:"+str(iA)+"iB:"+str(iB)+"iC:"+str(iC)+"iD:"+str(iD)) boundingbox = tracker.update( frame_resized) #frame had better be contiguous boundingbox = list(map(int, boundingbox)) cv2.rectangle(frame_resized, (boundingbox[0], boundingbox[1]), (boundingbox[0] + boundingbox[2], boundingbox[1] + boundingbox[3]), (255, 0, 0), 3) #GENERAL ASSUMPTION SINGLE PERSON TRACKING # start tracking... 
x_track = ((boundingbox[2]) / 2.0) + boundingbox[0] y_track = ((boundingbox[3]) / 2.0) + boundingbox[1] print("x:" + str(x_track) + "y:" + str(y_track)) x_center = (frame_resized.shape[1] + 1) / 2 y_center = (frame_resized.shape[0] + 1) / 2 print(x_center, y_center) # compute teta asumsi distance lurus rawDisparity2 = depth2[(int)(y_track), (int)(x_track)] print("raw2:" + str(rawDisparity2)) distance2 = 1 / (-0.00307 * rawDisparity2 + 3.33) if (distance2 < 0): distance2 = prev_distance2 prev_distance2 = distance2 #realx = (x_track-x_center)+(distance/30.0) #teta = math.atan(realx/distance) # if distance is tangensial #teta = math.asin((0.026458333*(x_track-x_center)/distance)) # if distance is euclidean teta = (x_track - x_center) * tetaperpixel print("teta2: " + str(teta)) print("Distance2 : " + str(distance2)) cv2.putText(frame_resized, "distance: {:.2f}".format(distance2), (10, (frame_resized.shape[0] - (i + 1) * 25) - 50), font, 0.65, (0, 0, 255), 3) cv2.putText( frame_resized, "Point " + str(0) + ": " + str(x_track) + " " + str(y_track), (10, frame_resized.shape[0] - (i + 1) * 25), font, 0.65, (0, 0, 255), 3) # send the teta and distance nucleo.flush() if (teta < 0.0): flag = nucleo.write( ("7," + format(teta, '1.2f') + "," + format(distance2, '1.3f')).encode()) elif (teta > 0.0): flag = nucleo.write( ("7," + format(teta, '1.3f') + "," + format(distance2, '1.3f')).encode()) print("WRITEIN1" + str(flag)) if (tracker.getpeakvalue() < 0.4): counttrack2 = 0 flag_track2 = 0 nucleo.flush() nucleo.write("8,,,,,,,,,,,,".encode()) print("WRITEOUT") #frame_resized = cv2.flip(frame_resized, 0) #cv2.imshow("Detected Human", frame_resized) #cv2.imshow("Depth", depth) # cv2.imshow("Original", frame) else: count = count + 1 #print("Number of frame skipped in the video= " + str(count)) # compute the fps fps = cv2.getTickFrequency() / (cv2.getTickCount() - timer) #print("FPS: " + str(fps)) # Frame generation for Browser streaming with Flask... self.outframe = open("stream.jpg", 'wb+') cv2.imwrite("stream.jpg", frame_resized) # Save image... return self.outframe.read()
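# The serial writes above hand the motion controller either "7,<teta>,<distance>" (follow the
# tracked person) or "8,,,,,,,,,,,," (stop). This helper just packages those two messages in one
# place; the framing is inferred from the write() calls in this excerpt, not from a protocol
# document, and it uses a single '1.3f' format where the original varies between '1.2f' and '1.3f'.
import serial

def send_follow_command(port, teta=None, distance=None):
    """Write a follow command when an angle/distance pair is given, otherwise a stop command."""
    port.flush()
    if teta is None or distance is None:
        return port.write("8,,,,,,,,,,,,".encode())
    return port.write(("7," + format(teta, '1.3f') + "," +
                       format(distance, '1.3f')).encode())

# usage sketch: nucleo = serial.Serial('/dev/ttyACM0', 115200); send_follow_command(nucleo, 0.12, 1.5)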
def behaviourDetect(): #zig = Zigbee() #mms_count = 100 detect_lying = False #mms = MMS() args = parse_args() if args.cpu: ctx = mx.cpu() else: ctx = mx.gpu(args.gpu_id) # parse image list image_list = ['messigray.png'] assert len(image_list) > 0, "No valid image specified to detect" prefix = args.prefix + args.network network = None detector = Detector(network, prefix, args.epoch, args.data_shape, (args.mean_r, args.mean_g, args.mean_b), ctx=ctx) # run detection global isNotQuit global numOfStanding global numOfSitting global numOfLying outRects = [[674, 0, 960, 245], [561, 370, 960, 540], [718, 217, 960, 466]] horRects = [[415, 84, 547, 439]] verRects = [[315, 194, 642, 344]] maxarea = 30000 minside = 125 a = True kcf = False cap = cv2.VideoCapture(args.video) #cap = cv2.VideoCapture(0) objects = [] pre_objects = [] fobjects = [] ret, frame = cap.read() cap.set(3, 1920) cap.set(4, 1080) frame = cv2.flip(frame, 1) angle = 30 frame = imutils.rotate(frame, angle) frame = cv2.flip(frame, 0) height, width = frame.shape[:2] frame = cv2.resize(frame, (width / 2, height / 2)) pre_Frame = frame cv2.imwrite('messigray.png', frame) test_iter = detector.im_detect(image_list, args.dir, args.extension, show_timer=args.show_timer) font = cv2.FONT_HERSHEY_SIMPLEX while (isNotQuit): detect_lying = False #if(mms_count < 100): # mms_count += 1 start = timer() #KCF track if (kcf): ret1, framekcf = cap.read() '''framekcf = cv2.flip( framekcf, 1 ) angle = 30 framekcf = imutils.rotate(framekcf, angle) framekcf = cv2.flip( framekcf, 0 )''' #cap.set(3,1920) #cap.set(4,1080) if not ret1: break height, width = framekcf.shape[:2] framekcf = cv2.resize(framekcf, (width / 2, height / 2)) st = 0 sit = 0 ly = 0 for objecta in (objects): tracker = KCF.kcftracker( False, True, False, False) #hog, fixed_window, multiscale, lab tracker.init([ objecta.xmin, objecta.ymin, objecta.xmax - objecta.xmin, objecta.ymax - objecta.ymin ], pre_Frame) pre_Frame = framekcf boundingbox = tracker.update(framekcf) boundingbox = map(int, boundingbox) xmin, ymin, xmax, ymax = boundingbox[0], boundingbox[1], ( boundingbox[0] + boundingbox[2]), (boundingbox[1] + boundingbox[3]) print('fdfdsg', float(ymax - ymin) / (xmax - xmin)) if isInRects([(xmin + xmax) / 2, (ymax + ymin) / 2], outRects) or ((xmax - xmin) * (ymax - ymin) > maxarea): continue #filter the big box or box is in incorrect position if objecta.label == 0: if isInRects([(xmin + xmax) / 2, (ymax + ymin) / 2], verRects) and (ymax - ymin) > 125 and ( ymax - ymin) / (xmax - xmin) > 1.3: objecta.label = 2 elif isInRects([(xmin + xmax) / 2, (ymax + ymin) / 2], horRects) and (xmax - xmin) > 125 and ( xmax - xmin) / (ymax - ymin) > 1.3: objecta.label = 2 else: st += 1 if objecta.label == 1: sit += 1 if objecta.label == 2: ly += 1 cv2.rectangle(framekcf, (xmin, ymin), (xmax, ymax), colors[objecta.label], 1) cv2.putText(framekcf, CLASSES[objecta.label], (xmin, ymin), font, 0.5, colors[objecta.label], 1, cv2.LINE_AA) #framekcf = cv2.resize(framekcf,(width,height)) numOfStanding = st numOfSitting = sit numOfLying = ly #~KCF track #pre process next frame ret, frame = cap.read() '''frame = cv2.flip( frame, 1 ) angle = 30 frame = imutils.rotate(frame, angle) frame = cv2.flip( frame, 0 )''' #cap.set(3,1920) #cap.set(4,1080) if not ret: break frame = cv2.resize(frame, (width / 2, height / 2)) pre_Frame = frame cv2.imwrite('messigray.png', frame) test_iter = detector.im_detect(image_list, args.dir, args.extension, show_timer=args.show_timer) cv2.imshow("img", framekcf) kcf = False pre_objects = 
objects objects = [] #~pre process next frame else: #detection every 2 frame dets = detector.detect(test_iter, args.show_timer) #visualize detection for k, det in enumerate(dets): height = frame.shape[0] width = frame.shape[1] for i in range(det.shape[0]): cls_id = int(det[i, 0]) if cls_id >= 0: score = det[i, 1] if score > args.thresh: xmin = int(det[i, 2] * width) ymin = int(det[i, 3] * height) xmax = int(det[i, 4] * width) ymax = int(det[i, 5] * height) #cv2.rectangle(frame,(xmin,ymin),(xmax, ymax),(0,255,255),1) class_name = str(cls_id) if CLASSES and len(CLASSES) > cls_id: class_name = CLASSES[cls_id] #zig.count_detec(cls_id) objecta = Object(xmin, ymin, xmax, ymax, cls_id, score, 1) objects.append(objecta) fobjects = [] #t = threading.Thread(target=zig.send_zigbee, args=()) #t.start() #filter object overlap for aa in range(len(objects)): for bb in range(aa + 1, len(objects)): iou1 = iou_fiter([ objects[aa].xmin, objects[aa].ymin, objects[aa].xmax, objects[aa].ymax ], [ objects[bb].xmin, objects[bb].ymin, objects[bb].xmax, objects[bb].ymax ]) if iou1 > 0.6 and iou1 <= 1: if objects[aa].score > objects[bb].score: fobjects.append(objects[bb]) else: fobjects.append(objects[aa]) for objecta in (fobjects): try: objects.remove(objecta) except: print ' ' #~filter object overlap #correct object label for aa in range(len(objects)): for bb in range(len(pre_objects)): iou1 = iou([ objects[aa].xmin, objects[aa].ymin, objects[aa].xmax, objects[aa].ymax ], [ pre_objects[bb].xmin, pre_objects[bb].ymin, pre_objects[bb].xmax, pre_objects[bb].ymax ]) if iou1 > 0.6 and iou1 <= 1 and objects[ aa].label != pre_objects[bb].label: objects[aa].newlabel = pre_objects[bb].newlabel + 1 if objects[aa].newlabel <= 14: objects[aa].label = pre_objects[bb].label else: objects[aa].newlabel = 1 #~correct object label st = 0 sit = 0 ly = 0 for objecta in (objects): xmin, ymin, xmax, ymax = objecta.xmin, objecta.ymin, objecta.xmax, objecta.ymax if isInRects([(xmin + xmax) / 2, (ymax + ymin) / 2], outRects) or ((xmax - xmin) * (ymax - ymin) > maxarea): continue if objecta.label == 0: if isInRects([(xmin + xmax) / 2, (ymax + ymin) / 2], verRects) and (ymax - ymin) > 125 and ( ymax - ymin) / (xmax - xmin) > 1.3: objecta.label = 2 elif isInRects([(xmin + xmax) / 2, (ymax + ymin) / 2], horRects) and (xmax - xmin) > 125 and ( xmax - xmin) / (ymax - ymin) > 1.3: objecta.label = 2 else: st += 1 if objecta.label == 1: sit += 1 if objecta.label == 2: detect_lying = True ly += 1 cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), colors[objecta.label], 1) cv2.putText(frame, CLASSES[objecta.label], (xmin, ymin), font, 0.5, colors[objecta.label], 1, cv2.LINE_AA) #~visualize detection frame = cv2.resize(frame, (width, height)) #resize frame cv2.imshow("img", frame) #show video numOfStanding = st numOfSitting = sit numOfLying = ly '''if(detect_lying and mms_count == 100): mms_count = 0 frame_mms = cv2.resize(frame,(420,320)) cv2.imwrite('mms_save.png',frame_mms) tt = threading.Thread(target=mms.send_mms, args=()) tt.start()''' kcf = True #~detection time_elapsed = timer() - start #print("Detection timessssss for {} images: {:.4f} sec fps {:.4f}".format( # 1, time_elapsed, 1/time_elapsed)) k = cv2.waitKey(1) & 0xFF if k == ord('q'): isNotQuit = False break cap.release() cv2.destroyAllWindows()
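# Both behaviourDetect variants filter overlapping detections with iou_fiter() and match objects
# across frames with iou(), neither of which appears in this excerpt. A standard
# intersection-over-union on [xmin, ymin, xmax, ymax] boxes, sketched below, is the usual
# implementation and matches the 0 < iou <= 1 range that the 0.6 thresholds above assume.

def iou(box_a, box_b):
    """Intersection-over-union of two [xmin, ymin, xmax, ymax] boxes."""
    ax1, ay1, ax2, ay2 = box_a
    bx1, by1, bx2, by2 = box_b
    inter_w = max(0, min(ax2, bx2) - max(ax1, bx1))
    inter_h = max(0, min(ay2, by2) - max(ay1, by1))
    inter = inter_w * inter_h
    union = (ax2 - ax1) * (ay2 - ay1) + (bx2 - bx1) * (by2 - by1) - inter
    return inter / float(union) if union > 0 else 0.0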
def behaviourDetect(): args = parse_args() if args.cpu: ctx = mx.cpu() else: ctx = mx.gpu(args.gpu_id) # parse image list image_list = ['messigray.png'] assert len(image_list) > 0, "No valid image specified to detect" prefix = args.prefix + args.network network = None detector = Detector(network, prefix, args.epoch, args.data_shape, (args.mean_r, args.mean_g, args.mean_b), ctx=ctx) # run detection global isNotQuit global numOfStanding global numOfSitting global numOfLying a = True kcf = False cap = cv2.VideoCapture(args.video) #cap = cv2.VideoCapture(0) objects = [] pre_objects = [] fobjects = [] ret, frame = cap.read() height, width = frame.shape[:2] frame = cv2.resize(frame, (width / 2, height / 2)) pre_Frame = frame cv2.imwrite('messigray.png', frame) test_iter = detector.im_detect(image_list, args.dir, args.extension, show_timer=args.show_timer) font = cv2.FONT_HERSHEY_SIMPLEX while (isNotQuit): start = timer() #KCF track if (kcf): ret1, framekcf = cap.read() if not ret1: break height, width = framekcf.shape[:2] framekcf = cv2.resize(framekcf, (width / 2, height / 2)) st = 0 sit = 0 ly = 0 for objecta in (objects): tracker = KCF.kcftracker( False, True, False, False) #hog, fixed_window, multiscale, lab tracker.init([ objecta.xmin, objecta.ymin, objecta.xmax - objecta.xmin, objecta.ymax - objecta.ymin ], pre_Frame) pre_Frame = framekcf boundingbox = tracker.update(framekcf) boundingbox = map(int, boundingbox) cv2.rectangle(framekcf, (boundingbox[0], boundingbox[1]), (boundingbox[0] + boundingbox[2], boundingbox[1] + boundingbox[3]), (255, 255, 0), 1) if objecta.label == 0: st += 1 if objecta.label == 1: sit += 1 if objecta.label == 2: ly += 1 cv2.putText(framekcf, CLASSES[objecta.label] + str(objecta.score), (boundingbox[0], boundingbox[1]), font, 0.3, (255, 255, 255), 1, cv2.LINE_AA) framekcf = cv2.resize(framekcf, (width, height)) numOfStanding = st numOfSitting = sit numOfLying = ly #~KCF track #pre process next frame ret, frame = cap.read() if not ret: break frame = cv2.resize(frame, (width / 2, height / 2)) pre_Frame = frame cv2.imwrite('messigray.png', frame) test_iter = detector.im_detect(image_list, args.dir, args.extension, show_timer=args.show_timer) cv2.imshow("img", framekcf) kcf = False pre_objects = objects objects = [] #~pre process next frame else: #detection every 2 frame dets = detector.detect(test_iter, args.show_timer) #visualize detection for k, det in enumerate(dets): height = frame.shape[0] width = frame.shape[1] for i in range(det.shape[0]): cls_id = int(det[i, 0]) if cls_id >= 0: score = det[i, 1] if score > args.thresh: xmin = int(det[i, 2] * width) ymin = int(det[i, 3] * height) xmax = int(det[i, 4] * width) ymax = int(det[i, 5] * height) cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 255, 255), 1) class_name = str(cls_id) if CLASSES and len(CLASSES) > cls_id: class_name = CLASSES[cls_id] objecta = Object(xmin, ymin, xmax, ymax, cls_id, score, 1) objects.append(objecta) fobjects = [] #filter object overlap for aa in range(len(objects)): for bb in range(aa + 1, len(objects)): iou1 = iou_fiter([ objects[aa].xmin, objects[aa].ymin, objects[aa].xmax, objects[aa].ymax ], [ objects[bb].xmin, objects[bb].ymin, objects[bb].xmax, objects[bb].ymax ]) if iou1 > 0.6 and iou1 <= 1: if objects[aa].score > objects[bb].score: fobjects.append(objects[bb]) else: fobjects.append(objects[aa]) for objecta in (fobjects): try: objects.remove(objecta) except: print ' ' #~filter object overlap #correct object label for aa in range(len(objects)): for bb in 
range(len(pre_objects)): iou1 = iou([ objects[aa].xmin, objects[aa].ymin, objects[aa].xmax, objects[aa].ymax ], [ pre_objects[bb].xmin, pre_objects[bb].ymin, pre_objects[bb].xmax, pre_objects[bb].ymax ]) if iou1 > 0.6 and iou1 <= 1 and objects[ aa].label != pre_objects[bb].label: objects[aa].newlabel = pre_objects[bb].newlabel + 1 if objects[aa].newlabel <= 14: objects[aa].label = pre_objects[bb].label else: objects[aa].newlabel = 1 #~correct object label st = 0 sit = 0 ly = 0 for objecta in (objects): cv2.rectangle(frame, (objecta.xmin, objecta.ymin), (objecta.xmax, objecta.ymax), (0, 255, 0), 1) if objecta.label == 0: st += 1 if objecta.label == 1: sit += 1 if objecta.label == 2: ly += 1 cv2.putText(frame, CLASSES[objecta.label] + str(objecta.score), (objecta.xmin, objecta.ymin), font, 0.3, (255, 255, 255), 1, cv2.LINE_AA) #~visualize detection frame = cv2.resize(frame, (width * 2, height * 2)) #resize frame cv2.imshow("img", frame) #show video numOfStanding = st numOfSitting = sit numOfLying = ly kcf = True #~detection time_elapsed = timer() - start #print("Detection timessssss for {} images: {:.4f} sec fps {:.4f}".format( # 1, time_elapsed, 1/time_elapsed)) k = cv2.waitKey(1) & 0xFF if k == ord('q'): isNotQuit = False break cap.release() cv2.destroyAllWindows()
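behaviourDetect() alternates a detector frame with a KCF-tracked frame, which mostly comes down to converting the detector's corner boxes (xmin, ymin, xmax, ymax) into the [x, y, w, h] layout the KCF wrapper expects and back again. A rough single-object sketch of that hand-off, assuming the same python KCF module used in the snippets is importable (with the hog, fixed_window, multiscale, lab constructor flags); track_one_object is a hypothetical helper name:

import cv2
import KCF  # same python KCF wrapper assumed as in the snippets above

def track_one_object(det_box, detect_frame, next_frame):
    # det_box comes from the detector as (xmin, ymin, xmax, ymax) pixel corners
    xmin, ymin, xmax, ymax = det_box
    tracker = KCF.kcftracker(False, True, False, False)  # hog, fixed_window, multiscale, lab
    # KCF init/update work on [x, y, w, h], so convert from corner format
    tracker.init([xmin, ymin, xmax - xmin, ymax - ymin], detect_frame)
    x, y, w, h = map(int, tracker.update(next_frame))
    cv2.rectangle(next_frame, (x, y), (x + w, y + h), (255, 255, 0), 1)
    # return corners so the next detection pass can compare by IoU again
    return (x, y, x + w, y + h)

Creating a fresh tracker per detection keeps each object's appearance template independent, at the cost of re-learning it on every detector frame.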
def detection(model,config): # Tracker if config.USE_TRACKER: import sys sys.path.append(os.getcwd()+'/rod/kcf') import KCF tracker = KCF.kcftracker(False, True, False, False) tracker_counter = 0 track = False print("> Building Graph") # tf Session Config tf_config = model.tf_config detection_graph = model.detection_graph category_index = model.category_index with detection_graph.as_default(): with tf.Session(graph=detection_graph,config=tf_config) as sess: # start Videostream vs = WebcamVideoStream(config.VIDEO_INPUT,config.WIDTH,config.HEIGHT).start() # Define Input and Ouput tensors tensor_dict = model.get_tensordict(['num_detections', 'detection_boxes', 'detection_scores','detection_classes', 'detection_masks']) image_tensor = detection_graph.get_tensor_by_name('image_tensor:0') # Mask Transformations if 'detection_masks' in tensor_dict: # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size. detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0]) detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0]) real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32) detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1]) detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1]) detection_masks_reframed = reframe_box_masks_to_image_masks( detection_masks, detection_boxes, vs.real_height, vs.real_width) detection_masks_reframed = tf.cast(tf.greater(detection_masks_reframed, 0.5), tf.uint8) # Follow the convention by adding back the batch dimension tensor_dict['detection_masks'] = tf.expand_dims(detection_masks_reframed, 0) if config.SPLIT_MODEL: score_out = detection_graph.get_tensor_by_name('Postprocessor/convert_scores:0') expand_out = detection_graph.get_tensor_by_name('Postprocessor/ExpandDims_1:0') score_in = detection_graph.get_tensor_by_name('Postprocessor/convert_scores_1:0') expand_in = detection_graph.get_tensor_by_name('Postprocessor/ExpandDims_1_1:0') # Threading score = model.score expand = model.expand gpu_worker = SessionWorker("GPU",detection_graph,tf_config) cpu_worker = SessionWorker("CPU",detection_graph,tf_config) gpu_opts = [score_out, expand_out] cpu_opts = [tensor_dict['detection_boxes'], tensor_dict['detection_scores'], tensor_dict['detection_classes'], tensor_dict['num_detections']] fps = FPS(config.FPS_INTERVAL).start() masks = None print('> Starting Detection') while vs.isActive(): # Detection if not (config.USE_TRACKER and track): if config.SPLIT_MODEL: # split model in seperate gpu and cpu session threads if gpu_worker.is_sess_empty(): # read video frame, expand dimensions and convert to rgb frame = vs.read() # put new queue gpu_feeds = {image_tensor: vs.expanded()} if config.VISUALIZE: gpu_extras = frame # for visualization frame else: gpu_extras = None gpu_worker.put_sess_queue(gpu_opts,gpu_feeds,gpu_extras) g = gpu_worker.get_result_queue() if g is None: # gpu thread has no output queue. ok skip, let's check cpu thread. pass else: # gpu thread has output queue. score,expand,frame = g["results"][0],g["results"][1],g["extras"] if cpu_worker.is_sess_empty(): # When cpu thread has no next queue, put new queue. # else, drop gpu queue. cpu_feeds = {score_in: score, expand_in: expand} cpu_extras = frame cpu_worker.put_sess_queue(cpu_opts,cpu_feeds,cpu_extras) c = cpu_worker.get_result_queue() if c is None: # cpu thread has no output queue. ok, nothing to do. 
continue continue # If CPU RESULT has not been set yet, no fps update else: boxes, scores, classes, num, frame = c["results"][0],c["results"][1],c["results"][2],c["results"][3],c["extras"] else: # default session frame = vs.read() output_dict = sess.run(tensor_dict, feed_dict={image_tensor: vs.expanded()}) num = output_dict['num_detections'][0] classes = output_dict['detection_classes'][0] boxes = output_dict['detection_boxes'][0] scores = output_dict['detection_scores'][0] if 'detection_masks' in output_dict: masks = output_dict['detection_masks'][0] # reformat detection num = int(num) boxes = np.squeeze(boxes) classes = np.squeeze(classes).astype(np.uint8) scores = np.squeeze(scores) # Visualization vis = visualize_objectdetection(frame,boxes,classes,scores,masks,category_index,fps._glob_numFrames, config.MAX_FRAMES,fps.fps_local(),config.PRINT_INTERVAL,config.PRINT_TH, config.OD_DISPLAY_NAME,config.VISUALIZE,config.VIS_FPS,config.DISCO_MODE,config.ALPHA) if not vis: break # Activate Tracker if config.USE_TRACKER and num <= config.NUM_TRACKERS: tracker_frame = frame track = True first_track = True # Tracking else: frame = vs.read() if first_track: trackers = [] tracker_boxes = boxes for box in boxes[~np.all(boxes == 0, axis=1)]: tracker.init(conv_detect2track(box,vs.real_width, vs.real_height), tracker_frame) trackers.append(tracker) first_track = False for idx,tracker in enumerate(trackers): tracker_box = tracker.update(frame) tracker_boxes[idx,:] = conv_track2detect(tracker_box, vs.real_width, vs.real_height) vis = visualize_objectdetection(frame,tracker_boxes,classes,scores,masks,category_index,fps._glob_numFrames, config.MAX_FRAMES,fps.fps_local(),config.PRINT_INTERVAL,config.PRINT_TH, config.OD_DISPLAY_NAME,config.VISUALIZE,config.VIS_FPS,config.DISCO_MODE,config.ALPHA) if not vis: break tracker_counter += 1 #tracker_frame = frame if tracker_counter >= config.TRACKER_FRAMES: track = False tracker_counter = 0 fps.update() # End everything vs.stop() fps.stop() if config.SPLIT_MODEL: gpu_worker.stop() cpu_worker.stop()
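The tracker hand-off in detection() relies on conv_detect2track / conv_track2detect to translate between the TF object-detection box layout (normalized [ymin, xmin, ymax, xmax]) and the KCF layout (pixel [x, y, w, h]); note that the loop above re-initializes and appends the same tracker object for every box, so all list entries alias a single tracker. A rough sketch of those conversions and of keeping one independent tracker per box, under the assumed box layouts and with hypothetical names detect_to_track / track_to_detect / init_trackers:

import numpy as np
import KCF  # same python KCF wrapper assumed as in the snippet

def detect_to_track(box, width, height):
    # assumed layout: normalized [ymin, xmin, ymax, xmax] -> pixel [x, y, w, h]
    ymin, xmin, ymax, xmax = box
    return [int(xmin * width), int(ymin * height),
            int((xmax - xmin) * width), int((ymax - ymin) * height)]

def track_to_detect(box, width, height):
    # assumed layout: pixel [x, y, w, h] -> normalized [ymin, xmin, ymax, xmax]
    x, y, w, h = box
    return [y / float(height), x / float(width),
            (y + h) / float(height), (x + w) / float(width)]

def init_trackers(boxes, frame, width, height):
    # one independent KCF tracker per non-zero detection box
    trackers = []
    for box in boxes[~np.all(boxes == 0, axis=1)]:
        t = KCF.kcftracker(False, True, False, False)  # hog, fixed_window, multiscale, lab
        t.init(detect_to_track(box, width, height), frame)
        trackers.append(t)
    return trackers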