def is_empty(result_frame):
    input_size = size
    frame_size = result_frame.shape[:2]
    image_data = cv2.resize(result_frame, (input_size, input_size))
    image_data = image_data / 255.
    image_data = image_data[np.newaxis, ...].astype(np.float32)

    # detect on full image or part of image
    batch_data = tf.constant(image_data)
    pred_bbox = infer(batch_data)
    for key, value in pred_bbox.items():
        boxes = value[:, :, 0:4]
        pred_conf = value[:, :, 4:]

    boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
        boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
        scores=tf.reshape(
            pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
        max_output_size_per_class=50,
        max_total_size=50,
        iou_threshold=iou,
        score_threshold=score_human
    )

    # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, xmax, ymax
    original_h, original_w, _ = result_frame.shape
    bboxes = utils.format_boxes(boxes.numpy()[0], original_h, original_w)

    summ_of_obj_probabilities = sum([sum(i) for i in bboxes])
    return summ_of_obj_probabilities == 0
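# is_empty() (and get_detected_zone() below) rely on module-level names that are not
# defined in this snippet: size, iou, score_human and the infer serving signature.
# A minimal sketch of that setup, following the SavedModel-loading pattern used
# elsewhere in this file; the checkpoint path and threshold values are assumptions
# for illustration only.
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.python.saved_model import tag_constants

size = 416              # network input resolution (assumed)
iou = 0.45              # NMS IoU threshold (assumed)
score_human = 0.50      # person score threshold (assumed)

# load the YOLO SavedModel once and reuse its serving signature
saved_model_loaded = tf.saved_model.load('./checkpoints/yolov4-416',  # assumed path
                                         tags=[tag_constants.SERVING])
infer = saved_model_loaded.signatures['serving_default']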
def run_DL(_frame): # if pt_cfg.POLYTRACK.DL_DARK_SPOTS: # dark_spots = pt_cfg.POLYTRACK.RECORDED_DARK_SPOTS # if len(dark_spots): # _frame = map_darkspots(_frame, dark_spots) # else: # pass # else: # pass _frame = cv2.cvtColor(_frame, cv2.COLOR_BGR2RGB) image = Image.fromarray(_frame) frame_size = _frame.shape[:2] image_data = cv2.resize(_frame, (cfg.YOLO.INPUT_SIZE, cfg.YOLO.INPUT_SIZE)) image_data = image_data / 255. image_data = image_data[np.newaxis, ...].astype(np.float32) batch_data = tf.constant(image_data) pred_bbox = infer(batch_data) for key, value in pred_bbox.items(): boxes = value[:, :, 0:4] pred_conf = value[:, :, 4:] boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression( boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)), scores=tf.reshape( pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])), max_output_size_per_class=pt_cfg.POLYTRACK.MAX_OUTPUT_SIZE_PER_CLASS, max_total_size=pt_cfg.POLYTRACK.MAX_TOTAL_SIZE, iou_threshold=pt_cfg.POLYTRACK.DL_IOU_THRESHOLD, score_threshold=pt_cfg.POLYTRACK.DL_SCORE_THRESHOLD) # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, xmax, ymax original_h, original_w, _ = _frame.shape bboxes = utils.format_boxes(boxes.numpy()[0], original_h, original_w) pred_bbox = [ bboxes, scores.numpy()[0], classes.numpy()[0], valid_detections.numpy()[0] ] # read in all class names from config class_names = utils.read_class_names(cfg.YOLO.CLASSES) _detections = dl_detections_process(pred_bbox) return _detections
def get_detected_zone(result_frame, bodyguard=['helmet'], forbidden=False): input_size = size frame_size = result_frame.shape[:2] image_data = cv2.resize(result_frame, (input_size, input_size)) image_data = image_data / 255. image_data = image_data[np.newaxis, ...].astype(np.float32) # detect on full image or part of image batch_data = tf.constant(image_data) pred_bbox = infer(batch_data) for key, value in pred_bbox.items(): boxes = value[:, :, 0:4] pred_conf = value[:, :, 4:] boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression( boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)), scores=tf.reshape( pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])), max_output_size_per_class=50, max_total_size=50, iou_threshold=iou, score_threshold=score_human ) # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, xmax, ymax original_h, original_w, _ = result_frame.shape bboxes = utils.format_boxes(boxes.numpy()[0], original_h, original_w) # if we control emptiness of a room if forbidden: return bboxes obj_detections = [] image = Image.fromarray(result_frame) for i in range(valid_detections.numpy()[0]): # save persons parts image_tmp = image.crop((bboxes[i][0] - 10, bboxes[i][1] - 10, bboxes[i][2] + 10, bboxes[i][3] + 10)) image_tmp = cv2.cvtColor(np.array(image_tmp), cv2.COLOR_BGR2RGB) obj_detections.append(detect_on_person(image_tmp, bodyguard)) pred_bbox = [bboxes, scores.numpy()[0], classes.numpy()[0], valid_detections.numpy()[0]] image, violation = utils.draw_bbox(result_frame, pred_bbox, obj_detections, obj_threshold=score_obj) return image, violation
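# get_detected_zone() has two modes: with forbidden=True it returns the raw person
# boxes (for empty-room checks), otherwise it crops each detected person and looks
# for the requested equipment classes before drawing. A hedged usage sketch; the
# image file is a placeholder.
import cv2

frame = cv2.imread('site_camera.jpg')                       # hypothetical frame

# PPE mode: annotate the frame and flag violations for the requested class
annotated, violation = get_detected_zone(frame, bodyguard=['helmet'])

# forbidden-zone mode: just get the detected person boxes
person_boxes = get_detected_zone(frame, forbidden=True)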
def detector(images_coming, threshold, prop):
    FLAGS(sys.argv)
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    input_size = prop['size']

    # load model
    saved_model_loaded = tf.saved_model.load(prop['weights'], tags=[tag_constants.SERVING])

    counted_classes = {}  # stays empty when the count flag is disabled
    # loop through images in list and run Yolov4 model on each
    for count, org_image in enumerate(images_coming, 1):
        original_image = cv2.cvtColor(org_image, 1)
        image_data = cv2.resize(original_image, (input_size, input_size))
        image_data = image_data / 255.

        images_data = []
        for i in range(1):
            images_data.append(image_data)
        images_data = np.asarray(images_data).astype(np.float32)

        infer = saved_model_loaded.signatures['serving_default']
        batch_data = tf.constant(images_data)
        pred_bbox = infer(batch_data)
        for key, value in pred_bbox.items():
            boxes = value[:, :, 0:4]
            pred_conf = value[:, :, 4:]

        # run non max suppression on detections
        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=prop['iou'],
            score_threshold=threshold)

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, xmax, ymax
        original_h, original_w, _ = original_image.shape
        bboxes = utils.format_boxes(boxes.numpy()[0], original_h, original_w)

        # hold all detection data in one variable
        pred_bbox = [
            bboxes,
            scores.numpy()[0],
            classes.numpy()[0],
            valid_detections.numpy()[0]
        ]

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        allowed_classes = list(class_names.values())

        # if count flag is enabled, perform counting of objects
        if prop['count']:
            # count objects found
            counted_classes = count_objects(pred_bbox, by_class=True, allowed_classes=allowed_classes)
            # loop through dict and print
            for key, value in counted_classes.items():
                print("Number of {}s: {}".format(key, value))
            image = utils.draw_bbox(original_image, pred_bbox, prop['info'], counted_classes,
                                    allowed_classes=allowed_classes)
        else:
            image = utils.draw_bbox(original_image, pred_bbox, prop['info'],
                                    allowed_classes=allowed_classes)

        image = Image.fromarray(image.astype(np.uint8))
        image = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2RGB)

    # results correspond to the last image processed
    return image, counted_classes, pred_bbox
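# detector() pulls its configuration from the prop dictionary; the keys it reads are
# 'size', 'weights', 'iou', 'count' and 'info'. A hedged usage sketch - the checkpoint
# path, input image and threshold below are placeholder assumptions.
import cv2

prop = {
    'size': 416,                               # network input resolution
    'weights': './checkpoints/yolov4-416',     # SavedModel directory (assumed path)
    'iou': 0.45,                               # NMS IoU threshold
    'count': True,                             # print per-class counts
    'info': False,                             # extra info flag passed to draw_bbox
}

frames = [cv2.imread('sample.jpg')]            # hypothetical input image
image, counted_classes, pred_bbox = detector(frames, threshold=0.5, prop=prop)
print(counted_classes)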
def main(_argv): config = ConfigProto() config.gpu_options.allow_growth = True session = InteractiveSession(config=config) STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS) input_size = FLAGS.size video_path = FLAGS.video # get video name by using split method video_name = video_path.split('/')[-1] video_name = video_name.split('.')[0] if FLAGS.framework == 'tflite': interpreter = tf.lite.Interpreter(model_path=FLAGS.weights) interpreter.allocate_tensors() input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() print(input_details) print(output_details) else: saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING]) infer = saved_model_loaded.signatures['serving_default'] # begin video capture try: vid = cv2.VideoCapture(int(video_path)) except: vid = cv2.VideoCapture(video_path) out = None if FLAGS.output: # by default VideoCapture returns float instead of int width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = int(vid.get(cv2.CAP_PROP_FPS)) codec = cv2.VideoWriter_fourcc(*FLAGS.output_format) out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height)) firstFrame = True frame_num = 0 while True: return_value, frame_1 = vid.read() pts = [] aa = [] bb = [] cc = [] dd = [] while firstFrame: def click_event(event, x, y, flags, param): global pts if event == cv2.EVENT_LBUTTONDOWN: pts.append((x, y)) cv2.circle(frame_1, center=(x, y), radius=5, color=(0, 0, 255), thickness=-1) strXY = str(x) + " " + str(y) font = cv2.FONT_HERSHEY_SIMPLEX cv2.putText(frame_1, strXY, (x, y), font, 0.5, (255, 255, 0), 2) elif event == cv2.EVENT_RBUTTONDOWN: if pts: pts.pop() cv2.imshow('bobur', frame_1) cv2.imshow('bobur', frame_1) cv2.setMouseCallback('bobur', click_event) if cv2.waitKey(1) & 0xFF == ord('c'): firstFrame = False break if len(pts) >= 4: aa.append(pts[0]) bb.append(pts[1]) cc.append(pts[2]) dd.append(pts[3]) print(aa, bb, cc, dd) a, b, c, d, e, f, g, h = [209, 1040], [331, 197], [1124, 197], [ 1907, 850 ], [0, 0], [1920, 0], [1920, 1080], [0, 1080] # e,f,g,h = [0,0],[1920,0],[1920,1080],[0,1080] external_poly = [ np.array([e, b, c, f]), np.array([f, c, d, g]), np.array([g, d, a, h]), np.array([h, a, b, e]) ] frame = cv2.fillPoly(frame_1, external_poly, (0, 0, 0)) # cv2.line(frame,(209, 1040),(331,197),(255,0,0),2) # cv2.line(frame,(331, 197), (1124,197),(255,0,0),2) # cv2.line(frame,(1124,197),(1907,850),(255,0,0),2) # cv2.line(frame,(209, 1040),(1907,850),(255,0,0),2) # cv2.line(frame,a,b,(255,0,0),2) # cv2.line(frame,b,c,(255,0,0),2) # cv2.line(frame,c,d,(255,0,0),2) # cv2.line(frame,a,d,(255,0,0),2) if return_value: # frame = cv2.rotate(frame, cv2.ROTATE_90_CLOCKWISE) #rotate the video for mobile videos frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) frame_num += 1 image = Image.fromarray(frame) else: print('Video has ended or failed, try a different video format!') break if frame_num % 15 == 0: frame_size = frame.shape[:2] image_data = cv2.resize(frame, (input_size, input_size)) image_data = image_data / 255. 
image_data = image_data[np.newaxis, ...].astype(np.float32) start_time = time.time() if FLAGS.framework == 'tflite': interpreter.set_tensor(input_details[0]['index'], image_data) interpreter.invoke() pred = [ interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details)) ] if FLAGS.model == 'yolov3' and FLAGS.tiny == True: boxes, pred_conf = filter_boxes( pred[1], pred[0], score_threshold=0.25, input_shape=tf.constant([input_size, input_size])) else: boxes, pred_conf = filter_boxes( pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant([input_size, input_size])) else: batch_data = tf.constant(image_data) pred_bbox = infer(batch_data) for key, value in pred_bbox.items(): boxes = value[:, :, 0:4] pred_conf = value[:, :, 4:] boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression( boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)), scores=tf.reshape( pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])), max_output_size_per_class=100, max_total_size=100, iou_threshold=FLAGS.iou, score_threshold=FLAGS.score) # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, xmax, ymax original_h, original_w, _ = frame.shape bboxes = utils.format_boxes(boxes.numpy()[0], original_h, original_w) pred_bbox = [ bboxes, scores.numpy()[0], classes.numpy()[0], valid_detections.numpy()[0] ] # print(pred_bbox[2]) out_boxes, out_scores, out_classes, num_boxes = pred_bbox # read in all class names from config class_names = utils.read_class_names(cfg.YOLO.CLASSES) # by default allow all classes in .names file # allowed_classes = list(class_names.values()) # custom allowed classes (uncomment line below to allow detections for only SELECTED DETECTION CLASSES) allowed_classes = ['person', 'car', 'truck', 'bus', 'motorbike'] # allowed_classes = ['car'] # if crop flag is enabled, crop each detection and save it as new image if FLAGS.crop: crop_rate = 150 # capture images every so many frames (ex. crop photos every 150 frames) crop_path = os.path.join(os.getcwd(), 'detections', 'crop', video_name) try: os.mkdir(crop_path) except FileExistsError: pass if frame_num % crop_rate == 0: final_path = os.path.join(crop_path, 'frame_' + str(frame_num)) try: os.mkdir(final_path) except FileExistsError: pass crop_objects(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), pred_bbox, final_path, allowed_classes) else: pass if FLAGS.count: # count objects found counted_classes = count_objects( pred_bbox, by_class=True, allowed_classes=allowed_classes) # loop through dict and print for key, value in counted_classes.items(): print("Number of {}s: {}".format(key, value)) image = utils.draw_bbox(frame, pred_bbox, FLAGS.info, counted_classes, allowed_classes=allowed_classes, read_plate=FLAGS.plate) else: image = utils.draw_bbox(frame, pred_bbox, FLAGS.info, allowed_classes=allowed_classes, read_plate=FLAGS.plate) fps = 1.0 / (time.time() - start_time) print("FPS: %.2f" % fps) result = np.asarray(image) cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE) result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) if not FLAGS.dont_show: cv2.imshow("result", result) if FLAGS.output: out.write(result) if cv2.waitKey(1) & 0xFF == ord('q'): break vid.release() cv2.destroyAllWindows()
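# The region-of-interest handling in the main() above works by filling the four
# quadrilaterals that surround the selected polygon with black, so anything outside
# the zone is suppressed before inference. A small self-contained sketch of the same
# idea; the corner coordinates in the example are made up for illustration.
import cv2
import numpy as np

def mask_outside_quad(frame, a, b, c, d):
    """Black out everything outside the quadrilateral a-b-c-d, with the corners
    listed in the same order as above (roughly bottom-left, top-left, top-right,
    bottom-right)."""
    h, w = frame.shape[:2]
    e, f, g, hh = [0, 0], [w, 0], [w, h], [0, h]          # image corners
    outside = [
        np.array([e, b, c, f], dtype=np.int32),           # strip above the quad
        np.array([f, c, d, g], dtype=np.int32),           # strip to the right
        np.array([g, d, a, hh], dtype=np.int32),          # strip below
        np.array([hh, a, b, e], dtype=np.int32),          # strip to the left
    ]
    return cv2.fillPoly(frame, outside, (0, 0, 0))

# example on a blank 1920x1080 frame with hypothetical corner points
frame = np.zeros((1080, 1920, 3), dtype=np.uint8)
masked = mask_outside_quad(frame, [209, 1040], [331, 197], [1124, 197], [1907, 850])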
        max_total_size=50,
        iou_threshold=0.45,
        score_threshold=0.50)

    # convert data to numpy arrays and slice out unused elements
    num_objects = valid_detections.numpy()[0]
    bboxes = boxes.numpy()[0]
    bboxes = bboxes[0:int(num_objects)]
    scores = scores.numpy()[0]
    scores = scores[0:int(num_objects)]
    classes = classes.numpy()[0]
    classes = classes[0:int(num_objects)]

    # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
    original_h, original_w, _ = frame.shape
    bboxes = utils.format_boxes(bboxes, original_h, original_w)

    # store all predictions in one parameter for simplicity when calling functions
    pred_bbox = [bboxes, scores, classes, num_objects]

    # read in all class names from config
    class_names = utils.read_class_names(cfg.YOLO.CLASSES)

    # by default allow all classes in .names file
    allowed_classes = list(class_names.values())

    # loop through objects and use class index to get class name, allow only classes in allowed_classes list
    names = []
    deleted_indx = []
    for i in range(num_objects):
        class_indx = int(classes[i])
def main(_argv): config = ConfigProto() config.gpu_options.allow_growth = True session = InteractiveSession(config=config) STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS) input_size = FLAGS.size video_path = FLAGS.video # get video name by using split method video_name = video_path.split('/')[-1] video_name = video_name.split('.')[0] if FLAGS.framework == 'tflite': interpreter = tf.lite.Interpreter(model_path=FLAGS.weights) interpreter.allocate_tensors() input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() print(input_details) print(output_details) else: saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING]) infer = saved_model_loaded.signatures['serving_default'] # begin video capture try: vid = cv2.VideoCapture(int(video_path)) except: vid = cv2.VideoCapture(video_path) out = None if FLAGS.output: # by default VideoCapture returns float instead of int width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = int(vid.get(cv2.CAP_PROP_FPS)) codec = cv2.VideoWriter_fourcc(*FLAGS.output_format) out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height)) frame_num = 0 while True: return_value, frame = vid.read() if return_value: frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) frame_num += 1 image = Image.fromarray(frame) else: print('Video has ended or failed, try a different video format!') break frame_size = frame.shape[:2] image_data = cv2.resize(frame, (input_size, input_size)) image_data = image_data / 255. image_data = image_data[np.newaxis, ...].astype(np.float32) start_time = time.time() if FLAGS.framework == 'tflite': interpreter.set_tensor(input_details[0]['index'], image_data) interpreter.invoke() pred = [interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))] if FLAGS.model == 'yolov3' and FLAGS.tiny == True: boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25, input_shape=tf.constant([input_size, input_size])) else: boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant([input_size, input_size])) else: batch_data = tf.constant(image_data) pred_bbox = infer(batch_data) for key, value in pred_bbox.items(): boxes = value[:, :, 0:4] pred_conf = value[:, :, 4:] boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression( boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)), scores=tf.reshape( pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])), max_output_size_per_class=50, max_total_size=50, iou_threshold=FLAGS.iou, score_threshold=FLAGS.score ) # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, xmax, ymax original_h, original_w, _ = frame.shape bboxes = utils.format_boxes(boxes.numpy()[0], original_h, original_w) pred_bbox = [bboxes, scores.numpy()[0], classes.numpy()[0], valid_detections.numpy()[0]] # read in all class names from config class_names = utils.read_class_names(cfg.YOLO.CLASSES) # by default allow all classes in .names file allowed_classes = list(class_names.values()) # custom allowed classes (uncomment line below to allow detections for only people) #allowed_classes = ['person'] # if crop flag is enabled, crop each detection and save it as new image if FLAGS.crop: crop_rate = 150 # capture images every so many frames (ex. 
crop photos every 150 frames) crop_path = os.path.join(os.getcwd(), 'detections', 'crop', video_name) try: os.mkdir(crop_path) except FileExistsError: pass if frame_num % crop_rate == 0: final_path = os.path.join(crop_path, 'frame_' + str(frame_num)) try: os.mkdir(final_path) except FileExistsError: pass crop_objects(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), pred_bbox, final_path, allowed_classes) else: pass if FLAGS.count: # count objects found counted_classes = count_objects(pred_bbox, by_class = True, allowed_classes=allowed_classes) # loop through dict and print for key, value in counted_classes.items(): print("Number of {}s: {}".format(key, value)) image = utils.draw_bbox(frame, pred_bbox, FLAGS.info, counted_classes, allowed_classes=allowed_classes, read_plate=FLAGS.plate) else: image = utils.draw_bbox(frame, pred_bbox, FLAGS.info, allowed_classes=allowed_classes, read_plate=FLAGS.plate) fps = 1.0 / (time.time() - start_time) print("FPS: %.2f" % fps) result = np.asarray(image) # cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE) result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # if not FLAGS.dont_show: # cv2.imshow("result", result) if FLAGS.output: out.write(result)
def iterate(lines, model, vid, frame_num): tracks = [] return_value, frame = vid.read() if return_value: frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) image = Image.fromarray(frame) else: print('Video has ended or failed, try a different video format!') cv2.destroyAllWindows() return False, tracks frame_size = frame.shape[:2] image_data = cv2.resize(frame, (FLAGS.size, FLAGS.size)) image_data = image_data / 255. image_data = image_data[np.newaxis, ...].astype(np.float32) start_time = time.time() # run detections on tflite if flag is set if FLAGS.framework == 'tflite': interpreter.set_tensor(input_details[0]['index'], image_data) interpreter.invoke() pred = [ interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details)) ] # run detections using yolov3 if flag is set if FLAGS.model == 'yolov3' and FLAGS.tiny == True: boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25, input_shape=tf.constant( [FLAGS.size, FLAGS.size])) else: boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant( [FLAGS.size, FLAGS.size])) else: batch_data = tf.constant(image_data) pred_bbox = model.signatures['serving_default'](batch_data) for key, value in pred_bbox.items(): boxes = value[:, :, 0:4] pred_conf = value[:, :, 4:] boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression( boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)), scores=tf.reshape( pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])), max_output_size_per_class=50, max_total_size=50, iou_threshold=FLAGS.iou, score_threshold=FLAGS.score) # convert data to numpy arrays and slice out unused elements num_objects = valid_detections.numpy()[0] bboxes = boxes.numpy()[0] bboxes = bboxes[0:int(num_objects)] scores = scores.numpy()[0] scores = scores[0:int(num_objects)] classes = classes.numpy()[0] classes = classes[0:int(num_objects)] # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height original_h, original_w, _ = frame.shape bboxes = utils.format_boxes(bboxes, original_h, original_w) # store all predictions in one parameter for simplicity when calling functions pred_bbox = [bboxes, scores, classes, num_objects] # read in all class names from config class_names = utils.read_class_names(cfg.YOLO.CLASSES) # by default allow all classes in .names file #allowed_classes = list(class_names.values()) # custom allowed classes (uncomment line below to customize tracker for only people) #allowed_classes = ['person'] allowed_classes = ['car', 'bus', 'truck'] # loop through objects and use class index to get class name, allow only classes in allowed_classes list names = [] deleted_indx = [] for i in range(num_objects): class_indx = int(classes[i]) class_name = class_names[class_indx] if class_name not in allowed_classes: deleted_indx.append(i) else: names.append(class_name) names = np.array(names) count = len(names) if FLAGS.count: cv2.putText(frame, "Objects being tracked: {}".format(count), (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2) print("Objects being tracked: {}".format(count)) # delete detections that are not in allowed_classes bboxes = np.delete(bboxes, deleted_indx, axis=0) scores = np.delete(scores, deleted_indx, axis=0) # encode yolo detections and feed to tracker features = encoder(frame, bboxes) detections = [ Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip( bboxes, scores, names, features) ] #initialize color map cmap = 
plt.get_cmap('tab20b') colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)] # run non-maxima supression boxs = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) classes = np.array([d.class_name for d in detections]) indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) # update tracks for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() class_name = track.get_class() tracks.append( Rect(track.track_id, (int(bbox[0]), int(bbox[1])), (int(bbox[2]) - int(bbox[0]), int(bbox[3]) - int(bbox[1])))) # draw bbox on screen color = colors[int(track.track_id) % len(colors)] color = [i * 255 for i in color] cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle( frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText(frame, class_name + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) # if enable info flag then print details about each track #if FLAGS.info: # print("Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}".format(str(track.track_id), class_name, (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])))) for line in lines: cv2.line(frame, line.pt1, line.pt2, line.color, 3) cv2.line(frame, line.vertor_pt1, line.vertor_pt2, (255, 255, 0), 2) cv2.putText(frame, str(line.count), line.center, cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2) cv2.putText(frame, str(frame_num), (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2) # calculate frames per second of running detections fps = 1.0 / (time.time() - start_time) print("FPS: %.2f" % fps) result = np.asarray(frame) result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) if not FLAGS.dont_show: cv2.imshow("Output Video", result) # if output flag is set, save video file if FLAGS.output: out.write(result) if cv2.waitKey(1) & 0xFF == ord('q'): cv2.destroyAllWindows() return False, tracks return True, tracks
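# iterate() returns Rect objects and draws Line objects whose definitions are not
# shown in this snippet. Based only on the attributes accessed above (track_id,
# top-left point and size for Rect; pt1/pt2, a "vector" segment, count and center
# for Line), a hedged sketch of what those containers might look like:
from dataclasses import dataclass
from typing import Tuple

Point = Tuple[int, int]

@dataclass
class Rect:
    # constructed as Rect(track_id, (x, y), (w, h)) in iterate()
    track_id: int
    top_left: Point
    size: Point

@dataclass
class Line:
    # attributes read when drawing: the counting line, its direction segment,
    # the running count and the position where the count is printed
    pt1: Point
    pt2: Point
    color: Tuple[int, int, int] = (255, 0, 0)
    vertor_pt1: Point = (0, 0)     # spelling kept from the original attribute name
    vertor_pt2: Point = (0, 0)
    count: int = 0
    center: Point = (0, 0)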
def main(_argv): config = ConfigProto() config.gpu_options.allow_growth = True session = InteractiveSession(config=config) STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS) input_size = FLAGS.size video_path = FLAGS.video if FLAGS.framework == 'tflite': interpreter = tf.lite.Interpreter(model_path=FLAGS.weights) interpreter.allocate_tensors() input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() print(input_details) print(output_details) else: saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING]) infer = saved_model_loaded.signatures['serving_default'] # begin video capture try: vid = cv2.VideoCapture(int(video_path)) except: vid = cv2.VideoCapture(video_path) out = None if FLAGS.output: # by default VideoCapture returns float instead of int width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = int(vid.get(cv2.CAP_PROP_FPS)) codec = cv2.VideoWriter_fourcc(*FLAGS.output_format) out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height)) while True: return_value, frame = vid.read() if return_value: frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) image = Image.fromarray(frame) else: print('Video has ended or failed, try a different video format!') break frame_size = frame.shape[:2] image_data = cv2.resize(frame, (input_size, input_size)) image_data = image_data / 255. image_data = image_data[np.newaxis, ...].astype(np.float32) start_time = time.time() if FLAGS.framework == 'tflite': interpreter.set_tensor(input_details[0]['index'], image_data) interpreter.invoke() pred = [ interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details)) ] if FLAGS.model == 'yolov3' and FLAGS.tiny == True: boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25, input_shape=tf.constant( [input_size, input_size])) else: boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant( [input_size, input_size])) else: batch_data = tf.constant(image_data) pred_bbox = infer(batch_data) for key, value in pred_bbox.items(): boxes = value[:, :, 0:4] pred_conf = value[:, :, 4:] boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression( boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)), scores=tf.reshape( pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])), max_output_size_per_class=50, max_total_size=50, iou_threshold=FLAGS.iou, score_threshold=FLAGS.score) class_names = utils.read_class_names(cfg.YOLO.CLASSES) allowed_classes = ['person'] original_h, original_w, _ = frame.shape bboxes = utils.format_boxes(boxes.numpy()[0], original_h, original_w) pred_bbox = [ bboxes, scores.numpy()[0], classes.numpy()[0], valid_detections.numpy()[0] ] image = utils.draw_bbox(frame, pred_bbox, allowed_classes=allowed_classes) if FLAGS.covid: distance = social_distance(pred_bbox, frame, allowed_classes) fps = 1.0 / (time.time() - start_time) print("FPS: %.2f" % fps) result = np.asarray(image) cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE) result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) if not FLAGS.dont_show: cv2.imshow("result", result) if FLAGS.output: out.write(result) if cv2.waitKey(1) & 0xFF == ord('q'): break cv2.destroyAllWindows()
def detect_video(url):
    """
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = 416
    saved_model_loaded = tf.saved_model.load('./checkpoints/yolov4Tiny-416', tags=[tag_constants.SERVING])
    infer = saved_model_loaded.signatures['serving_default']
    """
    try:
        name = random.random()
        vid = cv2.VideoCapture(url)
        # out = None
        currentFrame = 0
        while True:
            return_value, frame = vid.read()
            if return_value:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                image = Image.fromarray(frame)
            else:
                print('Video has ended or failed, try a different video format!')
                break
            currentFrame += 1
            if ((currentFrame % 4) == 0) & ((currentFrame % 15) != 0):
                continue

            frame_size = frame.shape[:2]
            image_data = cv2.resize(frame, (input_size, input_size))
            image_data = image_data / 255.
            image_data = image_data[np.newaxis, ...].astype(np.float32)
            start_time = time.time()

            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

            boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
                boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
                scores=tf.reshape(
                    pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
                max_output_size_per_class=50,
                max_total_size=50,
                iou_threshold=0.45,
                score_threshold=0.25
            )

            # bboxes = utils.format_boxes(boxes.numpy()[0], original_h, original_w)
            pred_bbox = [boxes.numpy(), scores.numpy(), classes.numpy(), valid_detections.numpy()]

            if (currentFrame % 15) == 0:
                original_h, original_w, _ = frame.shape
                bboxes = utils.format_boxes(boxes.numpy()[0], original_h, original_w)
                predictions = [bboxes, scores.numpy()[0], classes.numpy()[0], valid_detections.numpy()[0]]
                crop_detections(frame, predictions, 200, 50)

            image = utils.draw_bbox(frame, pred_bbox)
            fps = 1.0 / (time.time() - start_time)
            print("FPS: %.2f" % fps)
            result = np.asarray(image)
            # the display window is keyed by the per-capture random name
            cv2.namedWindow(str(name), cv2.WINDOW_AUTOSIZE)
            result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            cv2.imshow(str(name), result)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        cv2.destroyAllWindows()
        return None
    except Exception as e:
        print('oof', e)
        cv2.destroyAllWindows()
        return None
def inference(preprocess_queue, inference_queue): import tensorflow as tf import core.utils as utils from tensorflow.python.saved_model import tag_constants from tensorflow.compat.v1 import InteractiveSession from tensorflow.compat.v1 import ConfigProto from core.functions import count_objects, crop_objects from core.config import cfg from core.utils import read_class_names import os import random from core.yolov4 import filter_boxes tf.keras.backend.clear_session() input_size = Parameters.input_size model = OutsourceContract.model framework = Parameters.framework tiny = OutsourceContract.tiny weights = Parameters.weights iou = Parameters.iou score = Parameters.score physical_devices = tf.config.experimental.list_physical_devices('GPU') try: if len(physical_devices) > 0: tf.config.experimental.set_memory_growth(physical_devices[0], True) except: pass # configure gpu usage config = ConfigProto() config.gpu_options.allow_growth = True session = InteractiveSession(config=config) # load model if framework == 'tflite': interpreter = tf.lite.Interpreter(model_path=weights) else: saved_model_loaded = tf.saved_model.load(weights, tags=[tag_constants.SERVING]) # read in all class names from config class_names = utils.read_class_names(cfg.YOLO.CLASSES) count = Parameters.count info = Parameters.info crop = Parameters.crop while True: if not preprocess_queue.empty(): queueData = preprocess_queue.get() while not preprocess_queue.empty(): queueData = preprocess_queue.get() #preprocess_queue.task_done() images_data = queueData[0] name = queueData[1] original_image = queueData[2] #preprocess_queue.task_done() if framework == 'tflite': interpreter.allocate_tensors() input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() interpreter.set_tensor(input_details[0]['index'], images_data) interpreter.invoke() pred = [ interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details)) ] if model == 'yolov3' and tiny == True: boxes, pred_conf = filter_boxes( pred[1], pred[0], score_threshold=0.25, input_shape=tf.constant([input_size, input_size])) else: boxes, pred_conf = filter_boxes( pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant([input_size, input_size])) else: infer = saved_model_loaded.signatures['serving_default'] batch_data = tf.constant(images_data) pred_bbox = infer(batch_data) for key, value in pred_bbox.items(): boxes = value[:, :, 0:4] pred_conf = value[:, :, 4:] boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression( boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)), scores=tf.reshape( pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])), max_output_size_per_class=50, max_total_size=50, iou_threshold=iou, score_threshold=score) # 1.2ms # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, xmax, ymax original_h, original_w, _ = original_image.shape bboxes = utils.format_boxes(boxes.numpy()[0], original_h, original_w) # 1ms #-> no tf needed # hold all detection data in one variable pred_bbox = [ bboxes, scores.numpy()[0], classes.numpy()[0], valid_detections.numpy()[0] ] # by default allow all classes in .names file allowed_classes = list(class_names.values()) # custom allowed classes (uncomment line below to allow detections for only people) # allowed_classes = ['person'] # if crop flag is enabled, crop each detection and save it as new image if crop: crop_path = os.path.join(os.getcwd(), 'detections', 'crop', image_name) try: os.mkdir(crop_path) except 
                    FileExistsError:
                        pass
                crop_objects(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB), pred_bbox,
                             crop_path, allowed_classes)

            if count:
                # count objects found
                counted_classes = count_objects(
                    pred_bbox, by_class=False, allowed_classes=allowed_classes)
                # loop through dict and print
                for key, value in counted_classes.items():
                    print("Number of {}s: {}".format(key, value))
                boxtext, image = utils.draw_bbox(
                    original_image, pred_bbox, info, counted_classes,
                    allowed_classes=allowed_classes)
            else:
                boxtext, image = utils.draw_bbox(
                    original_image, pred_bbox, info,
                    allowed_classes=allowed_classes)  # 0.5ms

            image = Image.fromarray(image.astype(np.uint8))  # 0.3ms
            inference_queue.put((boxtext, image, name))
def main(_argv): avg=[] # Definition of the parameters max_cosine_distance = 0.4 nn_budget = None nms_max_overlap = 1.0 #regression model load weight_path='./2_input_model_2-3.5%/' loaded_model = tf.keras.models.load_model(weight_path) # initialize deep sort model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) # calculate cosine distance metric metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) # initialize tracker tracker = Tracker(metric) # load configuration for object detector config = ConfigProto() config.gpu_options.allow_growth = True session = InteractiveSession(config=config) STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS) input_size = FLAGS.size video_path = FLAGS.video # load tflite model if flag is set if FLAGS.framework == 'tflite': interpreter = tf.lite.Interpreter(model_path=FLAGS.weights) interpreter.allocate_tensors() input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() print(input_details) print(output_details) # otherwise load standard tensorflow saved model else: saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING]) infer = saved_model_loaded.signatures['serving_default'] # begin video capture try: vid = cv2.VideoCapture(int(video_path)) except: vid = cv2.VideoCapture(video_path) out = None # get video ready to save locally if flag is set if FLAGS.output: # by default VideoCapture returns float instead of int width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = int(vid.get(cv2.CAP_PROP_FPS)) codec = cv2.VideoWriter_fourcc(*FLAGS.output_format) out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height)) frame_num = 0 # while video is running while True: return_value, frame = vid.read() if return_value: frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) image = Image.fromarray(frame) else: print('Video has ended or failed, try a different video format!') break frame_num +=1 print('Frame #: ', frame_num) frame_size = frame.shape[:2] image_data = cv2.resize(frame, (input_size, input_size)) image_data = image_data / 255. 
image_data = image_data[np.newaxis, ...].astype(np.float32) start_time = time.time() # run detections on tflite if flag is set if FLAGS.framework == 'tflite': interpreter.set_tensor(input_details[0]['index'], image_data) interpreter.invoke() pred = [interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))] # run detections using yolov3 if flag is set if FLAGS.model == 'yolov3' and FLAGS.tiny == True: boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25, input_shape=tf.constant([input_size, input_size])) else: boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant([input_size, input_size])) else: batch_data = tf.constant(image_data) pred_bbox = infer(batch_data) for key, value in pred_bbox.items(): boxes = value[:, :, 0:4] pred_conf = value[:, :, 4:] boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression( boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)), scores=tf.reshape( pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])), max_output_size_per_class=50, max_total_size=50, iou_threshold=FLAGS.iou, score_threshold=FLAGS.score ) # convert data to numpy arrays and slice out unused elements num_objects = valid_detections.numpy()[0] bboxes = boxes.numpy()[0] bboxes = bboxes[0:int(num_objects)] scores = scores.numpy()[0] scores = scores[0:int(num_objects)] classes = classes.numpy()[0] classes = classes[0:int(num_objects)] # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height original_h, original_w, _ = frame.shape bboxes = utils.format_boxes(bboxes, original_h, original_w) # store all predictions in one parameter for simplicity when calling functions pred_bbox = [bboxes, scores, classes, num_objects] #print("pred_bbox: ",pred_bbox[0]) #print("scores: ",pred_bbox[1]) #print("classes :",pred_bbox[2]) #print("num :",pred_bbox[3]) #print("width :",width) #print("height :",height) # read in all class names from config class_names = utils.read_class_names(cfg.YOLO.CLASSES) # by default allow all classes in .names file allowed_classes = list(class_names.values()) # custom allowed classes (uncomment line below to customize tracker for only people) #allowed_classes = ['person'] # loop through objects and use class index to get class name, allow only classes in allowed_classes list names = [] deleted_indx = [] for i in range(num_objects): class_indx = int(classes[i]) class_name = class_names[class_indx] if class_name not in allowed_classes: deleted_indx.append(i) else: names.append(class_name) names = np.array(names) count = len(names) if FLAGS.count: cv2.putText(frame, "Objects being tracked: {}".format(count), (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2) print("Objects being tracked: {}".format(count)) # delete detections that are not in allowed_classes bboxes = np.delete(bboxes, deleted_indx, axis=0) scores = np.delete(scores, deleted_indx, axis=0) # encode yolo detections and feed to tracker features = encoder(frame, bboxes) detections = [Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip(bboxes, scores, names, features)] #initialize color map cmap = plt.get_cmap('tab20b') colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)] # run non-maxima supression boxs = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) classes = np.array([d.class_name for d in detections]) #print("boxs ",boxs) #print("scores ",scores) #print("classes ",classes) 
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        # print("indices ", indices)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        cv2.putText(frame, "using regress", (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (255, 0, 255), 2)
        # cv2.putText(frame, "Objects being detected: {}".format(count), (5, 350), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 0, 255), 2)
        cv2.putText(frame, "frame# {}".format(frame_num), (750, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (255, 0, 255), 2)

        # update tracks
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()

            if 'entrance' not in classes:
                if len(classes) > 1:
                    if contains_duplicates(classes) == False:
                        # color = (50, 89, 170)
                        check_rect = 0
                        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
                        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))

                        ########## set sticker as low priority #############
                        # both detections are a mat or a sensor (no sticker present)
                        if classes[0] in ('mat', 'sensor') and classes[1] in ('mat', 'sensor'):
                            print("*************NO STK**********************************")
                            color = (50, 89, 170)
                            x1, y1, x2, y2 = convert2(width, height, int(boxs[0][0]), int(boxs[0][1]),
                                                      int(boxs[0][0] + boxs[0][2]), int(boxs[0][1] + boxs[0][3]))  # xywh to xmin ymin xmax ymax
                            x3, y3, x4, y4 = convert2(width, height, int(bboxes[1][0]), int(bboxes[1][1]),
                                                      int(bboxes[1][0] + bboxes[1][2]), int(bboxes[1][1] + bboxes[1][3]))  # xywh to xmin ymin xmax ymax
                            reg_input = np.array([[class_index(classes[0]), x1, y1, x2, y2,
                                                   class_index(classes[1]), x3, y3, x4, y4]])
                            predictions = loaded_model.predict(reg_input)
                            a1_pred = predictions[0]
                            b1_pred = predictions[1]
                            c1_pred = predictions[2]
                            d1_pred = predictions[3]
                            xmin, xmax, ymin, ymax = convert(width, height, a1_pred, b1_pred, c1_pred, d1_pred)
                            start_point = (xmin, ymin)
                            end_point = (xmax, ymax)
                            rect1 = xmax - xmin
                            rect2 = ymax - ymin
                            check_rect = rect2 / rect1
                        ################ else condition for sticker ######
                        else:
                            print("*************USE STK**********************************")
                            # the first detection is a convenience-store sticker
                            if classes[0] in ('famSticker', 'okmartSticker', 'sevenSticker'):
                                color = (60, 120, 40)
                                x1, y1, x2, y2 = convert2(width, height, int(boxs[0][0]), int(boxs[0][1]),
                                                          int(boxs[0][0] + boxs[0][2]), int(boxs[0][1] + boxs[0][3]))  # xywh to xmin ymin xmax ymax
                                x3, y3, x4, y4 = convert2(width, height, int(bboxes[1][0]), int(bboxes[1][1]),
                                                          int(bboxes[1][0] + bboxes[1][2]), int(bboxes[1][1] + bboxes[1][3]))  # xywh to xmin ymin xmax ymax
                                reg_input = np.array([[class_index(classes[0]), x1, y1, x2, y2,
                                                       class_index(classes[1]), x3, y3, x4, y4]])
                                #### ratio ####
                                C1_x = boxs[0][0] + (boxs[0][2] / 2)
                                C1_y = boxs[0][1] + (boxs[0][3] / 2)
                                C2_x = bboxes[1][0] + (bboxes[1][2] / 2)
                                C2_y = bboxes[1][1] + (bboxes[1][3] / 2)
                                Dx = (C2_x - C1_x)
                                Dy = (C2_y - C1_y)
                                # The two rectangles do not intersect and partially overlap in the X-axis direction:
                                # the minimum distance is between the lower line of the upper rectangle and the
                                # upper line of the lower rectangle
                                if (Dx < ((int(boxs[0][0] + boxs[0][2]) + int(bboxes[1][0] + bboxes[1][2])) / 2)) and \
                                        (Dy >= ((int(boxs[0][1] + boxs[0][3]) + int(bboxes[1][1] + bboxes[1][3])) / 2)):
                                    min_dist = Dy - ((int(boxs[0][1] + boxs[0][3]) + int(bboxes[1][1] + bboxes[1][3])) / 2)
                                # The two rectangles do not intersect and partially overlap in the Y-axis direction:
                                # the minimum distance is between the right line of the left rectangle and the
                                # left line of the right rectangle
                                elif (Dx >= ((int(boxs[0][0] + boxs[0][2]) + int(bboxes[1][0] + bboxes[1][2])) / 2)) and \
                                        (Dy < ((int(boxs[0][1] + boxs[0][3]) + int(bboxes[1][1] + bboxes[1][3])) / 2)):
                                    min_dist = Dx - ((int(boxs[0][0] + boxs[0][2]) + int(bboxes[1][0] + bboxes[1][2])) / 2)
                                # The two rectangles do not intersect and do not overlap on either axis:
                                # the minimum distance is between the two closest vertices (Pythagorean theorem)
                                elif (Dx >= ((int(boxs[0][0] + boxs[0][2]) + int(bboxes[1][0] + bboxes[1][2])) / 2)) and \
                                        (Dy >= ((int(boxs[0][1] + boxs[0][3]) + int(bboxes[1][1] + bboxes[1][3])) / 2)):
                                    delta_x = Dx - ((int(boxs[0][0] + boxs[0][2]) + int(bboxes[1][0] + bboxes[1][2])) / 2)
                                    delta_y = Dy - ((int(boxs[0][1] + boxs[0][3]) + int(bboxes[1][1] + bboxes[1][3])) / 2)
                                    min_dist = sqrt(delta_x * delta_x + delta_y * delta_y)  # assumes sqrt is imported (e.g. from math)
                                # The two rectangles intersect: the minimum distance is negative, return -1
                                else:
                                    min_dist = -1

                                if classes[1] == 'mat':
                                    if (min_dist / Dy) < 3:
                                        predictions = loaded_model.predict(reg_input)
                                        a1_pred = predictions[0]
                                        b1_pred = predictions[1]
                                        c1_pred = predictions[2]
                                        d1_pred = predictions[3]
                                        xmin, xmax, ymin, ymax = convert(width, height, a1_pred, b1_pred, c1_pred, d1_pred)
                                        start_point = (xmin, ymin)
                                        end_point = (xmax, ymax)
                                        rect1 = xmax - xmin
                                        rect2 = ymax - ymin
                                        check_rect = rect2 / rect1
                                    else:
                                        print("not predict")
                                elif classes[1] == 'sensor':
                                    if (min_dist / Dx) < 3:
                                        predictions = loaded_model.predict(reg_input)
                                        a1_pred = predictions[0]
                                        b1_pred = predictions[1]
                                        c1_pred = predictions[2]
                                        d1_pred = predictions[3]
                                        xmin, xmax, ymin, ymax = convert(width, height, a1_pred, b1_pred, c1_pred, d1_pred)
                                        start_point = (xmin, ymin)
                                        end_point = (xmax, ymax)
                                        rect1 = xmax - xmin
                                        rect2 = ymax - ymin
                                        check_rect = rect2 / rect1
                                    else:
                                        print("not predict")
                            # the second detection is a convenience-store sticker
                            elif classes[1] in ('famSticker', 'okmartSticker', 'sevenSticker'):
                                color = (60, 120, 40)
                                x1, y1, x2, y2 = convert2(width, height, int(boxs[0][0]), int(boxs[0][1]),
                                                          int(boxs[0][0] + boxs[0][2]), int(boxs[0][1] + boxs[0][3]))  # xywh to xmin ymin xmax ymax
                                x3, y3, x4, y4 = convert2(width, height, int(bboxes[1][0]), int(bboxes[1][1]),
                                                          int(bboxes[1][0] + bboxes[1][2]), int(bboxes[1][1] + bboxes[1][3]))  # xywh to xmin ymin xmax ymax
                                reg_input = np.array([[class_index(classes[0]), x1, y1, x2, y2,
                                                       class_index(classes[1]), x3, y3, x4, y4]])
                                #### ratio ####
                                C1_x = boxs[0][0] + (boxs[0][2] / 2)
                                C1_y = boxs[0][1] + (boxs[0][3] / 2)
                                C2_x = bboxes[1][0] + (bboxes[1][2] / 2)
                                C2_y = bboxes[1][1] + (bboxes[1][3] / 2)
                                Dx = (C2_x - C1_x)
                                Dy = (C2_y - C1_y)
                                # The two rectangles do not intersect and partially overlap in the X-axis direction:
                                # the minimum distance is between the lower line of the upper rectangle and the
                                # upper line of the lower rectangle
                                if (Dx < ((int(boxs[0][0] + boxs[0][2]) + int(bboxes[1][0] + bboxes[1][2])) / 2)) and \
                                        (Dy >= ((int(boxs[0][1] + boxs[0][3]) + int(bboxes[1][1] + bboxes[1][3])) / 2)):
                                    min_dist = Dy - ((int(boxs[0][1] + boxs[0][3]) + int(bboxes[1][1] + bboxes[1][3])) / 2)
                                # The two rectangles do not intersect and partially overlap in the Y-axis direction:
                                # the minimum distance is between the right line of the left rectangle and the
                                # left line of the right rectangle
                                elif (Dx >= ((int(boxs[0][0] + boxs[0][2]) + int(bboxes[1][0] + bboxes[1][2])) / 2)) and \
                                        (Dy < ((int(boxs[0][1] + boxs[0][3]) + int(bboxes[1][1] + bboxes[1][3])) / 2)):
                                    min_dist = Dx - ((int(boxs[0][0] + boxs[0][2]) + int(bboxes[1][0] + bboxes[1][2])) / 2)
                                # The two rectangles do not intersect and do not overlap on either axis:
                                # the minimum distance is between the two closest vertices (Pythagorean theorem)
                                elif (Dx >= ((int(boxs[0][0] + boxs[0][2]) + int(bboxes[1][0] + bboxes[1][2])) / 2)) and \
                                        (Dy >= ((int(boxs[0][1] + boxs[0][3]) + int(bboxes[1][1] + bboxes[1][3])) / 2)):
                                    delta_x = Dx - ((int(boxs[0][0] + boxs[0][2]) + int(bboxes[1][0] + bboxes[1][2])) / 2)
                                    delta_y = Dy - ((int(boxs[0][1] + boxs[0][3]) + int(bboxes[1][1] + bboxes[1][3])) / 2)
                                    min_dist = sqrt(delta_x * delta_x + delta_y * delta_y)  # assumes sqrt is imported (e.g. from math)
                                # The two rectangles intersect: the minimum distance is negative, return -1
                                else:
                                    min_dist = -1

                                if classes[0] == 'mat':
                                    if (min_dist / Dy) < 3:
                                        predictions = loaded_model.predict(reg_input)
                                        a1_pred = predictions[0]
                                        b1_pred = predictions[1]
                                        c1_pred = predictions[2]
                                        d1_pred = predictions[3]
                                        xmin, xmax, ymin, ymax = convert(width, height, a1_pred, b1_pred, c1_pred, d1_pred)
                                        start_point = (xmin, ymin)
                                        end_point = (xmax, ymax)
                                        rect1 = xmax - xmin
                                        rect2 = ymax - ymin
                                        check_rect = rect2 / rect1
                                    else:
                                        print("not predict")
                                elif classes[0] == 'sensor':
                                    if (min_dist / Dx) < 3:
                                        predictions = loaded_model.predict(reg_input)
                                        a1_pred = predictions[0]
                                        b1_pred = predictions[1]
                                        c1_pred = predictions[2]
                                        d1_pred = predictions[3]
                                        xmin, xmax, ymin, ymax = convert(width, height, a1_pred, b1_pred, c1_pred, d1_pred)
                                        start_point = (xmin, ymin)
                                        end_point = (xmax, ymax)
                                        rect1 = xmax - xmin
                                        rect2 = ymax - ymin
                                        check_rect = rect2 / rect1
                                    else:
                                        print("not predict")
                        ##############
                        ##########################################
                        ######## check door size and display
                        ######### if check_rect>1 and frame_num !=104:
                        print("check_rect:{}".format(check_rect))
                        if check_rect > 1:
                            blk = np.zeros(frame.shape, np.uint8)
                            cv2.rectangle(blk, start_point, end_point, color, cv2.FILLED)
                            frame = cv2.addWeighted(frame, 1.0, blk, 0.5, 1)
                            print("predict_BBox Coords (xmin, ymin, xmax, ymax): {}".format((xmin, ymin, xmax, ymax)))
                        else:
                            print("not show predicted bbox")
                        ###############################
                        ######## # select one entrace ########
                        # if classes.count('entrance')>1:
                        #     entrance_num=[]
                        #     iou_list=[]
                        #     iou_check=[]
                        #     for i in range(len(classes)):
                        #         if classes[i]=='entrance':
                        #             entrance_num.append(i)
                        #     if len(classes)>1:
                        #         if(contains_duplicates(classes)==False):
                        #             color = (50, 89, 170)
                        #             width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
                        #             height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        #             x1,y1,x2,y2=convert2(width,height,int(boxs[0][0]),int(boxs[0][1]),int(boxs[0][0]+boxs[0][2]),int(boxs[0][1]+boxs[0][3]))  # xywh to xmin ymin xmax ymax
                        #             x3,y3,x4,y4=convert2(width,height,int(bboxes[1][0]),int(bboxes[1][1]),int(bboxes[1][0]+bboxes[1][2]),int(bboxes[1][1]+bboxes[1][3]))  # xywh to xmin ymin xmax ymax
                        #             reg_input=np.array([[class_index(classes[0]),x1,y1,x2,y2,class_index(classes[1]),x3,y3,x4,y4]])
                        #             predictions = loaded_model.predict(reg_input)
                        #             a1_pred = predictions[0]
                        #             b1_pred = predictions[1]
                        #             c1_pred = predictions[2]
                        #             d1_pred = predictions[3]
                        #             xmin,xmax,ymin,ymax=convert(width,height,a1_pred,b1_pred,c1_pred,d1_pred)
                        #             ###IOU###
                        #             GT_bbox_area = (xmax - xmin + 1) * (ymax - ymin + 1)
                        #             ###########
                        #             ##check entrace##
                        #             Pred_bbox_area = (x_bottomright_p - x_topleft_p + 1) * (y_bottomright_p - y_topleft_p + 1)
                        #             x_top_left = np.max([x_topleft_gt, x_topleft_p])
                        #             y_top_left = np.max([y_topleft_gt, y_topleft_p])
                        #             x_bottom_right = np.min([x_bottomright_gt, x_bottomright_p])
                        #             y_bottom_right = np.min([y_bottomright_gt, y_bottomright_p])
                        #             intersection_area = (x_bottom_right - x_top_left + 1) * (y_bottom_right - y_top_left + 1)
                        #             union_area = (GT_bbox_area + Pred_bbox_area - intersection_area)
                        #             iou_check.append(intersection_area/union_area)
                        #     for j in len(iou_check):
                        #         if entrance_num[j]<iou_check.max:
                        #             track.delete

            # if(int(track.track_id)>=3 or (int(track.track_id)>10 and int(track.track_id)<20)):
            # frame_num
            ###################### draw bbox on screen
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]

            if class_name == 'entrance':
                if int(track.track_id) == 1 and frame_num > 121:
                    print("skip Tracker ID: {}, Class: {}".format(str(track.track_id), class_name))
                else:
                    print("RED Tracker ID: {}, Class: {}".format(str(track.track_id), class_name))
                    blk = np.zeros(frame.shape, np.uint8)
                    cv2.rectangle(blk, (int(bbox[0] * 1.05), int(bbox[1] * 1.05)),
                                  (int(bbox[2] * 0.95), int(bbox[3] * 0.95)), (255, 0, 0), cv2.FILLED)
                    frame = cv2.addWeighted(frame, 1.0, blk, 0.5, 1)

            cv2.rectangle(frame, (int(bbox[0] * 1.05), int(bbox[1] * 1.05)),
                          (int(bbox[2] * 0.95), int(bbox[3] * 0.95)), color, 2)
            cv2.rectangle(frame, (int(bbox[0] * 1.05), int(bbox[1] * 1.05 - 30)),
                          (int(bbox[0] * 1.05) + (len(class_name) + len(str(track.track_id))) * 17,
                           int(bbox[1] * 1.05)), color, -1)
            cv2.putText(frame, class_name + "-" + str(track.track_id),
                        (int(bbox[0] * 1.05), int(bbox[1] * 1.05 - 10)), 0, 0.75, (255, 255, 255), 2)

            # if enable info flag then print details about each track
            if FLAGS.info:
                print("Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}".format(
                    str(track.track_id), class_name,
                    (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))))

        # calculate frames per second of running detections
        fps = 1.0 / (time.time() - start_time)
        print("FPS: %.2f" % fps)
        avg.append(fps)
        print("avg fps {}".format(statistics.mean(avg)))
        cv2.putText(frame, "FPS: %.2f" % fps, (50, 500), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (66, 245, 141), 2)
        result = np.asarray(frame)
        result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        if not FLAGS.dont_show:
            cv2.imshow("Output Video", result)

        # if output flag is set, save video file
        if FLAGS.output:
            out.write(result)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
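# The sticker/mat proximity checks above repeat an inline computation of the minimum
# distance between two rectangles (the three commented cases plus the Pythagorean
# corner case). A standalone sketch of that textbook computation on (x, y, w, h)
# boxes, written as a helper rather than the exact inline arithmetic used above.
import math

def min_rect_distance(box_a, box_b):
    """Minimum distance between two axis-aligned rectangles given as (x, y, w, h).
    Returns -1 when the rectangles intersect, mirroring the convention above."""
    ax, ay, aw, ah = box_a
    bx, by, bw, bh = box_b
    # distance between centers
    dx = abs((bx + bw / 2) - (ax + aw / 2))
    dy = abs((by + bh / 2) - (ay + ah / 2))
    half_w = (aw + bw) / 2
    half_h = (ah + bh) / 2
    if dx < half_w and dy >= half_h:       # overlap in x: the gap is vertical
        return dy - half_h
    if dx >= half_w and dy < half_h:       # overlap in y: the gap is horizontal
        return dx - half_w
    if dx >= half_w and dy >= half_h:      # closest points are two corners
        return math.hypot(dx - half_w, dy - half_h)
    return -1                              # rectangles intersect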
def detect(saved_model_loaded, infer, input_size, image_path): # config = ConfigProto() # session = InteractiveSession(config=config) # STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS) # start = time.perf_counter() original_image = cv2.imread(image_path) original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB) image_data = cv2.resize(original_image, (input_size, input_size)) image_data = image_data / 255. count = 0 # get image name by using split method image_name = image_path.split('/')[-1] image_name = image_name.split('.')[0] images_data = [] for i in range(1): images_data.append(image_data) images_data = np.asarray(images_data).astype(np.float32) #infer = saved_model_loaded.signatures['serving_default'] batch_data = tf.constant(images_data) pred_bbox = infer(batch_data) for key, value in pred_bbox.items(): boxes = value[:, :, 0:4] pred_conf = value[:, :, 4:] # run non max suppression on detections boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression( boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)), scores=tf.reshape( pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])), max_output_size_per_class=50, max_total_size=50, iou_threshold=0.20, score_threshold=0.20) # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, xmax, ymax original_h, original_w, _ = original_image.shape bboxes = utils.format_boxes(boxes.numpy()[0], original_h, original_w) # hold all detection data in one variable pred_bbox = [ bboxes, scores.numpy()[0], classes.numpy()[0], valid_detections.numpy()[0] ] # read in all class names from config class_names = utils.read_class_names(cfg.YOLO.CLASSES) ocr(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB), pred_bbox) # count objects found counted_classes = count_objects(pred_bbox, by_class=True, allowed_classes=['person']) # print(counted_classes.items()) if len(counted_classes.items()) != 0: count += counted_classes['person'] else: count += 0 # image = utils.draw_bbox(original_image, pred_bbox, False, counted_classes, allowed_classes=['person']) # image = Image.fromarray(image.astype(np.uint8)) # image.show() # image = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2RGB) # cv2.imwrite(FLAGS.output + 'detection' + str(count) + '.png', image) update_db(count)
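# detect() takes an already-loaded SavedModel plus its serving signature, so the model
# is loaded once and reused across images. A hedged usage sketch; the checkpoint path
# and image glob are placeholders.
import glob
import tensorflow as tf
from tensorflow.python.saved_model import tag_constants

saved_model_loaded = tf.saved_model.load('./checkpoints/yolov4-416',   # assumed path
                                         tags=[tag_constants.SERVING])
infer = saved_model_loaded.signatures['serving_default']

for image_path in glob.glob('./data/images/*.jpg'):                    # placeholder glob
    detect(saved_model_loaded, infer, input_size=416, image_path=image_path)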
def main(_argv): config = ConfigProto() config.gpu_options.allow_growth = True session = InteractiveSession(config=config) STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS) input_size = FLAGS.size image_path = FLAGS.image original_image = cv2.imread(image_path) original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB) # image_data = utils.image_preprocess(np.copy(original_image), [input_size, input_size]) image_data = cv2.resize(original_image, (input_size, input_size)) image_data = image_data / 255. # image_data = image_data[np.newaxis, ...].astype(np.float32) images_data = [] for i in range(1): images_data.append(image_data) images_data = np.asarray(images_data).astype(np.float32) if FLAGS.framework == 'tflite': interpreter = tf.lite.Interpreter(model_path=FLAGS.weights) interpreter.allocate_tensors() input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() print(input_details) print(output_details) interpreter.set_tensor(input_details[0]['index'], images_data) interpreter.invoke() pred = [ interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details)) ] if FLAGS.model == 'yolov3' and FLAGS.tiny == True: boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25, input_shape=tf.constant( [input_size, input_size])) else: boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant( [input_size, input_size])) else: saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING]) infer = saved_model_loaded.signatures['serving_default'] batch_data = tf.constant(images_data) pred_bbox = infer(batch_data) for key, value in pred_bbox.items(): boxes = value[:, :, 0:4] pred_conf = value[:, :, 4:] boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression( boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)), scores=tf.reshape( pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])), max_output_size_per_class=50, max_total_size=50, iou_threshold=FLAGS.iou, score_threshold=FLAGS.score) # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, xmax, ymax original_h, original_w, _ = original_image.shape bboxes = utils.format_boxes(boxes.numpy()[0], original_h, original_w) # hold all detection data in one variable pred_bbox = [ bboxes, scores.numpy()[0], classes.numpy()[0], valid_detections.numpy()[0] ] # read in all class names from config class_names = utils.read_class_names(cfg.YOLO.CLASSES) # by default allow all classes in .names file allowed_classes = list(class_names.values()) # pred_bbox = [boxes.numpy(), scores.numpy(), classes.numpy(), valid_detections.numpy()] # image = utils.draw_bbox(original_image, pred_bbox) # image = utils.draw_bbox(image_data*255, pred_bbox) if FLAGS.count: # count objects found counted_classes = count_objects(pred_bbox, by_class=True, allowed_classes=allowed_classes) # loop through dict and print for key, value in counted_classes.items(): print("Number of {}s: {}".format(key, value)) image = utils.draw_bbox(original_image, pred_bbox, FLAGS.info, counted_classes, allowed_classes=allowed_classes) else: image = utils.draw_bbox(original_image, pred_bbox, FLAGS.info, allowed_classes=allowed_classes) image = Image.fromarray(image.astype(np.uint8)) image.show() image = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2RGB) cv2.imwrite(FLAGS.output, image)
def main(_argv): config = ConfigProto() config.gpu_options.allow_growth = True session = InteractiveSession(config=config) STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS) input_size = FLAGS.size video_path = FLAGS.video # get video name by using split method video_name = video_path.split('/')[-1] video_name = video_name.split('.')[0] if FLAGS.framework == 'tflite': interpreter = tf.lite.Interpreter(model_path=FLAGS.weights) interpreter.allocate_tensors() input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() print(input_details) print(output_details) else: saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING]) infer = saved_model_loaded.signatures['serving_default'] # begin video capture try: vid = cv2.VideoCapture(int(video_path)) except: vid = cv2.VideoCapture(video_path) out = None if FLAGS.output: # by default VideoCapture returns float instead of int width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = int(vid.get(cv2.CAP_PROP_FPS)) codec = cv2.VideoWriter_fourcc(*FLAGS.output_format) out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height)) frame_num = 0 while True: return_value, frame = vid.read() if return_value: frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) frame_num += 1 image = Image.fromarray(frame) else: print('Video has ended or failed, try a different video format!') break frame_size = frame.shape[:2] image_data = cv2.resize(frame, (input_size, input_size)) image_data = image_data / 255. image_data = image_data[np.newaxis, ...].astype(np.float32) start_time = time.time() if FLAGS.framework == 'tflite': interpreter.set_tensor(input_details[0]['index'], image_data) interpreter.invoke() pred = [interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))] if FLAGS.model == 'yolov3' and FLAGS.tiny == True: boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25, input_shape=tf.constant([input_size, input_size])) else: boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant([input_size, input_size])) else: batch_data = tf.constant(image_data) pred_bbox = infer(batch_data) for key, value in pred_bbox.items(): boxes = value[:, :, 0:4] pred_conf = value[:, :, 4:] boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression( boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)), scores=tf.reshape( pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])), max_output_size_per_class=50, max_total_size=50, iou_threshold=FLAGS.iou, score_threshold=FLAGS.score ) # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, xmax, ymax original_h, original_w, _ = frame.shape bboxes = utils.format_boxes(boxes.numpy()[0], original_h, original_w) pred_bbox = [bboxes, scores.numpy()[0], classes.numpy()[0], valid_detections.numpy()[0]] # print(pred_bbox[2]) out_boxes, out_scores, out_classes, num_boxes = pred_bbox # read in all class names from config class_names = utils.read_class_names(cfg.YOLO.CLASSES) # by default allow all classes in .names file # allowed_classes = list(class_names.values()) # custom allowed classes (uncomment line below to allow detections for only SELECTED DETECTION CLASSES) allowed_classes = ['car','truck','motorbike','bus'] # allowed_classes = ['car'] ################################################################################################################################# #Calculting the 
distance to each detected vehicle from the pinhole relation D = f*W/wpix (boxes are xmin, ymin, xmax, ymax) cv2.line(img=frame, pt1=(595,940),pt2=(1567,940), color=(0, 0, 180), thickness=3, lineType=8, shift=0) for i, b in zip(out_boxes,out_classes): f = 1460 ## Focal length of the camera in pixels if i[0]>1 and i[1]>105 and i[2]<1920 and i[3]<910 and int(b) == 2: wpix = i[2] - i[0] # box width in pixels w = 1.7 # assumed car width in meters; most cars are 1.5-1.8 m high and 1.6-1.7 m wide d_original = round((f*w)/wpix, 2) d = d_original - 3 # offset for the camera-to-bumper distance plus the YOLO box margin for cars d = round(d, 2) print("{} meters".format(d_original), end=",") print("{} meters".format(d)) cv2.putText(frame, "{}m".format(d), (int(i[0]+ ((int(i[2]-int(i[0]))/2))), int(i[1])-20), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (36,255,12), 2) elif i[0]>1 and i[1]>105 and i[2]<1920 and i[3]<940 and int(b) == 3: wpix = i[3] - i[1] # box height in pixels h = 0.75 # assumed motorbike height: about 75 cm d_original = round((f*h)/wpix, 2) d = d_original - 2.3 - 1.1 # 2.3 m camera-to-front offset plus 1.1 m YOLO box margin for motorbikes d = round(d, 2) print("{} meters".format(d_original), end=",") print("{} meters".format(d)) cv2.putText(frame, "{}m".format(d), (int(i[0]+ ((int(i[2]-int(i[0]))/2))), int(i[1])-20), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (36,255,12), 2) # buses are long, so estimate from the box width instead of its length elif i[0]>1 and i[1]>105 and i[2]<1920 and i[3]<940 and int(b) == 5: wpix = i[2] - i[0] # box width in pixels w = 2.3 # assumed bus width in meters d_original = round((f*w)/wpix, 2) d = d_original - 2.3 # 2.3 m camera-to-front offset for buses d = round(d, 2) print("{} meters".format(d_original), end=",") print("{} meters".format(d)) cv2.putText(frame, "{}m".format(d), (int(i[0]+ ((int(i[2]-int(i[0]))/2))), int(i[1])-20), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (36,255,12), 2) elif i[0]>1 and i[1]>105 and i[2]<1920 and i[3]<940 and int(b) == 6: wpix = i[3] - i[1] # box height in pixels h = 4.3 # assumed train height: about 4.3 m d_original = round((f*h)/wpix, 2) d = d_original - 2.3 # 2.3 m camera-to-front offset; trains are not expected in this scene d = round(d, 2) print("{} meters".format(d_original), end=",") print("{} meters".format(d)) cv2.putText(frame, "{}m".format(d), (int(i[0]+ ((int(i[2]-int(i[0]))/2))), int(i[1])-20), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (36,255,12), 2) # trucks are long, so estimate from the box height rather than its length elif i[0]>1 and i[1]>105 and i[2]<1920 and i[3]<910 and int(b) == 7: wpix = i[3] - i[1] # box height in pixels h = 3 # assumed truck height: about 3 m d_original = round((f*h)/wpix, 2) d = d_original - 3 # camera-to-front offset plus the YOLO box margin for trucks d = round(d, 2) print("{} meters".format(d_original), end=",") print("{} meters".format(d)) cv2.putText(frame, "{}m".format(d), (int(i[0]+ ((int(i[2]-int(i[0]))/2))), int(i[1])-20), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (36,255,12), 2) else: pass ################################################################################################################################### # if crop flag is enabled, crop each detection and save it as new image if FLAGS.crop: crop_rate = 150 # capture images every so many frames (ex. 
crop photos every 150 frames) crop_path = os.path.join(os.getcwd(), 'detections', 'crop', video_name) try: os.mkdir(crop_path) except FileExistsError: pass if frame_num % crop_rate == 0: final_path = os.path.join(crop_path, 'frame_' + str(frame_num)) try: os.mkdir(final_path) except FileExistsError: pass crop_objects(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), pred_bbox, final_path, allowed_classes) else: pass if FLAGS.count: # count objects found counted_classes = count_objects(pred_bbox, by_class = False, allowed_classes=allowed_classes) # loop through dict and print for key, value in counted_classes.items(): print("Number of {}s: {}".format(key, value)) image = utils.draw_bbox(frame, pred_bbox, FLAGS.info, counted_classes, allowed_classes=allowed_classes, read_plate=FLAGS.plate) else: image = utils.draw_bbox(frame, pred_bbox, FLAGS.info, allowed_classes=allowed_classes, read_plate=FLAGS.plate) fps = 1.0 / (time.time() - start_time) print("FPS: %.2f" % fps) result = np.asarray(image) cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE) result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) if not FLAGS.dont_show: cv2.imshow("result", result) if FLAGS.output: out.write(result) if cv2.waitKey(1) & 0xFF == ord('q'): break cv2.destroyAllWindows()
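The distance overlay above relies on the pinhole-camera relation D = f * W / w_pix, with an assumed real-world size per vehicle class, a focal length of 1460 px, and a fixed per-class offset subtracted at the end. The helper below is a hypothetical, minimal isolation of that arithmetic; the constants are illustrative and would need per-camera calibration.

def pinhole_distance(focal_px, real_size_m, size_px, offset_m=0.0):
    """Distance estimate from apparent size: D = f * W / w_pix, minus a fixed offset."""
    if size_px <= 0:
        raise ValueError("bounding box size must be positive")
    return round(focal_px * real_size_m / size_px - offset_m, 2)

# worked example: a car (~1.7 m wide) whose box is 250 px wide with f = 1460 px
print(pinhole_distance(1460, 1.7, 250, offset_m=3))  # -> 6.93 (meters)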
def main(_argv): # Definition of the parameters max_cosine_distance = 0.5 nn_budget = None nms_max_overlap = 0.8 # initialize deep sort model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) # calculate cosine distance metric metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) # initialize tracker tracker = Tracker(metric) # load configuration for object detector config = ConfigProto() config.gpu_options.allow_growth = True session = InteractiveSession(config=config) # Loading the pretrained model weights saved_model_loaded = tf.saved_model.load(FLAGS.weights_path, tags=[tag_constants.SERVING]) infer = saved_model_loaded.signatures['serving_default'] # Loading the stores configuration JSON file stores_config_filename = 'stores_sections.json' stores_config_filepath = os.path.join( os.path.dirname(os.path.abspath(__file__)), stores_config_filename) stores_sections = load_json_file(stores_config_filepath) #Creating engine to query from the database engine = get_db() # Getting the list of video filenames that had been processed processed_videos = pd.read_sql('SELECT DISTINCT name_video FROM counts', engine) processed_videos = processed_videos.name_video.tolist() # Get the current directory where this file person_tracker.py is located file_directory = os.getcwd() # Changing to the root directory (Google Colab root directory in this case) # to be able to extract the videofilenames in another location os.chdir('/content') # Getting the video filenames available on the repository mypath = FLAGS.videos_repository_path onlyfiles = [ f for f in os.listdir(mypath) if os.path.isfile(os.path.join(mypath, f)) ] # Changing back to the person_tracker.py directory to continue with the process of the videos os.chdir(file_directory) # Computing the video filanames that need to be processed videos_to_process = list(set(onlyfiles) - set(processed_videos)) print("Videos to process: ", len(videos_to_process)) # Loop for process all the videos that had not been processed for i in range(0, len(videos_to_process)): print(f"Processing video: {i+1}/{len(videos_to_process)}") # Initializing variables from the Flags values input_size = FLAGS.size video_path = os.path.join(mypath, videos_to_process[i]) print(video_path) output_csv_path = FLAGS.output_csv_path count_csv_path = FLAGS.count_csv_path file_name = video_path.split("/")[-1] #Extract the date and time information and camera number from the video_path string if len(re.findall('[0-9]{14}', video_path)) == 2: time_start_vid, time_end_vid = re.findall('[0-9]{14}', video_path) time_start_vid_dt = datetime.strptime(str(time_start_vid), '%Y%m%d%H%M%S') time_end_vid_dt = datetime.strptime(str(time_end_vid), '%Y%m%d%H%M%S') camera = int(re.findall(r'_([0-9]{1})_', video_path.lower())[0]) # Limit line points for the people counter # the only cameras of interest to count people in and out is camera 1 and camera 2 if camera == 1: startline = (614, 95) endline = (807, 95) elif camera == 2: startline = (305, 175) endline = (476, 175) else: startline = (0, 0) endline = (0, 0) # Extract the name of the store from the video_path string store_name = re.findall(r'/([a-z0-9\s]*)_*', video_path.lower())[-1] # Change the default name that video filenames have of san diego store if store_name == 'hermeco oficinas': store_name = 'san diego' # Begin video capture try: vid = cv2.VideoCapture(int(video_path)) except: vid = cv2.VideoCapture(video_path) # Get video features out = None width = 
int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = int(vid.get(cv2.CAP_PROP_FPS)) frame_count = int(vid.get( cv2.CAP_PROP_FRAME_COUNT)) # Total number of frames in the video codec = cv2.VideoWriter_fourcc(*FLAGS.output_format) delta_time = (time_end_vid_dt - time_start_vid_dt) / frame_count # get video ready to save locally if flag is set if FLAGS.output_vid: out = cv2.VideoWriter(FLAGS.output_vid, codec, fps, (width, height)) frame_num = 1 # Initialize the fields of the dataframe that will store the detections detections_df = pd.DataFrame({ 'Store_name': [], 'Start_date': [], 'End_date': [], 'current_datetime': [], 'Camera': [], 'Object': [], 'Id': [], 'X_center_original': [], 'Y_center_original': [], 'X_center_perspective': [], 'Y_center_perspective': [], 'X_min': [], 'Y_min': [], 'X_max': [], 'Y_max': [], 'Frame': [] }) temp = pd.DataFrame() # vector that will store the las 15 locations of each track pts = [deque(maxlen=15) for _ in range(10000)] counter_out = [] counter_in = [] start_process = time.time() # while video is running while True: # Get the frame image from the video return_value, frame = vid.read() # transform the defailt color of OpenCV frame BGR to RGB if return_value: frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) image = Image.fromarray(frame) else: print( 'Video has ended or failed, try a different video format!') break #print('Frame #: ', frame_num) # Preprocessing the frame image frame_size = frame.shape[:2] image_data = cv2.resize(frame, (input_size, input_size)) image_data = image_data / 255. image_data = image_data[np.newaxis, ...].astype(np.float32) start_time = time.time() # Getting all the bounding boxes of the detections and their respective confidence batch_data = tf.constant(image_data) pred_bbox = infer(batch_data) for key, value in pred_bbox.items(): boxes = value[:, :, 0:4] pred_conf = value[:, :, 4:] # Applying Non-maximum Suppression to get the best bounding box for each detection boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression( boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)), scores=tf.reshape( pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])), max_output_size_per_class=50, max_total_size=50, iou_threshold=FLAGS.iou, score_threshold=FLAGS.score_th) # Convert data to numpy arrays and slice out unused elements num_objects = valid_detections.numpy()[0] bboxes = boxes.numpy()[0] bboxes = bboxes[0:int(num_objects)] scores = scores.numpy()[0] scores = scores[0:int(num_objects)] classes = classes.numpy()[0] classes = classes[0:int(num_objects)] # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height original_h, original_w, _ = frame.shape bboxes = utils.format_boxes(bboxes, original_h, original_w) # store all predictions in one parameter for simplicity when calling functions pred_bbox = [bboxes, scores, classes, num_objects] # read in all class names from config class_names = utils.read_class_names(cfg.YOLO.CLASSES) # by default allow all classes in .names file #allowed_classes = list(class_names.values()) # custom allowed classes (uncomment line below to customize tracker for only people) allowed_classes = ['person'] # loop through objects and use class index to get class name, allow only classes in allowed_classes list names = [] deleted_indx = [] for i in range(num_objects): class_indx = int(classes[i]) class_name = class_names[class_indx] if class_name not in allowed_classes: deleted_indx.append(i) else: 
names.append(class_name) names = np.array(names) count = len(names) if FLAGS.display_count: cv2.putText(frame, "Objects being tracked: {}".format(count), (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2) print("Objects being tracked: {}".format(count)) # delete detections that are not in allowed_classes bboxes = np.delete(bboxes, deleted_indx, axis=0) scores = np.delete(scores, deleted_indx, axis=0) # encode yolo detections and feed to tracker features = encoder(frame, bboxes) detections = [ Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip( bboxes, scores, names, features) ] #initialize color map cmap = plt.get_cmap('tab20b') colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)] # run non-maxima supression boxs = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) classes = np.array([d.class_name for d in detections]) indices = preprocessing.non_max_suppression( boxs, classes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) # Computing the time has passed since the beggining of the video to the current frame delta_time_frame = delta_time * (frame_num - 1) # update tracks for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() class_name = track.get_class() # draw bbox on screen color = colors[int(track.track_id) % len(colors)] color = [i * 255 for i in color] cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle( frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText(frame, class_name + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) # if enable info flag then print details about each track if FLAGS.info: print( f"Tracker ID: {str(track.track_id)}, Class: {class_name}, BBox Coords (xmin, ymin, xmax, ymax): {(int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))}" ) # computing the bottom center of the bounding box center = (int(((bbox[0]) + (bbox[2])) / 2), int(bbox[3])) # Loop for the diferent configured sections of the store for the current camera for sec in stores_sections[store_name][ f'camera_{camera}'].keys(): # Get the 4 points of the section bound_section_points = stores_sections[store_name][ f'camera_{camera}'][sec]['camera_view_points'] # Verify if the center point of the detection is in the section region mpltPath_path = mpltPath.Path(bound_section_points) inside = mpltPath_path.contains_point(list(center)) if inside: # Get the perspective transformation matrix transform_matrix = np.array( stores_sections[store_name][f'camera_{camera}'] [sec]['transformation_matrix']) # Apply the transformation matrix to transform the point to the blueprint perspective transformed_center = point_perspective_transform( center, transform_matrix)[0] break else: transformed_center = [0, 0] # Appending the center to the corrent track pts[track.track_id].append(center) for j in range(1, len(pts[track.track_id])): if pts[track.track_id][j - 1] is None or pts[ track.track_id][j] is None: continue thickness = int(np.sqrt(64 / float(j + 1)) * 2) cv2.line(frame, (pts[track.track_id][j - 1]), (pts[track.track_id][j]), color, thickness) height, width, _ = frame.shape ##cv2.line(frame,(0,int(3*height/6)),(width,int(3*height/6)),(0,0,255), thickness = 2) 
#cv2.line(frame,(193,183),(650,183),(0,0,255),2) cv2.line(frame, startline, endline, (0, 0, 255), 2) # split the bounding box bottom center coordinates center_x = center[0] if (camera == 2): center_y = int(bbox[3]) else: center_y = int(((bbox[1]) + (bbox[3])) / 2) # Counting if the track is leaving or entering the camera section # based in the direction the person is crossing a fixed line if (center_y <= int(startline[1] + 20)) and ( center_y >= int(startline[1] - 20)) and ( center_x >= int(startline[0] - 30)) and ( center_x <= int(endline[0] + 30)): if class_name == 'person': list_y = [i[1] for i in pts[track.track_id]] in_var = all(x < y for x, y in zip(list_y, list_y[1:])) out_var = all(x > y for x, y in zip(list_y, list_y[1:])) if in_var and len(list_y) > 1: counter_in.append(int(track.track_id)) elif out_var and len(list_y) > 1: counter_out.append(int(track.track_id)) # Adding the current track detection data to the dataframe temp = pd.DataFrame({ 'Store_name': [store_name], 'Start_date': [time_start_vid_dt], 'End_date': [time_end_vid_dt], 'current_datetime': [time_start_vid_dt + delta_time_frame], 'Camera': [int(camera)], 'Object': [class_name], 'Id': [int(track.track_id)], 'X_center_original': [int(center[0])], 'Y_center_original': [int(center[1])], 'X_center_perspective': [int(transformed_center[0])], 'Y_center_perspective': [int(transformed_center[1])], 'X_min': [int(bbox[0])], 'Y_min': [int(bbox[1])], 'X_max': [int(bbox[2])], 'Y_max': [int(bbox[3])], 'Frame': [int(frame_num)] }) detections_df = pd.concat([detections_df, temp], ignore_index=True) # Getting the total in and out counts total_count_in = len(set(counter_in)) total_count_out = len(set(counter_out)) cv2.putText(frame, 'Total Count In:' + str(len(set(counter_in))), (0, 130), 0, 1, (0, 0, 255), 2) cv2.putText(frame, 'Total Count Out:' + str(len(set(counter_out))), (0, 200), 0, 1, (0, 0, 255), 2) frame_num += 1 # calculate frames per second of running detections fps = 1.0 / (time.time() - start_time) #print("FPS: %.2f" % fps) result = np.asarray(frame) result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) # if output flag is set, save video file if FLAGS.output_vid: out.write(result) #if cv2.waitKey(1) & 0xFF == ord('q'): break print("Total Processing time: ", time.time() - start_process) cv2.destroyAllWindows() # saving the detections data into a csv detections_df.to_csv(output_csv_path, index=False) print("The detections file was successfully saved!") # Adding the video counts data to the dataframe if (camera == 1) or (camera == 2): count_df_in = pd.DataFrame({ 'Store_name': [store_name], 'Start_date': [time_start_vid_dt], 'End_date': [time_end_vid_dt], 'Camera': [camera], 'Count': [total_count_in], 'inout': "In", 'name_video': [file_name] }) count_df_out = pd.DataFrame({ 'Store_name': [store_name], 'Start_date': [time_start_vid_dt], 'End_date': [time_end_vid_dt], 'Camera': [camera], 'Count': [total_count_out], 'inout': "Out", 'name_video': [file_name] }) else: count_df_in = pd.DataFrame({ 'Store_name': [store_name], 'Start_date': [time_start_vid_dt], 'End_date': [time_end_vid_dt], 'Camera': [camera], 'Count': [0], 'inout': "In", 'name_video': [file_name] }) count_df_out = pd.DataFrame({ 'Store_name': [store_name], 'Start_date': [time_start_vid_dt], 'End_date': [time_end_vid_dt], 'Camera': [camera], 'Count': [0], 'inout': "Out", 'name_video': [file_name] }) count_df = pd.concat([count_df_in, count_df_out], ignore_index=True) # saving the count data into into a csv count_df.to_csv(count_csv_path, index=False) print("The counts 
files were successfully saved!") #upload the detections data to the database upload_to_db(detections_df, 'tracker', 'append') # passing the dataframe #upload the count data to the database upload_to_db(count_df, 'counts', 'append')
def main(_argv): with open("./config_birdview.yml", "r") as ymlfile: bird_view_cfg = yaml.load(ymlfile) width_og, height_og = 0, 0 corner_points = [] for section in bird_view_cfg: corner_points.append(bird_view_cfg["image_parameters"]["p1"]) corner_points.append(bird_view_cfg["image_parameters"]["p2"]) corner_points.append(bird_view_cfg["image_parameters"]["p3"]) corner_points.append(bird_view_cfg["image_parameters"]["p4"]) width_og = int(bird_view_cfg["image_parameters"]["width_og"]) height_og = int(bird_view_cfg["image_parameters"]["height_og"]) img_path = bird_view_cfg["image_parameters"]["img_path"] size_height = bird_view_cfg["image_parameters"]["size_height"] size_width = bird_view_cfg["image_parameters"]["size_width"] tr = np.array([ bird_view_cfg["image_parameters"]["p4"][0], bird_view_cfg["image_parameters"]["p4"][1], ]) tl = np.array([ bird_view_cfg["image_parameters"]["p2"][0], bird_view_cfg["image_parameters"]["p2"][1], ]) br = np.array([ bird_view_cfg["image_parameters"]["p3"][0], bird_view_cfg["image_parameters"]["p3"][1], ]) bl = np.array([ bird_view_cfg["image_parameters"]["p1"][0], bird_view_cfg["image_parameters"]["p1"][1], ]) widthA = np.sqrt(((br[0] - bl[0])**2) + ((br[1] - bl[1])**2)) widthB = np.sqrt(((tr[0] - tl[0])**2) + ((tr[1] - tl[1])**2)) maxWidth = max(int(widthA), int(widthB)) heightA = np.sqrt(((tr[0] - br[0])**2) + ((tr[1] - br[1])**2)) heightB = np.sqrt(((tl[0] - bl[0])**2) + ((tl[1] - bl[1])**2)) maxHeight = max(int(heightA), int(heightB)) matrix, imgOutput = compute_perspective_transform(corner_points, maxWidth, maxHeight, cv2.imread(img_path)) height, width, _ = imgOutput.shape dim = (width, height) # Definition of the parameters max_cosine_distance = 0.4 nn_budget = None nms_max_overlap = 1.0 # initialize deep sort model_filename = "model_data/mars-small128.pb" encoder = gdet.create_box_encoder(model_filename, batch_size=1) # calculate cosine distance metric metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) # initialize tracker tracker = Tracker(metric) # load configuration for object detector config = ConfigProto() config.gpu_options.allow_growth = True session = InteractiveSession(config=config) STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS) input_size = FLAGS.size video_path = FLAGS.video # load tflite model if flag is set if FLAGS.framework == "tflite": interpreter = tf.lite.Interpreter(model_path=FLAGS.weights) interpreter.allocate_tensors() input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() print(input_details) print(output_details) # otherwise load standard tensorflow saved model else: saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING]) infer = saved_model_loaded.signatures["serving_default"] # begin video capture try: vid = cv2.VideoCapture(int(video_path)) except: vid = cv2.VideoCapture(video_path) output_video_1, output_video_2 = None, None # get video ready to save locally if flag is set if FLAGS.output: # by default VideoCapture returns float instead of int """ width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) """ fps = int(vid.get(cv2.CAP_PROP_FPS)) codec = cv2.VideoWriter_fourcc(*FLAGS.output_format) out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height)) frame_num = 0 # while video is running while True: black_img = cv2.imread("./black_bg.png") black_img = cv2.resize(black_img, dim, interpolation=cv2.INTER_AREA) return_value, frame = vid.read() 
if return_value: frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) image = Image.fromarray(frame) else: print("Video has ended or failed, try a different video format!") break frame_num += 1 print("Frame #: ", frame_num) frame_size = frame.shape[:2] image_data = cv2.resize(frame, (input_size, input_size)) image_data = image_data / 255.0 image_data = image_data[np.newaxis, ...].astype(np.float32) start_time = time.time() # run detections on tflite if flag is set if FLAGS.framework == "tflite": interpreter.set_tensor(input_details[0]["index"], image_data) interpreter.invoke() pred = [ interpreter.get_tensor(output_details[i]["index"]) for i in range(len(output_details)) ] # run detections using yolov3 if flag is set if FLAGS.model == "yolov3" and FLAGS.tiny == True: boxes, pred_conf = filter_boxes( pred[1], pred[0], score_threshold=0.25, input_shape=tf.constant([input_size, input_size]), ) else: boxes, pred_conf = filter_boxes( pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant([input_size, input_size]), ) else: batch_data = tf.constant(image_data) pred_bbox = infer(batch_data) for key, value in pred_bbox.items(): boxes = value[:, :, 0:4] pred_conf = value[:, :, 4:] ( boxes, scores, classes, valid_detections, ) = tf.image.combined_non_max_suppression( boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)), scores=tf.reshape( pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])), max_output_size_per_class=50, max_total_size=50, iou_threshold=FLAGS.iou, score_threshold=FLAGS.score, ) # convert data to numpy arrays and slice out unused elements num_objects = valid_detections.numpy()[0] bboxes = boxes.numpy()[0] bboxes = bboxes[0:int(num_objects)] scores = scores.numpy()[0] scores = scores[0:int(num_objects)] classes = classes.numpy()[0] classes = classes[0:int(num_objects)] # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height original_h, original_w, _ = frame.shape bboxes = utils.format_boxes(bboxes, original_h, original_w) # store all predictions in one parameter for simplicity when calling functions pred_bbox = [bboxes, scores, classes, num_objects] # read in all class names from config class_names = utils.read_class_names(cfg.YOLO.CLASSES) # by default allow all classes in .names file # allowed_classes = list(class_names.values()) # custom allowed classes (uncomment line below to customize tracker for only people) allowed_classes = ["person"] # loop through objects and use class index to get class name, allow only classes in allowed_classes list names = [] deleted_indx = [] for i in range(num_objects): class_indx = int(classes[i]) class_name = class_names[class_indx] if class_name not in allowed_classes: deleted_indx.append(i) else: names.append(class_name) names = np.array(names) count = len(names) if FLAGS.count: cv2.putText( frame, "Objects being tracked: {}".format(count), (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2, ) print("Objects being tracked: {}".format(count)) # delete detections that are not in allowed_classes bboxes = np.delete(bboxes, deleted_indx, axis=0) scores = np.delete(scores, deleted_indx, axis=0) # encode yolo detections and feed to tracker features = encoder(frame, bboxes) detections = [ Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip( bboxes, scores, names, features) ] # initialize color map cmap = plt.get_cmap("tab20b") colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)] # run non-maxima supression boxs = np.array([d.tlwh for d in detections]) 
scores = np.array([d.confidence for d in detections]) classes = np.array([d.class_name for d in detections]) indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) bbox_array = [] # update tracks for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() bbox_array.append( (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))) class_name = track.get_class() # draw bbox on screen color = colors[int(track.track_id) % len(colors)] color = [i * 255 for i in color] cv2.rectangle( frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2, ) cv2.rectangle( frame, (int(bbox[0]), int(bbox[1] - 30)), ( int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1]), ), color, -1, ) cv2.putText( frame, class_name + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2, ) # if enable info flag then print details about each track if FLAGS.info: print( "Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}" .format( str(track.track_id), class_name, (int(bbox[0]), int(bbox[1]), int(bbox[2]), int( bbox[3])), )) if len(bbox_array) >= 1: array_centroids, array_groundpoints = get_centroids_and_groundpoints( bbox_array) transformed_downoids = compute_point_perspective_transformation( matrix, array_centroids) # Show every point on the top view image for point in transformed_downoids: x, y = point cv2.circle(black_img, (x, y), 60, (0, 255, 0), 2) cv2.circle(black_img, (x, y), 3, (0, 255, 0), -1) # calculate frames per second of running detections fps = 1.0 / (time.time() - start_time) print("FPS: %.2f" % fps) result = np.asarray(frame) # result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) if not FLAGS.dont_show: cv2.imshow("Output Video", result) # if output flag is set, save video file if FLAGS.output: if output_video_1 is None and output_video_2 is None: fourcc1 = cv2.VideoWriter_fourcc(*"MJPG") output_video_1 = cv2.VideoWriter( "./video.avi", fourcc1, 25, (frame.shape[1], frame.shape[0]), True) fourcc2 = cv2.VideoWriter_fourcc(*"MJPG") output_video_2 = cv2.VideoWriter( "./bird_view.avi", fourcc2, 25, (black_img.shape[1], black_img.shape[0]), True, ) elif output_video_1 is not None and output_video_2 is not None: output_video_1.write(frame) output_video_2.write(black_img) if cv2.waitKey(1) & 0xFF == ord("q"): break cv2.destroyAllWindows()
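compute_perspective_transform and compute_point_perspective_transformation are project helpers not shown in this file. The sketch below reproduces the same idea with plain OpenCV calls: map four hand-picked ground-plane corners (hypothetical values) to a top-down canvas, then project a detection's bottom-center point through the resulting matrix.

import cv2
import numpy as np

# four corner points of the ground plane as seen by the camera (hypothetical values)
corner_points = np.float32([[100, 400], [500, 380], [560, 700], [60, 720]])

# where those corners should land on the top-down ("bird view") canvas
bird_view_w, bird_view_h = 400, 600
target = np.float32([[0, 0], [bird_view_w, 0], [bird_view_w, bird_view_h], [0, bird_view_h]])

matrix = cv2.getPerspectiveTransform(corner_points, target)

# project the bottom-center ("ground") point of a detection onto the top view
ground_points = np.float32([[[320, 560]]])            # shape (1, N, 2) for perspectiveTransform
top_view_points = cv2.perspectiveTransform(ground_points, matrix)
print(top_view_points[0, 0])                          # x, y on the bird-view canvas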
def main(_argv): # Definition of the parameters max_cosine_distance = 0.4 nn_budget = None nms_max_overlap = 1.0 # initialize deep sort model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) # calculate cosine distance metric metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) # initialize tracker tracker = Tracker(metric) # load configuration for object detector config = ConfigProto() config.gpu_options.allow_growth = False config.gpu_options.per_process_gpu_memory_fraction = 0.1 _ = InteractiveSession(config=config) utils.load_config(FLAGS) input_size = FLAGS.size video_path = FLAGS.video # load tflite model if flag is set if FLAGS.framework == 'tflite': interpreter = tf.lite.Interpreter( model_path=f'{FLAGS.weights}_{FLAGS.size}') interpreter.allocate_tensors() input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() print(input_details) print(output_details) # otherwise load standard tensorflow saved model else: saved_model_loaded = tf.saved_model.load( f'{FLAGS.weights}_{FLAGS.size}', tags=[tag_constants.SERVING]) infer = saved_model_loaded.signatures['serving_default'] # begin video capture try: vid = cv2.VideoCapture(int(video_path)) except: vid = cv2.VideoCapture(video_path) out = None # get video ready to save locally if flag is set if FLAGS.output: # by default VideoCapture returns float instead of int width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = int(vid.get(cv2.CAP_PROP_FPS)) codec = cv2.VideoWriter_fourcc(*FLAGS.output_format) out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height)) all_start_time = None frame_num = 0 # while video is running while True: return_value, frame = vid.read() if return_value: frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) Image.fromarray(frame) else: fps = float(frame_num) / (time.time() - all_start_time) print("fps=%.2f size=%d frames=%d deep=%s output=%s" % (fps, FLAGS.size, frame_num, "true" if FLAGS.deep else "false", FLAGS.output)) break frame_num += 1 if FLAGS.info: print("frame_num=%d" % frame_num) start_time = time.time() if all_start_time is None: all_start_time = time.time() image_data = cv2.resize(frame, (input_size, input_size)) image_data = image_data / 255. 
image_data = image_data[np.newaxis, ...].astype(np.float32) # run detections on tflite if flag is set if FLAGS.framework == 'tflite': interpreter.set_tensor(input_details[0]['index'], image_data) interpreter.invoke() pred = [ interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details)) ] # run detections using yolov3 if flag is set if FLAGS.model == 'yolov3' and FLAGS.tiny == True: boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25, input_shape=tf.constant( [input_size, input_size])) else: boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant( [input_size, input_size])) else: batch_data = tf.constant(image_data) pred_bbox = infer(batch_data) for _, value in pred_bbox.items(): boxes = value[:, :, 0:4] pred_conf = value[:, :, 4:] boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression( boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)), scores=tf.reshape( pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])), max_output_size_per_class=50, max_total_size=50, iou_threshold=FLAGS.iou, score_threshold=FLAGS.score) # convert data to numpy arrays and slice out unused elements num_objects = valid_detections.numpy()[0] bboxes = boxes.numpy()[0] bboxes = bboxes[0:int(num_objects)] scores = scores.numpy()[0] scores = scores[0:int(num_objects)] classes = classes.numpy()[0] classes = classes[0:int(num_objects)] # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height original_h, original_w, _ = frame.shape bboxes = utils.format_boxes(bboxes, original_h, original_w) # store all predictions in one parameter for simplicity when calling functions pred_bbox = [bboxes, scores, classes, num_objects] # read in all class names from config class_names = utils.read_class_names(cfg.YOLO.CLASSES) # by default allow all classes in .names file allowed_classes = list(class_names.values()) # custom allowed classes (uncomment line below to customize tracker for only people) allowed_classes = ['person'] # loop through objects and use class index to get class name, allow only classes in allowed_classes list names = [] deleted_indx = [] for i in range(num_objects): class_indx = int(classes[i]) class_name = class_names[class_indx] if class_name not in allowed_classes: deleted_indx.append(i) else: names.append(class_name) names = np.array(names) count = len(names) if FLAGS.count: cv2.putText(frame, "Objects being tracked: {}".format(count), (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2) print("Objects being tracked: {}".format(count)) # delete detections that are not in allowed_classes bboxes = np.delete(bboxes, deleted_indx, axis=0) scores = np.delete(scores, deleted_indx, axis=0) # encode yolo detections and feed to tracker if FLAGS.deep: features = encoder(frame, bboxes) else: features = np.empty((len(bboxes), 0), np.float32) detections = [ Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip( bboxes, scores, names, features) ] #initialize color map cmap = plt.get_cmap('tab20b') colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)] # run non-maxima supression boxs = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) classes = np.array([d.class_name for d in detections]) indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) # update 
tracks for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() class_name = track.get_class() # draw bbox on screen color = colors[int(track.track_id) % len(colors)] color = [i * 255 for i in color] cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle(frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText(frame, class_name + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) # if enable info flag then print details about each track if FLAGS.info: print( "Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}" .format(str(track.track_id), class_name, (int( bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])))) result = np.asarray(frame) result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) if not FLAGS.dont_show: cv2.imshow("Output Video", result) # if output flag is set, save video file if FLAGS.output: out.write(result) # calculate frames per second of running detections if FLAGS.info: fps = 1.0 / (time.time() - start_time) print("fps=%.2f" % fps) if not FLAGS.dont_show: if cv2.waitKey(1) & 0xFF == ord('q'): break if not FLAGS.dont_show: cv2.destroyAllWindows()
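This variant only runs the appearance encoder when FLAGS.deep is set; otherwise it feeds zero-length feature vectors, presumably trading appearance matching for speed. DeepSORT's "cosine" metric compares appearance embeddings by cosine distance; the snippet below is a self-contained sketch of that comparison (not the deep_sort library code), using a random 128-dimensional vector in place of a real mars-small128 embedding.

import numpy as np

def cosine_distance(a, b):
    """1 - cosine similarity between two appearance embeddings."""
    a = np.asarray(a, dtype=np.float32)
    b = np.asarray(b, dtype=np.float32)
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    if denom == 0:
        return 1.0
    return 1.0 - float(np.dot(a, b) / denom)

track_feature = np.random.rand(128)                        # stand-in for a stored track embedding
detection_feature = track_feature + 0.01 * np.random.rand(128)
print(cosine_distance(track_feature, detection_feature))   # small value -> likely the same person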
def startRecording_YOLO(): date_and_time = time.strftime("%Y%m%d-%H-%M-%S") #Stores current date and time in YYYY-MM-DD-HH:MM format vid_out_path = os.path.join(PROJECT_DIR, 'YoloV4', 'outputs', date_and_time + '.avi') #vid = cv2.VideoCapture(test_drive) #0 for webcam/Raspberry Pi Cam videothread = VideoThread(resolution=(640,480), framerate=30).start() width = int(videothread.stream.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(videothread.stream.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = int(videothread.stream.get(cv2.CAP_PROP_FPS)) codec = cv2.VideoWriter_fourcc(*'XVID') output_video = cv2.VideoWriter(vid_out_path, codec, fps, (width,height)) #width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) #height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) #fps = int(vid.get(cv2.CAP_PROP_FPS)) #codec = cv2.VideoWriter_fourcc(*'XVID') #output_video = cv2.VideoWriter(vid_out_path, codec, fps, (width,height)) frame_number = 0 freq = cv2.getTickFrequency() avg_fps = 0 #while video is running/recording while True: return_val, frame = videothread.read() #return_val, frame = vid.read() if return_val: #frame = cv2.flip(frame, -1) frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) image = Image.fromarray(frame) else: print('Video error, try another format') break frame_number += 1 #print('Frame #: ', frame_number) frame_size = frame.shape[:2] image_data = cv2.resize(frame, (input_size, input_size)) image_data = image_data/ 255. #mage_data = np.expand_dims(frame_resized, axis = 0) #if floating_model: # image_data = (np.float32(image_data) - 127.5)/127.5 image_data = image_data[np.newaxis, ...].astype(np.float32) #Converts image data to a float32 type start_time = time.time() #TFLite Detections interpreter.set_tensor(input_details[0]['index'], image_data) interpreter.invoke() prediction = [interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))] #box = interpreter.get_tensor(output_details[0]['index'])[0] #scores = interpreter.get_tensor(output_details[2]['index'])[0] boxes, prediction_conf = filter_boxes(prediction[0], prediction[1], score_threshold=0.4, input_shape=tf.constant([input_size, input_size])) #Reshape = returns a new tensor that has the same values as tensor in the same order, but with a new shape given by shape #Shape = returns a 1-D integer tensor, represents the shape of the input boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression( boxes = tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)), scores = tf.reshape(prediction_conf, (tf.shape(prediction_conf)[0], -1, tf.shape(prediction_conf)[-1])), max_output_size_per_class = 50, max_total_size = 50, iou_threshold = 0.45, score_threshold = 0.5 ) #convert the received data into numpy arrays, then slice out unused elements number_of_objects = valid_detections.numpy()[0] bboxes = boxes.numpy()[0] bboxes = bboxes[0 : int(number_of_objects)] scores = scores.numpy()[0] scores = scores[0 : int(number_of_objects)] classes = classes.numpy()[0] classes = classes[0 : int(number_of_objects)] #format bounding boxes with normalized minimums and maximums of x and y original_h, original_w, _ = frame.shape bboxes = utils.format_boxes(bboxes, original_h, original_w) prediction_bbox = [bboxes, scores, classes, number_of_objects] #Read in all the class names from config and only allow certain ones to be detected (eases computation power) class_names = utils.read_class_names(cfg.YOLO.CLASSES) allowed_classes = ['traffic light', 'person', 'car', 'stop sign'] #loop through objects and get classification name, using only the ones 
allows in allowed_classes names = [] deleted_indx = [] for i in range(number_of_objects): classification_index = int(classes[i]) class_name = class_names[classification_index] if class_name not in allowed_classes: deleted_indx.append(i) else: names.append(class_name) names = np.array(names) count = len(names) #delete irrelevant detections (not in allowed_classes) bboxes = np.delete(bboxes, deleted_indx, axis = 0) scores = np.delete(scores, deleted_indx, axis = 0) #Feed tracker with encoded yolo detections detections_features = encoder(frame, bboxes) detections = [Detection(bbox, score, class_name, detection_feature) for bbox, score, class_name, detection_feature in zip(bboxes, scores, names, detections_features)] #initialize color map cmap = plt.get_cmap('tab20b') colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)] #run non-maxima supression (reduces amount of detected entities to as little as possible) boxs = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) classes = np.array([d.class_name for d in detections]) indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores) detections = [detections[i] for i in indices] #Call tracker tracker.predict() tracker.update(detections) #update tracks for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() class_name = track.get_class() #if class_name == 'person': print('person found') #change frame to that which showcases the lane detection #frame = lane_detect.detect_edges(frame) #COMMENT OUT IF/WHEN ERROR OCCURS #distance approximation (barebones, needs more adjusting) cam_parameter = 18 #change with different cameras. Gets the detected distance closer to actual distance distance = (np.pi)/(bbox[2].item() + bbox[3].item()) * 1000 + cam_parameter det_dest = str(int(distance)) #draw bounded box on screen color = colors[int(track.track_id) % len(colors)] color = [i * 255 for i in color] cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle(frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(det_dest)) * 18, int(bbox[1])), color, -1) #cv2.putText(frame, class_name + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) cv2.putText(frame, class_name + ": " + str(int(distance)), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) #calculate fps of running detections fps = 1.0/ (time.time() - start_time) avg_fps = avg_fps + fps #print("FPS: %.2f" % fps) cv2.putText(frame, "FPS: " + str(int(fps)), (width - 100, height - 20),0, 0.75, (255,255,255),2) result = np.asarray(frame) result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) cv2.imshow("Output Video", result) output_video.write(result) if cv2.waitKey(1) & 0xFF == ord('q'): break cv2.destroyAllWindows() print('Average FPS: ', (avg_fps/frame_number)) print('Number of Frames: ', frame_number) videothread.stop()
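VideoThread is defined elsewhere in the project, so only its usage appears above. The class below is a hypothetical sketch of one common shape for such a helper, a background thread that always keeps the latest camera frame so the detection loop never blocks on I/O; the real class may differ in naming and behavior.

import threading
import cv2

class LatestFrameReader:
    """Grab frames on a background thread and expose only the newest one."""

    def __init__(self, source=0):
        self.stream = cv2.VideoCapture(source)
        self.ok, self.frame = self.stream.read()
        self.stopped = False

    def start(self):
        threading.Thread(target=self._update, daemon=True).start()
        return self

    def _update(self):
        while not self.stopped:
            self.ok, self.frame = self.stream.read()

    def read(self):
        return self.ok, self.frame

    def stop(self):
        self.stopped = True
        self.stream.release()

# usage sketch: reader = LatestFrameReader(0).start(); ok, frame = reader.read(); reader.stop()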
def main(_argv): config = ConfigProto() config.gpu_options.allow_growth = True session = InteractiveSession(config=config) STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS) input_size = FLAGS.size images = FLAGS.images # load model if FLAGS.framework == 'tflite': interpreter = tf.lite.Interpreter(model_path=FLAGS.weights) else: saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING]) # loop through images in list and run Yolov4 model on each for count, image_path in enumerate(images, 1): original_image = cv2.imread(image_path) original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB) image_data = cv2.resize(original_image, (input_size, input_size)) image_data = image_data / 255. images_data = [] for i in range(1): images_data.append(image_data) images_data = np.asarray(images_data).astype(np.float32) if FLAGS.framework == 'tflite': interpreter.allocate_tensors() input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() print(input_details) print(output_details) interpreter.set_tensor(input_details[0]['index'], images_data) interpreter.invoke() pred = [interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))] if FLAGS.model == 'yolov3' and FLAGS.tiny == True: boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25, input_shape=tf.constant([input_size, input_size])) else: boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant([input_size, input_size])) else: infer = saved_model_loaded.signatures['serving_default'] batch_data = tf.constant(images_data) pred_bbox = infer(batch_data) for key, value in pred_bbox.items(): boxes = value[:, :, 0:4] pred_conf = value[:, :, 4:] boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression( boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)), scores=tf.reshape( pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])), max_output_size_per_class=50, max_total_size=50, iou_threshold=FLAGS.iou, score_threshold=FLAGS.score ) original_h, original_w, _ = original_image.shape bboxes = utils.format_boxes(boxes.numpy()[0], original_h, original_w) pred_bbox = [bboxes, scores.numpy()[0], classes.numpy()[0], valid_detections.numpy()[0]] # read in all class names from config class_names = utils.read_class_names(cfg.YOLO.CLASSES) # by default allow all classes in .names file # allowed_classes = list(class_names.values()) # custom allowed classes (uncomment line below to allow detections for only people) # allowed_classes = ['person'] allowed_classes = ['Mask Person'] image = utils.draw_bbox(original_image, pred_bbox, allowed_classes = allowed_classes) if FLAGS.covid: distance = social_distance(pred_bbox,original_image, allowed_classes) image = Image.fromarray(image.astype(np.uint8)) if not FLAGS.dont_show: image.show() image = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2RGB) cv2.imwrite(FLAGS.output + 'detection' + str(count) + '.png', image)
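social_distance is imported from elsewhere in the project and its implementation is not shown. Below is a minimal sketch of one common approach, flagging pairs of detections whose box centers fall within a pixel threshold; close_pairs and min_pixels are hypothetical, and a real check would usually measure distance in ground-plane coordinates rather than raw pixels.

import numpy as np
from itertools import combinations

def close_pairs(bboxes, min_pixels=150):
    """Return index pairs whose box centers are closer than min_pixels (a rough proxy)."""
    centers = [((x1 + x2) / 2.0, (y1 + y2) / 2.0) for x1, y1, x2, y2 in bboxes]
    pairs = []
    for (i, a), (j, b) in combinations(enumerate(centers), 2):
        if np.hypot(a[0] - b[0], a[1] - b[1]) < min_pixels:
            pairs.append((i, j))
    return pairs

boxes = [(10, 10, 110, 210), (60, 20, 160, 220), (500, 10, 600, 210)]
print(close_pairs(boxes))  # [(0, 1)] -- the first two people stand too close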
def main(self, frame_data): # Definition of the parameters nms_max_overlap = 1.0 # set HyperParams size = 416 iou = 0.45 score = 0.50 info = False input_size = size self.person1.is_used = 0 self.person2.is_used = 0 self.person3.is_used = 0 self.person4.is_used = 0 out = None frame_data = cv2.cvtColor(frame_data, cv2.COLOR_BGR2RGB) image_data = cv2.resize(frame_data, (input_size, input_size)) image_data = image_data / 255. image_data = image_data[np.newaxis, ...].astype(np.float32) # start_time = time.time() batch_data = tf.constant(image_data) pred_bbox = self.infer(batch_data) # Yolo 모델 통과시켜서 바운딩 박스 좌표 반환 for key, value in pred_bbox.items(): boxes = value[:, :, 0:4] # 좌표 pred_conf = value[:, :, 4:] # 벡터값 boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression( boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)), scores=tf.reshape( pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])), max_output_size_per_class=50, max_total_size=50, iou_threshold=iou, score_threshold=score) # convert data to numpy arrays and slice out unused elements num_objects = valid_detections.numpy()[0] bboxes = boxes.numpy()[0] bboxes = bboxes[0:int(num_objects)] scores = scores.numpy()[0] scores = scores[0:int(num_objects)] classes = classes.numpy()[0] classes = classes[0:int(num_objects)] # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height original_h, original_w, _ = frame_data.shape bboxes = utils.format_boxes(bboxes, original_h, original_w) # store all predictions in one parameter for simplicity when calling functions pred_bbox = [bboxes, scores, classes, num_objects] # read in all class names from config class_names = utils.read_class_names(cfg.YOLO.CLASSES) # by default allow all classes in .names file # allowed_classes = list(class_names.values()) # custom allowed classes (uncomment line below to customize tracker for only people) allowed_classes = ['person'] # loop through objects and use class index to get class name, allow only classes in allowed_classes list names = [] deleted_indx = [] for i in range(num_objects): class_indx = int(classes[i]) class_name = class_names[class_indx] if class_name not in allowed_classes: deleted_indx.append(i) else: names.append(class_name) names = np.array(names) count = len(names) if count: cv2.putText(frame_data, "Objects being tracked: {}".format(count), (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2) # print("Objects being tracked: {}".format(count)) # delete detections that are not in allowed_classes bboxes = np.delete(bboxes, deleted_indx, axis=0) scores = np.delete(scores, deleted_indx, axis=0) # encode yolo detections and feed to tracker features = self.encoder(frame_data, bboxes) detections = [ Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip( bboxes, scores, names, features) ] # initialize color map cmap = plt.get_cmap('tab20b') colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)] # run non-maxima supression boxs = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) classes = np.array([d.class_name for d in detections]) indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # DeepSort Tracking Start # Call the tracker self.tracker.predict() # load tracker self.tracker.update(detections) match_person = 0 # reset unmatched for center compare unmatched = [] # update tracks for track in self.tracker.tracks: if not 
track.is_confirmed() or track.time_since_update > 1: continue # draw bbox on screen # 이거 처리까지 하고 나서 보내야 할 것 같다. bbox = track.to_tlbr() class_name = track.get_class() # Matching index with index_stack if self.person1.is_exist(track.track_id): self.person1.centerX, self.person1.centerY = self.getCenter( bbox) self.draw_box(frame_data, self.person1.index_stack[0], colors, bbox) self.person1.is_used = 1 match_person += 1 elif self.person2.is_exist(track.track_id): self.person2.centerX, self.person2.centerY = self.getCenter( bbox) self.draw_box(frame_data, self.person2.index_stack[0], colors, bbox) self.person2.is_used = 1 match_person += 1 elif self.person3.is_exist(track.track_id): self.person3.centerX, self.person3.centerY = self.getCenter( bbox) self.draw_box(frame_data, self.person3.index_stack[0], colors, bbox) self.person3.is_used = 1 match_person += 1 elif self.person4.is_exist(track.track_id): self.person4.centerX, self.person4.centerY = self.getCenter( bbox) self.draw_box(frame_data, self.person4.index_stack[0], colors, bbox) self.person4.is_used = 1 match_person += 1 else: unmatched.append([track.track_id, bbox]) print('found new object!') unmatched = np.array(unmatched, dtype=object) # Missed Person Only 1 if match_person == 3 and len(unmatched) == 1: if self.person1.is_used == 0: self.person1.centerX, self.person1.centerY = self.getCenter( unmatched[0][1]) self.person1.index_stack.append(unmatched[0][0]) self.draw_box(frame_data, self.person1.index_stack[0], colors, unmatched[0][1]) self.person1.is_used = 1 match_person += 1 elif self.person2.is_used == 0: self.person2.centerX, self.person2.centerY = self.getCenter( unmatched[0][1]) self.person2.index_stack.append(unmatched[0][0]) self.draw_box(frame_data, self.person2.index_stack[0], colors, unmatched[0][1]) self.person2.is_used = 1 match_person += 1 elif self.person3.is_used == 0: self.person3.centerX, self.person3.centerY = self.getCenter( unmatched[0][1]) self.person3.index_stack.append(unmatched[0][0]) self.draw_box(frame_data, self.person3.index_stack[0], colors, unmatched[0][1]) self.person3.is_used = 1 match_person += 1 elif self.person4.is_used == 0: self.person4.centerX, self.person4.centerY = self.getCenter( unmatched[0][1]) self.person4.index_stack.append(unmatched[0][0]) self.draw_box(frame_data, self.person4.index_stack[0], colors, unmatched[0][1]) self.person4.is_used = 1 match_person += 1 else: print("ERROR : Something problem on object.is_used") # Missed Person Over 2 if match_person <= 3 and len(unmatched) >= 1: for unmatch in unmatched: if match_person >= 4: break else: # Apply center location Euclidean Distance EUD_min = self.get_EuclideanDistance(unmatch) print(EUD_min) if not len(str(EUD_min)) == 0: self.draw_box(frame_data, EUD_min, colors, unmatch[1]) match_person += 1 # if enable info flag then print details about each track if info: print( "Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}" .format( str(track.track_id), class_name, (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])))) result = cv2.cvtColor(frame_data, cv2.COLOR_RGB2BGR) return result
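self.getCenter and self.get_EuclideanDistance are class methods not shown here. The fallback above assigns a box whose track id is unknown to whichever stored person slot has the nearest last-known center; the function below is a hypothetical, self-contained sketch of that nearest-center rule.

import math

def nearest_slot(bbox, slot_centers):
    """Pick the slot whose last center is closest to the new box's center.

    bbox is (xmin, ymin, xmax, ymax); slot_centers maps a slot name to its last (x, y).
    """
    cx = (bbox[0] + bbox[2]) / 2.0
    cy = (bbox[1] + bbox[3]) / 2.0
    return min(slot_centers,
               key=lambda name: math.hypot(cx - slot_centers[name][0],
                                           cy - slot_centers[name][1]))

centers = {'person1': (100, 200), 'person2': (400, 210), 'person3': (700, 190)}
print(nearest_slot((380, 150, 440, 300), centers))  # 'person2'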
def main(_argv): config = ConfigProto() config.gpu_options.allow_growth = True session = InteractiveSession(config=config) STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS) input_size = FLAGS.size images = FLAGS.images saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING]) # loop through images in list and run Yolov4 model on each for count, image_path in enumerate(images, 1): original_image = cv2.imread(image_path) original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB) image_data = cv2.resize(original_image, (input_size, input_size)) image_data = image_data / 255. # get image name by using split method image_name = image_path.split('/')[-1] image_name = image_name.split('.')[0] images_data = [] for i in range(1): images_data.append(image_data) images_data = np.asarray(images_data).astype(np.float32) infer = saved_model_loaded.signatures['serving_default'] batch_data = tf.constant(images_data) pred_bbox = infer(batch_data) for key, value in pred_bbox.items(): boxes = value[:, :, 0:4] pred_conf = value[:, :, 4:] # run non max suppression on detections boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression( boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)), scores=tf.reshape( pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])), max_output_size_per_class=50, max_total_size=50, iou_threshold=0.5, score_threshold=0.5) # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, xmax, ymax original_h, original_w, _ = original_image.shape bboxes = utils.format_boxes(boxes.numpy()[0], original_h, original_w) # hold all detection data in one variable pred_bbox = [ bboxes, scores.numpy()[0], classes.numpy()[0], valid_detections.numpy()[0] ] # read in all class names from config class_names = utils.read_class_names(cfg.YOLO.CLASSES) # by default allow all classes in .names file allowed_classes = list(class_names.values()) crop_path = os.path.join(os.getcwd(), 'detections', 'crop') try: os.mkdir(crop_path) except FileExistsError: pass crop_objects(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB), pred_bbox, crop_path, allowed_classes)
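crop_objects comes from the repository's helper module and is not reproduced here. The function below is a minimal, hypothetical sketch of the cropping step itself, assuming the [bboxes, scores, classes, num_objects] layout used above; crop_detections and its margin parameter are illustrative only.

import os
import cv2

def crop_detections(image, pred_bbox, class_names, out_dir, allowed_classes, margin=5):
    """Save one cropped image per detection whose class is in allowed_classes."""
    os.makedirs(out_dir, exist_ok=True)
    bboxes, scores, classes, num_objects = pred_bbox
    h, w = image.shape[:2]
    for k in range(int(num_objects)):
        name = class_names[int(classes[k])]
        if name not in allowed_classes:
            continue
        xmin, ymin, xmax, ymax = [int(v) for v in bboxes[k]]
        crop = image[max(ymin - margin, 0):min(ymax + margin, h),
                     max(xmin - margin, 0):min(xmax + margin, w)]
        cv2.imwrite(os.path.join(out_dir, f'{name}_{k}.png'), crop)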
def main(_argv): # Definition of the parameters max_cosine_distance = 0.4 nn_budget = None nms_max_overlap = 1.0 # initialize deep sort model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) # calculate cosine distance metric metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) # initialize tracker tracker = Tracker(metric) # load configuration for object detector config = ConfigProto() config.gpu_options.allow_growth = True session = InteractiveSession(config=config) STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS) input_size = FLAGS.size video_path = FLAGS.video # load tflite model if flag is set if FLAGS.framework == 'tflite': interpreter = tf.lite.Interpreter(model_path=FLAGS.weights) interpreter.allocate_tensors() input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() print(input_details) print(output_details) # otherwise load standard tensorflow saved model else: saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING]) infer = saved_model_loaded.signatures['serving_default'] # begin video capture try: vid = cv2.VideoCapture(int(video_path)) except: vid = cv2.VideoCapture(video_path) out = None # get video ready to save locally if flag is set if FLAGS.output: # by default VideoCapture returns float instead of int width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = int(vid.get(cv2.CAP_PROP_FPS)) codec = cv2.VideoWriter_fourcc(*FLAGS.output_format) out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height)) frame_num = 0 # while video is running while True: return_value, frame = vid.read() if return_value: frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) image = Image.fromarray(frame) else: print('Video has ended or failed, try a different video format!') break frame_num += 1 print('Frame #: ', frame_num) frame_size = frame.shape[:2] image_data = cv2.resize(frame, (input_size, input_size)) image_data = image_data / 255. 
image_data = image_data[np.newaxis, ...].astype(np.float32) start_time = time.time() # run detections on tflite if flag is set if FLAGS.framework == 'tflite': interpreter.set_tensor(input_details[0]['index'], image_data) interpreter.invoke() pred = [ interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details)) ] # run detections using yolov3 if flag is set if FLAGS.model == 'yolov3' and FLAGS.tiny == True: boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25, input_shape=tf.constant( [input_size, input_size])) else: boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant( [input_size, input_size])) else: batch_data = tf.constant(image_data) pred_bbox = infer(batch_data) for key, value in pred_bbox.items(): boxes = value[:, :, 0:4] pred_conf = value[:, :, 4:] boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression( boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)), scores=tf.reshape( pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])), max_output_size_per_class=50, max_total_size=50, iou_threshold=FLAGS.iou, score_threshold=FLAGS.score) # convert data to numpy arrays and slice out unused elements num_objects = valid_detections.numpy()[0] bboxes = boxes.numpy()[0] bboxes = bboxes[0:int(num_objects)] scores = scores.numpy()[0] scores = scores[0:int(num_objects)] classes = classes.numpy()[0] classes = classes[0:int(num_objects)] # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height original_h, original_w, _ = frame.shape bboxes = utils.format_boxes(bboxes, original_h, original_w) # store all predictions in one parameter for simplicity when calling functions pred_bbox = [bboxes, scores, classes, num_objects] # read in all class names from config class_names = utils.read_class_names(cfg.YOLO.CLASSES) # by default allow all classes in .names file allowed_classes = list(class_names.values()) # custom allowed classes (uncomment line below to customize tracker for only people) #allowed_classes = ['person'] # loop through objects and use class index to get class name, allow only classes in allowed_classes list names = [] deleted_indx = [] for i in range(num_objects): class_indx = int(classes[i]) class_name = class_names[class_indx] if class_name not in allowed_classes: deleted_indx.append(i) else: names.append(class_name) names = np.array(names) count = len(names) if FLAGS.count: cv2.putText(frame, "Objects being tracked: {}".format(count), (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2) print("Objects being tracked: {}".format(count)) # delete detections that are not in allowed_classes bboxes = np.delete(bboxes, deleted_indx, axis=0) scores = np.delete(scores, deleted_indx, axis=0) # encode yolo detections and feed to tracker features = encoder(frame, bboxes) detections = [ Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip( bboxes, scores, names, features) ] # initialize color map cmap = plt.get_cmap('tab20b') colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)] # run non-maxima supression boxs = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) classes = np.array([d.class_name for d in detections]) indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) # update tracks for track in tracker.tracks: if not 
track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() class_name = track.get_class() # draw bbox on screen # names = {'6_d': 'Thomas Delaney', # '10_b': 'Leroy Sane', # '18_b': 'Leon Goretzka', # '25_b': 'Thomas Muller', # '5_d': 'Dan-Axel Zagadou', # '12_d': 'Zaragoza', # '4_b': 'Niklas Sule', # '14_d': 'Nico Schulz', # '11_d': 'Marco Reus', # 'Referee': 'Referee', # 'ball': 'ball', # '10_d': 'Thorgan Hazard', # '6_b': 'Joshua Kimmich ', # 'gk_b': 'Ron-Thorben Hoffmann(GK)', # '17_b': 'Jérôme Boateng', # '27_b': 'David Alaba', # '9_d': 'Erling Haaland', # '8_d': 'Mahmoud Dahoud', # 'gk_d': 'Luca Unbehaun(GK)', # '19_b': 'Alphonso Davies', # '29_b': 'Kingsley Coman', # '24_d': 'Marcel Schmelzer', # '9_b': 'Robert Lewandowski', # "23_d": 'Emre Can', # } # if class_name == 'Referee': # color = (0, 0, 0) if class_name == 'ball': # color = (255, 255, 255) cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 1) # else: # try: # colors = {'b': (252, 3, 78), 'd': (250, 247, 80)} # color = colors[str(class_name.split('_')[-1])] # except KeyError: # pass # class_name = names[str(class_name)] # color = (250, 247, 80) # color = colors[int(track.track_id) % len(colors)] # color = [i * 255 for i in color] # cv2.rectangle(frame, (int(bbox[0]), int( # bbox[1])), (int(bbox[2]), int(bbox[3])), color, 1) # cv2.rectangle(frame, (int(bbox[0]), int( # bbox[1]-30)), (int(bbox[0])+(len(str(class_name)))*17, int(bbox[1])), color, -1) cv2.putText(frame, class_name, (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 251, 46), 2) # if enable info flag then print details about each track if FLAGS.info: print( "Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}" .format(str(track.track_id), class_name, (int( bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])))) # calculate frames per second of running detections fps = 1.0 / (time.time() - start_time) print("FPS: %.2f" % fps) result = np.asarray(frame) result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) if not FLAGS.dont_show: cv2.imshow("Output Video", result) # if output flag is set, save video file if FLAGS.output: out.write(result) if cv2.waitKey(1) & 0xFF == ord('q'): break cv2.destroyAllWindows()
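The commented-out block above sketches a jersey-label-to-player-name map and per-team colours. A compact version of that lookup is shown below; the colour tuples are lifted from the comments, everything else is assumed.

# Assumed values, taken from the commented-out block above.
TEAM_COLORS = {'b': (252, 3, 78), 'd': (250, 247, 80)}

def track_color(class_name, default=(255, 251, 46)):
    # Ball and referee get fixed colours, players are coloured by their team suffix.
    if class_name == 'ball':
        return (255, 255, 255)
    if class_name == 'Referee':
        return (0, 0, 0)
    return TEAM_COLORS.get(class_name.split('_')[-1], default)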
def main(_argv): # Definition of the parameters max_cosine_distance = 0.4 nn_budget = None nms_max_overlap = 1.0 # initialize deep sort model_filename = cfg.PATH + '/model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) # calculate cosine distance metric metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) # initialize tracker tracker = Tracker(metric) # load configuration for object detector config = ConfigProto() config.gpu_options.allow_growth = True session = InteractiveSession(config=config) STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS) input_size = FLAGS.size # tf saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING]) infer = saved_model_loaded.signatures['serving_default'] while True: data = sys.stdin.readline() if data: data = json.loads(data) if data['end']: break frame = np.array(data['frame_image'], dtype=np.uint8) image_data = frame / 255. image_data = image_data[np.newaxis, ...].astype(np.float32) # tf batch_data = tf.constant(image_data) pred_bbox = infer(batch_data) for key, value in pred_bbox.items(): boxes = value[:, :, 0:4] pred_conf = value[:, :, 4:] boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression( boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)), scores=tf.reshape( pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])), max_output_size_per_class=50, max_total_size=50, iou_threshold=FLAGS.iou, score_threshold=FLAGS.score) # convert data to numpy arrays and slice out unused elements num_objects = valid_detections.numpy()[0] bboxes = boxes.numpy()[0] bboxes = bboxes[0:int(num_objects)] scores = scores.numpy()[0] scores = scores[0:int(num_objects)] classes = classes.numpy()[0] classes = classes[0:int(num_objects)] # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height original_h, original_w, _ = frame.shape bboxes = utils.format_boxes(bboxes, original_h, original_w) # store all predictions in one parameter for simplicity when calling functions pred_bbox = [bboxes, scores, classes, num_objects] # read in all class names from config class_names = utils.read_class_names(cfg.YOLO.CLASSES) # by default allow all classes in .names file allowed_classes = list(class_names.values()) # custom allowed classes (uncomment line below to customize tracker for only people) #allowed_classes = ['person'] # loop through objects and use class index to get class name, allow only classes in allowed_classes list names = [] deleted_indx = [] for i in range(num_objects): class_indx = int(classes[i]) class_name = class_names[class_indx] if class_name not in allowed_classes: deleted_indx.append(i) else: names.append(class_name) names = np.array(names) # delete detections that are not in allowed_classes bboxes = np.delete(bboxes, deleted_indx, axis=0) scores = np.delete(scores, deleted_indx, axis=0) # encode yolo detections and feed to tracker features = encoder(frame, bboxes) detections = [ Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip( bboxes, scores, names, features) ] # run non-maxima supression boxs = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) classes = np.array([d.class_name for d in detections]) indices = preprocessing.non_max_suppression( boxs, classes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # ds = [] # for detection in detections: # d = dict() # d["bbox"] = 
detection.tlwh.tolist() # d["confidence"] = detection.confidence # d["class"] = detection.class_name # ds.append(d) # # # send data to Node (without tracking...) # print(json.dumps(ds)) #Call the tracker tracker.predict() tracker.update(detections) # Store tracks for json... tracks = [] # update tracks for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue class_name = track.get_class() t = dict() bbs = track.to_tlbr().tolist() t["class"] = class_name bbox = dict() bbox["left"] = bbs[0] bbox["top"] = bbs[1] bbox["right"] = bbs[2] bbox["bottom"] = bbs[3] t["bbox"] = bbox t["id"] = track.track_id t["score"] = track.detection_actual_score tracks.append(t) #send data to Node! print(json.dumps(tracks))
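This variant exchanges JSON with a Node process over stdin/stdout: one message per line, frames arriving as nested uint8 lists under "frame_image", and the stream closed with {"end": true}. A minimal sketch of the producer side of that protocol (the function names here are illustrative):

import json
import sys
import numpy as np

def send_frame(frame_rgb: np.ndarray) -> None:
    # One JSON message per line; the tracker reads it with sys.stdin.readline().
    sys.stdout.write(json.dumps({'end': False, 'frame_image': frame_rgb.tolist()}) + '\n')
    sys.stdout.flush()

def end_stream() -> None:
    sys.stdout.write(json.dumps({'end': True}) + '\n')
    sys.stdout.flush()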
def main(_argv): config = ConfigProto() config.gpu_options.allow_growth = True session = InteractiveSession(config=config) STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS) input_size = FLAGS.size images = FLAGS.images # load model if FLAGS.framework == 'tflite': interpreter = tf.lite.Interpreter(model_path=FLAGS.weights) else: saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING]) # loop through images in list and run Yolov4 model on each for count, image_path in enumerate(images, 1): original_image = cv2.imread(image_path) original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB) image_data = cv2.resize(original_image, (input_size, input_size)) image_data = image_data / 255. # get image name by using split method image_name = image_path.split('/')[-1] image_name = image_name.split('.')[0] images_data = [] for i in range(1): images_data.append(image_data) images_data = np.asarray(images_data).astype(np.float32) if FLAGS.framework == 'tflite': interpreter.allocate_tensors() input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() interpreter.set_tensor(input_details[0]['index'], images_data) interpreter.invoke() pred = [ interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details)) ] if FLAGS.model == 'yolov3' and FLAGS.tiny == True: boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25, input_shape=tf.constant( [input_size, input_size])) else: boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant( [input_size, input_size])) else: infer = saved_model_loaded.signatures['serving_default'] batch_data = tf.constant(images_data) pred_bbox = infer(batch_data) for key, value in pred_bbox.items(): boxes = value[:, :, 0:4] pred_conf = value[:, :, 4:] # run non max suppression on detections boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression( boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)), scores=tf.reshape( pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])), max_output_size_per_class=50, max_total_size=50, iou_threshold=FLAGS.iou, score_threshold=FLAGS.score) # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, xmax, ymax original_h, original_w, _ = original_image.shape bboxes = utils.format_boxes(boxes.numpy()[0], original_h, original_w) # hold all detection data in one variable pred_bbox = [ bboxes, scores.numpy()[0], classes.numpy()[0], valid_detections.numpy()[0] ] # read in all class names from config class_names = utils.read_class_names(cfg.YOLO.CLASSES) # by default allow all classes in .names file allowed_classes = list(class_names.values()) # custom allowed classes (uncomment line below to allow detections for only people) #allowed_classes = ['person'] # if crop flag is enabled, crop each detection and save it as new image if FLAGS.crop: crop_path = os.path.join(os.getcwd(), 'detections', 'crop', image_name) try: os.mkdir(crop_path) except FileExistsError: pass crop_objects(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB), pred_bbox, crop_path, allowed_classes) # if ocr flag is enabled, perform general text extraction using Tesseract OCR on object detection bounding box if FLAGS.ocr: ocr(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB), pred_bbox) # if count flag is enabled, perform counting of objects if FLAGS.count: # count objects found counted_classes = count_objects(pred_bbox, by_class=False, allowed_classes=allowed_classes) # loop 
through dict and print for key, value in counted_classes.items(): print("Number of {}s: {}".format(key, value)) image = utils.draw_bbox(original_image, pred_bbox, FLAGS.info, counted_classes, allowed_classes=allowed_classes, read_plate=FLAGS.plate) else: image = utils.draw_bbox(original_image, pred_bbox, FLAGS.info, allowed_classes=allowed_classes, read_plate=FLAGS.plate) image = Image.fromarray(image.astype(np.uint8)) if not FLAGS.dont_show: image.show() image = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2RGB) cv2.imwrite(FLAGS.output + 'detection' + str(count) + '.png', image)
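When FLAGS.count is set, count_objects tallies detections per class before drawing. A rough equivalent of that tally under the pred_bbox layout used above; the real core.functions helper may differ in behaviour and naming.

from collections import Counter

def tally_detections(pred_bbox, class_names, allowed_classes, by_class=True):
    # Sketch of a per-class tally in the spirit of count_objects().
    _, _, classes, num_objects = pred_bbox
    names = [class_names[int(classes[i])] for i in range(int(num_objects))]
    if not by_class:
        return {'total object': len(names)}
    return dict(Counter(name for name in names if name in allowed_classes))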
def main(self, frame_data): # Definition of the parameters nms_max_overlap = 1.0 # set HyperParams size = 416 iou = 0.45 score = 0.50 info = False people_num = 4 input_size = size self.indexing.queue.clear() for k in range(people_num): self.indexing.put(k+1) out = None frame_data = cv2.cvtColor(frame_data, cv2.COLOR_BGR2RGB) image_data = cv2.resize(frame_data, (input_size, input_size)) image_data = image_data / 255. image_data = image_data[np.newaxis, ...].astype(np.float32) start_time = time.time() batch_data = tf.constant(image_data) pred_bbox = self.infer(batch_data) # run the frame through the YOLO model to get bounding box predictions for key, value in pred_bbox.items(): boxes = value[:, :, 0:4] # box coordinates pred_conf = value[:, :, 4:] # class confidence scores boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression( boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)), scores=tf.reshape( pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])), max_output_size_per_class=50, max_total_size=50, iou_threshold=iou, score_threshold=score ) # convert data to numpy arrays and slice out unused elements num_objects = valid_detections.numpy()[0] bboxes = boxes.numpy()[0] bboxes = bboxes[0:int(num_objects)] scores = scores.numpy()[0] scores = scores[0:int(num_objects)] classes = classes.numpy()[0] classes = classes[0:int(num_objects)] # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height original_h, original_w, _ = frame_data.shape bboxes = utils.format_boxes(bboxes, original_h, original_w) # store all predictions in one parameter for simplicity when calling functions pred_bbox = [bboxes, scores, classes, num_objects] # read in all class names from config class_names = utils.read_class_names(cfg.YOLO.CLASSES) # by default allow all classes in .names file # allowed_classes = list(class_names.values()) # track only people (the all-classes default above is commented out) allowed_classes = ['person'] # loop through objects and use class index to get class name, allow only classes in allowed_classes list names = [] deleted_indx = [] for i in range(num_objects): class_indx = int(classes[i]) class_name = class_names[class_indx] if class_name not in allowed_classes: deleted_indx.append(i) else: names.append(class_name) names = np.array(names) count = len(names) if count: cv2.putText(frame_data, "Objects being tracked: {}".format(count), (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2) # print("Objects being tracked: {}".format(count)) # delete detections that are not in allowed_classes bboxes = np.delete(bboxes, deleted_indx, axis=0) scores = np.delete(scores, deleted_indx, axis=0) # encode yolo detections and feed to tracker features = self.encoder(frame_data, bboxes) detections = [Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip(bboxes, scores, names, features)] # initialize color map cmap = plt.get_cmap('tab20b') colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)] # run non-maxima suppression boxs = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) classes = np.array([d.class_name for d in detections]) indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # DeepSort Tracking Start # Call the tracker self.tracker.predict() # load tracker self.tracker.update(detections) # check is_confirmed is_not_confirmed = 0 tracks_count = 0 for w, track in enumerate(self.tracker.tracks): tracks_count += 1 # print('count', tracks_count)
# update tracks for index, track in enumerate(self.tracker.tracks): if not track.is_confirmed() or track.time_since_update > 1: is_not_confirmed += 1 continue if index-is_not_confirmed+1 > people_num: break bbox = track.to_tlbr() class_name = track.get_class() # draw bbox on screen # this processing probably needs to finish before the frame is sent on. for i in range(self.indexing.qsize()): check_index = self.indexing.get() if track.track_id == check_index: color = colors[int(track.track_id)*8 % len(colors)] color = [j * 255 for j in color] cv2.rectangle(frame_data, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle(frame_data, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText(frame_data, class_name + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) break else: self.indexing.put(check_index) if i == self.indexing.qsize() - 1: cng_index = self.indexing.get() print('index changed', track.track_id, '->', cng_index) # track.track_id = cng_index color = colors[int(cng_index)*8 % len(colors)] color = [j * 255 for j in color] cv2.rectangle(frame_data, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle(frame_data, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(cng_index))) * 17, int(bbox[1])), color, -1) cv2.putText(frame_data, class_name + "-" + str(cng_index), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) # if enable info flag then print details about each track if info: print("Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}".format(str(track.track_id), class_name, ( int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])))) # calculate frames per second of running detections fps = 1.0 / (time.time() - start_time) # print("FPS: %.2f" % fps) result = cv2.cvtColor(frame_data, cv2.COLOR_RGB2BGR) return result
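The inner loop above rotates self.indexing (a queue of display slots seeded with 1..people_num) to find the slot a track already owns, and hands out a fresh slot when none matches. Isolated into a helper, and under those same assumptions, the logic is roughly:

def resolve_display_index(self, track_id):
    # Sketch only: rotate the queue looking for the slot this track already owns.
    for _ in range(self.indexing.qsize()):
        idx = self.indexing.get()
        if idx == track_id:
            return idx            # slot found; it stays consumed, as in the loop above
        self.indexing.put(idx)    # not this one; rotate it to the back
    # no slot matched: hand out the next free slot (assumes one is still available)
    return self.indexing.get() if not self.indexing.empty() else track_id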
def _capture_loop(self): dt = 1 / self.fps self.FLAGS = self.FLAGS() config = ConfigProto() config.gpu_options.allow_growth = True session = InteractiveSession(config=config) STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(self.FLAGS) input_size = self.FLAGS.size saved_model_loaded = tf.saved_model.load(self.FLAGS.weights, tags=[tag_constants.SERVING]) infer = saved_model_loaded.signatures['serving_default'] logger.debug("Observation started") out = None while self.isrunning: return_value, frame = self.vid.read() if return_value: if len(self.frames) == self.max_frames: self.frames = self.frames[1:] frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) image = Image.fromarray(frame) frame_size = frame.shape[:2] image_data = cv2.resize(frame, (input_size, input_size)) image_data = image_data / 255. image_data = image_data[np.newaxis, ...].astype(np.float32) start_time = time.time() batch_data = tf.constant(image_data) pred_bbox = infer(batch_data) for key, value in pred_bbox.items(): boxes = value[:, :, 0:4] pred_conf = value[:, :, 4:] boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression( boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)), scores=tf.reshape( pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])), max_output_size_per_class=50, max_total_size=50, iou_threshold=self.FLAGS.iou, score_threshold=self.FLAGS.score) # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, xmax, ymax original_h, original_w, _ = frame.shape bboxes = utils.format_boxes(boxes.numpy()[0], original_h, original_w) pred_bbox = [ bboxes, scores.numpy()[0], classes.numpy()[0], valid_detections.numpy()[0] ] image = utils.draw_bbox(frame, pred_bbox, self.FLAGS.info) self.frames.append(frame) time.sleep(dt) logger.info("Thread stopped successfully")
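_capture_loop is meant to run on its own thread, but the start/stop wiring is not shown. The sketch below assumes the attributes used in the loop (isrunning, vid, fps, frames, max_frames) are set in __init__; the method names start/stop are illustrative.

import threading

def start(self):
    # Launch the capture/detection loop in the background.
    self.isrunning = True
    self._thread = threading.Thread(target=self._capture_loop, daemon=True)
    self._thread.start()

def stop(self):
    # Ask the loop to exit and wait briefly for it to finish.
    self.isrunning = False
    if getattr(self, '_thread', None) is not None:
        self._thread.join(timeout=5)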
def main(_argv): # Definition of the parameters nms_max_overlap = 1.0 # initialize deep sort parameters encoder, tracker = init_deepsort_params() # load configuration for object detector input_size, video_path = load_obj_detector_cfg() # load tflite model if flag is set if FLAGS.framework == 'tflite': tfl = tf_lite_ngine() # otherwise load standard tensorflow saved model else: saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING]) infer = saved_model_loaded.signatures['serving_default'] # begin video capture vid = get_video_stream(video_path) # get video ready to save locally if flag is set out = None if FLAGS.output: out = init_video_out(vid) frame_num = 0 # while video is running while True: return_value, frame = vid.read() if return_value: frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) image = Image.fromarray(frame) else: print('Video has ended or failed, try a different video format!') break frame_num += 1 print('Frame #: ', frame_num) frame_size = frame.shape[:2] image_data = init_image_data(frame, input_size) start_time = time.time() # run detections on tflite if flag is set if FLAGS.framework == 'tflite': boxes, pred_conf = tfl.detect(image_data) else: batch_data = tf.constant(image_data) pred_bbox = infer(batch_data) for key, value in pred_bbox.items(): boxes = value[:, :, 0:4] pred_conf = value[:, :, 4:] boxes, scores, classes, valid_detections = apply_tf_nms( boxes, pred_conf) # convert data to numpy arrays and slice out unused elements num_objects, bboxes, scores, classes = detections_to_np_array( valid_detections, boxes, scores, classes) # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height original_h, original_w, _ = frame.shape bboxes = utils.format_boxes(bboxes, original_h, original_w) # fetch allowed object classes, ignore the rest classes names, deleted_indx = get_allowed_obj_classes(classes, num_objects) if FLAGS.count: show_tracked_object_count(names, frame) # delete detections that are not in allowed_classes bboxes = np.delete(bboxes, deleted_indx, axis=0) scores = np.delete(scores, deleted_indx, axis=0) # encode yolo detections and feed to tracker features = encoder(frame, bboxes) detections = [ Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip( bboxes, scores, names, features) ] # process detections with YOLO tracker frame = process_detections(tracker, detections, nms_max_overlap, frame) # calculate and print frames per second of running detections print_fps(start_time, time.time(), frame) result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) if not FLAGS.dont_show: cv2.imshow("Output Video", result) # if output flag is set, save video file if FLAGS.output: out.write(result) if cv2.waitKey(1) & 0xFF == ord('q'): break cv2.destroyAllWindows()
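This variant factors the per-frame preprocessing and NMS into helpers (init_deepsort_params, init_image_data, apply_tf_nms, and so on) whose bodies are not shown. Judging from the inline versions in the other scripts, init_image_data is presumably close to the following sketch:

import cv2
import numpy as np

def init_image_data(frame, input_size):
    # Resize, normalise to [0, 1] and add a batch dimension, as done inline elsewhere.
    image_data = cv2.resize(frame, (input_size, input_size))
    image_data = image_data / 255.
    return image_data[np.newaxis, ...].astype(np.float32)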
def main(_argv): # Definition of the parameters max_cosine_distance = 0.4 nn_budget = None nms_max_overlap = 1.0 # initialize deep sort model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) # calculate cosine distance metric metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) # initialize tracker tracker = Tracker(metric) # load configuration for object detector config = ConfigProto() config.gpu_options.allow_growth = True session = InteractiveSession(config=config) STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS) input_size = FLAGS.size #images = FLAGS.images video_path = FLAGS.video # load tflite model if flag is set if FLAGS.framework == 'tflite': interpreter = tf.lite.Interpreter(model_path=FLAGS.weights) interpreter.allocate_tensors() input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() print(input_details) print(output_details) # otherwise load standard tensorflow saved model else: saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING]) infer = saved_model_loaded.signatures['serving_default'] # begin video capture try: vid = cv2.VideoCapture(int(video_path)) except: vid = cv2.VideoCapture(video_path) out = None # get video ready to save locally if flag is set if FLAGS.output: # by default VideoCapture returns float instead of int width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = int(vid.get(cv2.CAP_PROP_FPS)) codec = cv2.VideoWriter_fourcc(*FLAGS.output_format) out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height)) frame_num = 0 if FLAGS.shirt: allowed_classes = ['Shirt'] #ROI = if FLAGS.trouser: allowed_classes = ['Trousers'] if FLAGS.jeans: allowed_classes = ['Jeans'] if FLAGS.dress: allowed_classes = ['Dress'] if FLAGS.footwear: allowed_classes = ['Footwear'] if FLAGS.jacket: allowed_classes = ['Jacket'] if FLAGS.skirt: allowed_classes = ['Skirt'] if FLAGS.suit: allowed_classes = ['Suit'] # while video is running while True: return_value, frame = vid.read() if return_value: frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) image = Image.fromarray(frame) else: print('Video has ended or failed, try a different video format!') break frame_num += 1 print('Frame #: ', frame_num) frame_size = frame.shape[:2] image_data = cv2.resize(frame, (input_size, input_size)) image_data = image_data / 255. 
image_data = image_data[np.newaxis, ...].astype(np.float32) start_time = time.time() # run detections on tflite if flag is set if FLAGS.framework == 'tflite': interpreter.set_tensor(input_details[0]['index'], image_data) interpreter.invoke() pred = [ interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details)) ] # run detections using yolov3 if flag is set if FLAGS.model == 'yolov3' and FLAGS.tiny == True: boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25, input_shape=tf.constant( [input_size, input_size])) else: boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant( [input_size, input_size])) else: batch_data = tf.constant(image_data) pred_bbox = infer(batch_data) for key, value in pred_bbox.items(): boxes = value[:, :, 0:4] pred_conf = value[:, :, 4:] boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression( boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)), scores=tf.reshape( pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])), max_output_size_per_class=50, max_total_size=50, iou_threshold=FLAGS.iou, score_threshold=FLAGS.score) # convert data to numpy arrays and slice out unused elements num_objects = valid_detections.numpy()[0] bboxes = boxes.numpy()[0] bboxes = bboxes[0:int(num_objects)] scores = scores.numpy()[0] scores = scores[0:int(num_objects)] classes = classes.numpy()[0] classes = classes[0:int(num_objects)] # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height original_h, original_w, _ = frame.shape bboxes = utils.format_boxes(bboxes, original_h, original_w) # store all predictions in one parameter for simplicity when calling functions pred_bbox = [bboxes, scores, classes, num_objects] # read in all class names from config class_names = utils.read_class_names(cfg.YOLO.CLASSES) # by default allow all classes in .names file allowed_classes = list(class_names.values()) # custom allowed classes (uncomment line below to customize tracker for only people) #allowed_classes = ['person'] # loop through objects and use class index to get class name, allow only classes in allowed_classes list names = [] deleted_indx = [] for i in range(num_objects): class_indx = int(classes[i]) class_name = class_names[class_indx] if class_name not in allowed_classes: deleted_indx.append(i) else: names.append(class_name) names = np.array(names) count = len(names) if FLAGS.count: cv2.putText(frame, "Objects being tracked: {}".format(count), (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2) print("Objects being tracked: {}".format(count)) # delete detections that are not in allowed_classes bboxes = np.delete(bboxes, deleted_indx, axis=0) scores = np.delete(scores, deleted_indx, axis=0) # encode yolo detections and feed to tracker features = encoder(frame, bboxes) detections = [ Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip( bboxes, scores, names, features) ] #initialize color map cmap = plt.get_cmap('tab20b') colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)] # run non-maxima supression boxs = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) classes = np.array([d.class_name for d in detections]) indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) # update tracks for track in tracker.tracks: if not 
track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() class_name = track.get_class() # draw bbox on screen color = colors[int(track.track_id) % len(colors)] color = [i * 255 for i in color] #cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) #cv2.rectangle(frame, (int(bbox[0]), int(bbox[1]-30)), (int(bbox[0])+(len(class_name)+len(str(track.track_id)))*17, int(bbox[1])), color, -1) #cv2.putText(frame, class_name + "-" + str(track.track_id),(int(bbox[0]), int(bbox[1]-10)),0, 0.75, (255,255,255),2) # if enable info flag then print details about each track if FLAGS.info: print( "Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}" .format(str(track.track_id), class_name, (int( bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])))) if FLAGS.color: PATH = './training.data' #(int(bbox[0])):(int(bbox[2])), (int(bbox[1])):(int(bbox[3])) #ROI = frame[(int(bbox[0]) +50) :(int(bbox[2]) - 50), (int(bbox[1])+ 50):(int(bbox[3])-50)] #ROI = frame[(int(bbox[1])) +15 :(int(bbox[3])-15),(int(bbox[0])+15):(int(bbox[2])-15)] ROI = frame[int((int(bbox[1]) + int(bbox[3])) / 2):int((int(bbox[1]) + int(bbox[3])) / 2) + 1, int((int(bbox[0]) + int(bbox[2])) / 2):int((int(bbox[0]) + int(bbox[2])) / 2) + 1] #ROI = frame[(int(bbox[1])):(int(bbox[3])),(int(bbox[0])):(int(bbox[2]))] #ROI = frame[int(0.5* (int(bbox[1] - 50)+ int(bbox[3] + 50))),int(0.5*(int(bbox[0] - 50) +int(bbox[2] + 50 )))] #print(ROI) color_histogram_feature_extraction.color_histogram_of_test_image( ROI) prediction = knn_classifier.main('training.data', 'test.data') #prediction = 'red' red = load_red('test.data') Red = str(red) #Red = str(Red_1) print('this is the variable of the red:- ' + str(Red)) green = load_green('test.data') Green = str(green) #Green = str(Green_1) print('this is the variable of the green:- ' + str(Green)) blue = load_blue('test.data') #Blue_1 = int(blue) Blue = str(blue) print('this is the variable of the blue:- ' + str(Blue)) #hsv = rgb_to_hsv(red,green,blue) #print("HSV: " + str(hsv)) if red and blue and green != None: HLS = colorsys.rgb_to_hls(red, green, blue) HUE = int(HLS[0]) Light = int(HLS[1]) Saturation = int(HLS[2]) print("HLS is equal to", HLS) print('HUE: ', HUE) print('LIGHT: ', Light) print('Saturation', Saturation) if red and blue and green != None: HSV = rgb_to_hsv(red, green, blue) HUE_1 = int(HSV[0]) Saturation_1 = int(HSV[1]) Value = int(HSV[2]) print("HSV is equal to", HSV) print('Hue: ', HUE_1) print('saturation: ', Saturation_1) print('value', Value) print(str(prediction) + " " + str(class_name)) if FLAGS.Fuzzy_black: #if str(59.7) <= Red < str(200.9) and str(74) <= Blue < str(207) and str(70) <= Green < str(203): if 0 <= HUE_1 < 210 and 0 <= Saturation_1 < 41 and 0 <= Value < 86: print("THIS IS THE black COLOR yaaaaaaaaaaaaaaaaaaaa") cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle( frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText( frame, class_name + " " + "BLACK" + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) if FLAGS.Fuzzy_red: #if str(139) <= Red < str(255) and str(0) <= Green < str(160) and str(0) <= Blue < str(128): if 0 <= HUE_1 < 348 and 47 <= Saturation_1 < 100 and 55 <= Value < 100: print( "THIS IS THE red COLOR redddddddddddddddddddddddddddddddddddd" ) cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), 
(int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle( frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText( frame, class_name + " " + "RED" + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) if FLAGS.Fuzzy_orange: #if str(255) <= Red < str(255) and str(69) <= Green < str(165) and str(0) <= Blue < str(80): if 9 <= HUE_1 < 39 and 69 <= Saturation_1 < 100 and Value == 100: print( "THIS IS THE ORANGE COLOR orangeeeeeeeeeeeeeeeeeeeeeeee" ) cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle( frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText( frame, class_name + " " + "ORANGE" + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) if FLAGS.Fuzzy_yellow: #if str(189) <= Red < str(255) and str(183) <= Green < str(255) and str(0) <= Blue < str(224): if 0 <= HUE_1 < 56 and 12 <= Saturation_1 < 100 and 74 <= Value < 100: print("THIS IS THE YELLOW COLOR") cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle( frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText( frame, class_name + " " + "YELLOW" + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) if FLAGS.Fuzzy_blue: #if str(0) <= Red < str(176) and str(0) <= Green < str(244) and str(112) <= Blue < str(255): if 187 <= HUE_1 < 240 and 21 <= Saturation_1 < 100 and 44 <= Value < 100: print("THIS IS THE BLUE COLOR") cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle( frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText( frame, class_name + " " + "BLUE" + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) if FLAGS.Fuzzy_white: #if str(240) <= Red < str(255) and str(228) <= Green < str(255) and str(215) <= Blue < str(255): if 0 <= HUE_1 < 340 and 0 <= Saturation_1 < 14 and 96 <= Value < 100: print("THIS IS THE WHITE COLOR") cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle( frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText( frame, class_name + " " + "WHITE" + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) if FLAGS.Fuzzy_purple: #if str(72) <= Red < str(255) and str(0) <= Green < str(230) and str(128) <= Blue < str(255): if 0 <= HUE_1 < 302 and 8 <= Saturation_1 < 100 and 50 <= Value < 100: print("THIS IS THE PURPLE COLOR") cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle( frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText( frame, class_name + " " + "PURPLE" + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) if FLAGS.Fuzzy_green: #if str(0) <= Red < str(173) and str(100) <= Green < str(255) and str(0) <= Blue < str(170): if 0 <= HUE_1 < 160 and 24 <= Saturation_1 < 100 and 39 <= Value < 
100: print("THIS IS THE green COLOR") cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle( frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText( frame, class_name + " " + "GREEN" + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) if FLAGS.Fuzzy_brown: #if str(128) <= Red < str(255) and str(0) <= Green < str(248) and str(0) <= Blue < str(288): if 0 <= HUE_1 < 48 and 14 <= Saturation_1 < 100 and 50 <= Value < 100: print("THIS IS THE BROWN COLOR") cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle( frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText( frame, class_name + " " + "BROWN" + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) if FLAGS.Fuzzy_cyan: #if str(0) <= Red < str(244) and str(128) <= Green < str(255) and str(128) <= Blue < str(255): if 0 <= HUE_1 < 182 and 12 <= Saturation_1 < 100 and 50 <= Value < 100: print("THIS IS THE CYAN COLOR") cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle( frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText( frame, class_name + " " + "CYAN" + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) if FLAGS.Fuzzy_pink: #if str(199) <= Red < str(255) and str(20) <= Green < str(192) and str(133) <= Blue < str(203): if 322 <= HUE_1 < 351 and 25 <= Saturation_1 < 92 and 78 <= Value < 100: print("THIS IS THE PINK COLOR") cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle( frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText( frame, class_name + " " + "PINK" + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) if FLAGS.black: if prediction == 'black': #ROI = frame[int((int(bbox[1]) + int(bbox[3]))/2):int((int(bbox[1]) + int(bbox[3]))/2)+1,int((int(bbox[0]) + int(bbox[2]))/2):int((int(bbox[0]) + int(bbox[2]))/2)+1] #color_histogram_feature_extraction.color_histogram_of_test_image(ROI) #prediction = knn_classifier.main('training.data','test.data') cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle( frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText( frame, class_name + " " + str(prediction) + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) if FLAGS.blue: if prediction == 'blue': cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle( frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText( frame, class_name + " " + str(prediction) + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) if FLAGS.red: if prediction == 'red': cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle( frame, (int(bbox[0]), 
int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText( frame, class_name + " " + str(prediction) + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) if FLAGS.yellow: if prediction == 'yellow': cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle( frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText( frame, class_name + " " + str(prediction) + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) if FLAGS.orange: if prediction == 'orange': cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle( frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText( frame, class_name + " " + str(prediction) + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) if FLAGS.violet: if prediction == 'violet': cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle( frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText( frame, class_name + " " + str(prediction) + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) if FLAGS.white: if prediction == 'white': cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle( frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText( frame, class_name + " " + str(prediction) + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) if FLAGS.green: if prediction == 'green': cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle( frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText( frame, class_name + " " + str(prediction) + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) #cv2.putText(frame, class_name + " " + str(prediction) + "-" + str(track.track_id),(int(bbox[0]), int(bbox[1]-10)),0, 0.75, (255,255,255),2) #print('ferture data:' +" " + feature_data) #result_1 = np.asarray(frame) #result_1 = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) #cv2.imshow('color classifier', result_1) #print(color_histogram_feature_extraction.feature_data) # calculate frames per second of running detections fps = 1.0 / (time.time() - start_time) print("FPS: %.2f" % fps) result = np.asarray(frame) result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) if not FLAGS.dont_show: cv2.imshow("Output Video", result) # if output flag is set, save video file if FLAGS.output: out.write(result) if cv2.waitKey(1) & 0xFF == ord('q'): break cv2.destroyAllWindows()
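Each colour branch above repeats the same rectangle/label drawing, and the guard `if red and blue and green != None` only compares green against None while also rejecting legitimate zero components. A sketch of a single drawing helper plus an explicit guard; the HSV thresholds quoted in the Fuzzy_* branches are treated as illustrative rather than calibrated values.

import cv2

def draw_labeled_box(frame, bbox, color, label):
    # One place for the box, filled label bar and text used by every colour branch.
    xmin, ymin, xmax, ymax = [int(c) for c in bbox]
    cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), color, 2)
    cv2.rectangle(frame, (xmin, ymin - 30), (xmin + len(label) * 17, ymin), color, -1)
    cv2.putText(frame, label, (xmin, ymin - 10), 0, 0.75, (255, 255, 255), 2)

def have_rgb(red, green, blue):
    # Explicit None test: the chained `red and blue and green != None` form misses
    # None in red/blue and treats valid zero components as missing.
    return red is not None and green is not None and blue is not None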
def main(_argv): global lock, buffer, bts # Definition of the parameters max_cosine_distance = 0.5 nn_budget = None nms_max_overlap = 0.8 counter = [] # initialize deep sort model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) # calculate cosine distance metric metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) # initialize tracker tracker = Tracker(metric) # load configuration for object detector config = ConfigProto() config.gpu_options.allow_growth = True input_size = FLAGS.size # load tflite model if flag is set if FLAGS.framework == 'tflite': interpreter = tf.lite.Interpreter(model_path=FLAGS.weights) interpreter.allocate_tensors() input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() print(input_details) print(output_details) # otherwise load standard tensorflow saved model else: saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING]) infer = saved_model_loaded.signatures['serving_default'] frame_num = 0 # while video is running while True: with lock: bts += buffer buffer = b'' time.sleep(0.01) # jpghead = bts.find(b'\xff\xd8') # jpgend = bts.find(b'\xff\xd9') # if jpghead < 0 or jpgend < 0: # continue # if jpgend < jpghead: # # raise Exception("{}..{}".format(jpghead, jpgend)) # continue # print(jpghead, jpgend) # jpg=bts[jpghead:jpgend+2] # bts=bts[jpgend+2:] jpghead = bts.find(b'\xff\xd8') if jpghead >= 0: bts = bts[jpghead:] jpgend = bts.find(b'\xff\xd9') if jpghead < 0 or jpgend < 0: continue print(jpghead, jpgend) jpg = bts[0:jpgend + 2] bts = bts[jpgend + 2:] img = cv2.imdecode(np.frombuffer(jpg, dtype=np.uint8), cv2.IMREAD_UNCHANGED) img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) frame = cv2.resize(img, (640, 480)) cv2.imshow('input', frame) cv2.waitKey(1) frame_num += 1 print('Frame #: ', frame_num) image_data = cv2.resize(frame, (input_size, input_size)) image_data = image_data / 255. 
image_data = image_data[np.newaxis, ...].astype(np.float32) start_time = time.time() bts = bts[-6000:] # run detections on tflite if flag is set if FLAGS.framework == 'tflite': interpreter.set_tensor(input_details[0]['index'], image_data) interpreter.invoke() pred = [ interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details)) ] # run detections using yolov3 if flag is set if FLAGS.model == 'yolov3' and FLAGS.tiny == True: boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25, input_shape=tf.constant( [input_size, input_size])) else: boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant( [input_size, input_size])) else: batch_data = tf.constant(image_data) pred_bbox = infer(batch_data) for value in pred_bbox.values(): boxes = value[:, :, 0:4] pred_conf = value[:, :, 4:] boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression( boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)), scores=tf.reshape( pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])), max_output_size_per_class=15, max_total_size=15, iou_threshold=FLAGS.iou, score_threshold=FLAGS.score) # convert data to numpy arrays and slice out unused elements num_objects = valid_detections.numpy()[0] bboxes = boxes.numpy()[0] bboxes = bboxes[0:int(num_objects)] scores = scores.numpy()[0] scores = scores[0:int(num_objects)] classes = classes.numpy()[0] classes = classes[0:int(num_objects)] # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height original_h, original_w, _ = frame.shape bboxes = utils.format_boxes(bboxes, original_h, original_w) # store all predictions in one parameter for simplicity when calling functions pred_bbox = [bboxes, scores, classes, num_objects] # read in all class names from config class_names = utils.read_class_names(cfg.YOLO.CLASSES) # by default allow all classes in .names file allowed_classes = list(class_names.values()) # custom allowed classes (uncomment line below to customize tracker for only people) #allowed_classes = ['person'] # loop through objects and use class index to get class name, allow only classes in allowed_classes list names = [] deleted_indx = [] for i in range(num_objects): class_indx = int(classes[i]) class_name = class_names[class_indx] if class_name not in allowed_classes: deleted_indx.append(i) else: names.append(class_name) names = np.array(names) count = len(names) if FLAGS.count: cv2.putText(frame, "Objects being tracked: {}".format(count), (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2) print("Objects being tracked: {}".format(count)) # delete detections that are not in allowed_classes bboxes = np.delete(bboxes, deleted_indx, axis=0) scores = np.delete(scores, deleted_indx, axis=0) # encode yolo detections and feed to tracker features = encoder(frame, bboxes) detections = [ Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip( bboxes, scores, names, features) ] #initialize color map cmap = plt.get_cmap('tab20b') colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)] # run non-maxima supression boxs = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) classes = np.array([d.class_name for d in detections]) indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) current_count = int(0) # update tracks 
for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() class_name = track.get_class() color = colors[int(track.track_id) % len(colors)] color = [i * 255 for i in color] cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle(frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText(frame, class_name + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) center = (int( ((bbox[0]) + (bbox[2])) / 2), int(((bbox[1]) + (bbox[3])) / 2)) pts[track.track_id].append(center) for j in range(1, len(pts[track.track_id])): if pts[track.track_id][j - 1] is None or pts[ track.track_id][j] is None: continue thickness = int(np.sqrt(64 / float(j + 1)) * 2) cv2.line(frame, (pts[track.track_id][j - 1]), (pts[track.track_id][j]), color, thickness) height, width, _ = frame.shape cv2.line(frame, (0, int(3 * height / 6 + height / 2)), (width, int(3 * height / 6 + height / 2)), (0, 255, 0), thickness=2) cv2.line(frame, (0, int(3 * height / 6 - height / 2)), (width, int(3 * height / 6 - height / 2)), (0, 255, 0), thickness=2) center_y = int(((bbox[1]) + (bbox[3])) / 2) if center_y <= int(3 * height / 6 + height / 2) and center_y >= int(3 * height / 6 - height / 2): if class_name == 'Among_Us_Alive' or class_name == 'Among_Us_Dead': counter.append(int(track.track_id)) current_count += 1 # if enable info flag then print details about each track if FLAGS.info: print( "Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}" .format(str(track.track_id), class_name, (int( bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])))) total_count = len(set(counter)) cv2.putText(frame, "Current Figurine Count: " + str(current_count), (0, 80), 0, 1, (0, 0, 255), 2) cv2.putText(frame, "Total Figurine Count: " + str(total_count), (0, 130), 0, 1, (0, 0, 255), 2) # calculate frames per second of running detections fps = 1.0 / (time.time() - start_time) print("FPS: %.2f" % fps) result = np.asarray(frame) result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) if not FLAGS.dont_show: cv2.imshow("Output Video", result) # with lock: # bts = bts[:5000] # time.sleep(0.01) if cv2.waitKey(1) & 0xFF == ord('q'): break
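The counting check above compares the box centre against two lines at 3*height/6 ± height/2, which as written spans the entire frame. Pulled into a helper with the band width exposed (so it can actually be narrowed), the test is roughly:

def in_counting_band(bbox, frame_height, half_band=None):
    # True when the box centre lies inside the horizontal counting band.
    center_y = int((bbox[1] + bbox[3]) / 2)
    mid = int(3 * frame_height / 6)
    half = int(frame_height / 2) if half_band is None else int(half_band)
    return (mid - half) <= center_y <= (mid + half)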