def detect(self):
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
        interpreter.set_tensor(input_details[0]['index'], images_data)
        interpreter.invoke()
        pred = [interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))]
        if FLAGS.model == 'yolov3' and FLAGS.tiny:
            boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25, input_shape=tf.constant([input_size, input_size]))
        else:
            boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant([input_size, input_size]))
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']
        batch_data = tf.constant(images_data)
        pred_bbox = infer(batch_data)
        for key, value in pred_bbox.items():
            boxes = value[:, :, 0:4]
            pred_conf = value[:, :, 4:]
    box = tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4))
    score = tf.reshape(pred_conf, (tf.shape(pred_conf)[0], box.shape[1], -1))
    boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
        boxes=box, scores=score, max_output_size_per_class=50, max_total_size=50,
        iou_threshold=FLAGS.iou, score_threshold=FLAGS.score)
    pred_bbox = [boxes.numpy(), scores.numpy(), classes.numpy(), valid_detections.numpy()]
    return pred_bbox
def inference(framework, images_data, model, tiny, saved_model_loaded, iou, score):
    if framework == 'tflite':
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        interpreter.set_tensor(input_details[0]['index'], images_data)
        interpreter.invoke()
        pred = [interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))]
        if model == 'yolov3' and tiny:
            boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25, input_shape=tf.constant([input_size, input_size]))
        else:
            boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant([input_size, input_size]))
    else:
        infer = saved_model_loaded.signatures['serving_default']
        batch_data = tf.constant(images_data)
        pred_bbox = infer(batch_data)
        for key, value in pred_bbox.items():
            boxes = value[:, :, 0:4]
            pred_conf = value[:, :, 4:]
    # run non max suppression on detections
    boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
        boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
        scores=tf.reshape(pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
        max_output_size_per_class=50, max_total_size=50,
        iou_threshold=iou, score_threshold=score)
    return boxes, scores, classes, valid_detections
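# Hedged usage sketch for inference() above: the image path, checkpoint path and the
# 416 input size are placeholders, and the tflite branch additionally relies on a
# module-level `interpreter` and `input_size` being defined elsewhere.
def example_inference_usage():
    image = cv2.cvtColor(cv2.imread('example.jpg'), cv2.COLOR_BGR2RGB)
    images_data = np.asarray([cv2.resize(image, (416, 416)) / 255.]).astype(np.float32)
    saved_model_loaded = tf.saved_model.load('./checkpoints/yolov4-416', tags=[tag_constants.SERVING])
    return inference(framework='tf', images_data=images_data, model='yolov4', tiny=False,
                     saved_model_loaded=saved_model_loaded, iou=0.45, score=0.25)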
def save_tf():
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    print("load_config XYSCALE:{}".format(XYSCALE))
    input_layer = tf.keras.layers.Input([FLAGS.input_size, FLAGS.input_size, 3])
    feature_maps = YOLO(input_layer, NUM_CLASS, FLAGS.model, FLAGS.num_detection_layer)
    bbox_tensors = []
    prob_tensors = []
    if FLAGS.num_detection_layer == 1:  # yolo-custom
        output_tensors = decode(feature_maps[0], FLAGS.input_size // 32, NUM_CLASS, STRIDES, ANCHORS, 0, XYSCALE, FLAGS.framework)
        bbox_tensors.append(output_tensors[0])
        prob_tensors.append(output_tensors[1])
    elif FLAGS.num_detection_layer == 2:  # yolo-tiny
        for i, fm in enumerate(feature_maps):
            if i == 0:
                output_tensors = decode(fm, FLAGS.input_size // 16, NUM_CLASS, STRIDES, ANCHORS, i, XYSCALE, FLAGS.framework)
            else:
                output_tensors = decode(fm, FLAGS.input_size // 32, NUM_CLASS, STRIDES, ANCHORS, i, XYSCALE, FLAGS.framework)
            bbox_tensors.append(output_tensors[0])
            prob_tensors.append(output_tensors[1])
    elif FLAGS.num_detection_layer == 3:  # yolo
        for i, fm in enumerate(feature_maps):
            print("i:{}".format(i))
            if i == 0:
                output_tensors = decode(fm, FLAGS.input_size // 8, NUM_CLASS, STRIDES, ANCHORS, i, XYSCALE, FLAGS.framework)
            elif i == 1:
                output_tensors = decode(fm, FLAGS.input_size // 16, NUM_CLASS, STRIDES, ANCHORS, i, XYSCALE, FLAGS.framework)
            else:
                output_tensors = decode(fm, FLAGS.input_size // 32, NUM_CLASS, STRIDES, ANCHORS, i, XYSCALE, FLAGS.framework)
            bbox_tensors.append(output_tensors[0])
            prob_tensors.append(output_tensors[1])
    pred_bbox = tf.concat(bbox_tensors, axis=1)
    pred_prob = tf.concat(prob_tensors, axis=1)
    if FLAGS.framework == 'tflite':
        pred = (pred_bbox, pred_prob)
    else:
        boxes, pred_conf = filter_boxes(pred_bbox, pred_prob, score_threshold=FLAGS.score_thres, input_shape=tf.constant([FLAGS.input_size, FLAGS.input_size]))
        pred = tf.concat([boxes, pred_conf], axis=-1)
    model = tf.keras.Model(input_layer, pred)
    utils.load_weights(model, FLAGS.weights, FLAGS.model, FLAGS.num_detection_layer)
    model.summary()
    model.save(FLAGS.output)
def detect_flowers(original_image):
    image_data = cv2.resize(original_image, (FLAGS.size, FLAGS.size)) / 255.0
    image_data = np.asarray([image_data]).astype(np.float32)
    detection_interpreter.set_tensor(detection_input_details[0]['index'], image_data)
    detection_interpreter.invoke()
    pred = [detection_interpreter.get_tensor(detection_output_details[i]['index']) for i in range(len(detection_output_details))]
    boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant([FLAGS.size, FLAGS.size]))
    boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
        boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
        scores=tf.reshape(pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
        max_output_size_per_class=50, max_total_size=50,
        iou_threshold=FLAGS.iou, score_threshold=FLAGS.score)
    pred_bbox = [boxes.numpy(), scores.numpy(), classes.numpy(), valid_detections.numpy()]
    return utils.detect_coordinates(original_image, pred_bbox)
def save_tf():
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_layer = tf.keras.layers.Input([FLAGS.input_size, FLAGS.input_size, 3])
    feature_maps = YOLOv4(input_layer, NUM_CLASS)
    bbox_tensors = []
    prob_tensors = []
    for i, fm in enumerate(feature_maps):
        if i == 0:
            output_tensors = decode(fm, FLAGS.input_size // 8, NUM_CLASS, STRIDES, ANCHORS, i, XYSCALE)
        elif i == 1:
            output_tensors = decode(fm, FLAGS.input_size // 16, NUM_CLASS, STRIDES, ANCHORS, i, XYSCALE)
        else:
            output_tensors = decode(fm, FLAGS.input_size // 32, NUM_CLASS, STRIDES, ANCHORS, i, XYSCALE)
        bbox_tensors.append(output_tensors[0])
        prob_tensors.append(output_tensors[1])
    pred_bbox = tf.concat(bbox_tensors, axis=1)
    pred_prob = tf.concat(prob_tensors, axis=1)
    boxes, pred_conf = filter_boxes(pred_bbox, pred_prob, score_threshold=FLAGS.score_thres, input_shape=tf.constant([FLAGS.input_size, FLAGS.input_size]))
    pred = tf.concat([boxes, pred_conf], axis=-1)
    model = tf.keras.Model(input_layer, pred)
    utils.load_weights(model, FLAGS.weights, FLAGS.model)
    model.summary()
    model.save(FLAGS.output)
def save_tf():
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_layer = tf.keras.layers.Input([FLAGS.input_size, FLAGS.input_size, 3])
    feature_maps = YOLO(input_layer, NUM_CLASS, FLAGS.model, FLAGS.tiny)
    bbox_tensors = []
    prob_tensors = []
    if FLAGS.tiny:
        for i, fm in enumerate(feature_maps):
            if i == 0:
                output_tensors = decode(fm, FLAGS.input_size // 16, NUM_CLASS, STRIDES, ANCHORS, i, XYSCALE, FLAGS.framework)
            else:
                output_tensors = decode(fm, FLAGS.input_size // 32, NUM_CLASS, STRIDES, ANCHORS, i, XYSCALE, FLAGS.framework)
            bbox_tensors.append(output_tensors[0])
            prob_tensors.append(output_tensors[1])
    else:
        for i, fm in enumerate(feature_maps):
            if i == 0:
                output_tensors = decode(fm, FLAGS.input_size // 8, NUM_CLASS, STRIDES, ANCHORS, i, XYSCALE, FLAGS.framework)
            elif i == 1:
                output_tensors = decode(fm, FLAGS.input_size // 16, NUM_CLASS, STRIDES, ANCHORS, i, XYSCALE, FLAGS.framework)
            else:
                output_tensors = decode(fm, FLAGS.input_size // 32, NUM_CLASS, STRIDES, ANCHORS, i, XYSCALE, FLAGS.framework)
            bbox_tensors.append(output_tensors[0])
            prob_tensors.append(output_tensors[1])
    pred_bbox = tf.concat(bbox_tensors, axis=1)
    pred_prob = tf.concat(prob_tensors, axis=1)
    if FLAGS.framework == 'tflite':
        pred = (pred_bbox, pred_prob)
    else:
        boxes, pred_conf = filter_boxes(pred_bbox, pred_prob, score_threshold=FLAGS.score_thres, input_shape=tf.constant([FLAGS.input_size, FLAGS.input_size]))
        pred = tf.concat([boxes, pred_conf], axis=-1)
    model = tf.keras.Model(input_layer, pred)
    model.load_weights(FLAGS.weights)
    # utils.load_weights(model, FLAGS.weights, FLAGS.model, FLAGS.tiny)  # use this instead when loading a darknet weights file
    # model.summary()
    # model.save('/checkpoints/yolov4-416')
    # model.save(FLAGS.output, save_format='tf')
    tf.saved_model.save(model, FLAGS.output)  # this save format produces assets/, variables/ and saved_model.pb
def save_tf():
    if FLAGS.license:
        cfg.YOLO.CLASSES = "./data/classes/custom.names"
    else:
        cfg.YOLO.CLASSES = "./data/classes/char.names"
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    # print(read_class_names(cfg.YOLO.CLASSES))
    input_layer = tf.keras.layers.Input([FLAGS.input_size, FLAGS.input_size, 3])
    feature_maps = YOLO(input_layer, NUM_CLASS, FLAGS.model, FLAGS.tiny)
    bbox_tensors = []
    prob_tensors = []
    if FLAGS.tiny:
        for i, fm in enumerate(feature_maps):
            if i == 0:
                output_tensors = decode(fm, FLAGS.input_size // 16, NUM_CLASS, STRIDES, ANCHORS, i, XYSCALE, FLAGS.framework)
            else:
                output_tensors = decode(fm, FLAGS.input_size // 32, NUM_CLASS, STRIDES, ANCHORS, i, XYSCALE, FLAGS.framework)
            bbox_tensors.append(output_tensors[0])
            prob_tensors.append(output_tensors[1])
    else:
        for i, fm in enumerate(feature_maps):
            if i == 0:
                output_tensors = decode(fm, FLAGS.input_size // 8, NUM_CLASS, STRIDES, ANCHORS, i, XYSCALE, FLAGS.framework)
            elif i == 1:
                output_tensors = decode(fm, FLAGS.input_size // 16, NUM_CLASS, STRIDES, ANCHORS, i, XYSCALE, FLAGS.framework)
            else:
                output_tensors = decode(fm, FLAGS.input_size // 32, NUM_CLASS, STRIDES, ANCHORS, i, XYSCALE, FLAGS.framework)
            bbox_tensors.append(output_tensors[0])
            prob_tensors.append(output_tensors[1])
    pred_bbox = tf.concat(bbox_tensors, axis=1)
    pred_prob = tf.concat(prob_tensors, axis=1)
    if FLAGS.framework == 'tflite':
        pred = (pred_bbox, pred_prob)
    else:
        boxes, pred_conf = filter_boxes(pred_bbox, pred_prob, score_threshold=FLAGS.score_thres, input_shape=tf.constant([FLAGS.input_size, FLAGS.input_size]))
        pred = tf.concat([boxes, pred_conf], axis=-1)
    model = tf.keras.Model(input_layer, pred)
    utils.load_weights(model, FLAGS.weights, FLAGS.model, FLAGS.tiny)
    model.summary()
    model.save(FLAGS.output)
def save_tf():
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_layer = tf.keras.layers.Input([FLAGS.input_size, FLAGS.input_size, 3])
    feature_maps = YOLO(input_layer, NUM_CLASS, FLAGS.model, FLAGS.tiny)
    bbox_tensors = []
    prob_tensors = []
    if FLAGS.tiny:
        for i, fm in enumerate(feature_maps):
            if i == 0:
                output_tensors = decode(fm, FLAGS.input_size // 16, NUM_CLASS, STRIDES, ANCHORS, i, XYSCALE, FLAGS.framework)
            else:
                output_tensors = decode(fm, FLAGS.input_size // 32, NUM_CLASS, STRIDES, ANCHORS, i, XYSCALE, FLAGS.framework)
            bbox_tensors.append(output_tensors[0])
            prob_tensors.append(output_tensors[1])
    else:
        for i, fm in enumerate(feature_maps):
            if i == 0:
                output_tensors = decode(fm, FLAGS.input_size // 8, NUM_CLASS, STRIDES, ANCHORS, i, XYSCALE, FLAGS.framework)
            elif i == 1:
                output_tensors = decode(fm, FLAGS.input_size // 16, NUM_CLASS, STRIDES, ANCHORS, i, XYSCALE, FLAGS.framework)
            else:
                output_tensors = decode(fm, FLAGS.input_size // 32, NUM_CLASS, STRIDES, ANCHORS, i, XYSCALE, FLAGS.framework)
            bbox_tensors.append(output_tensors[0])
            prob_tensors.append(output_tensors[1])
    pred_bbox = tf.concat(bbox_tensors, axis=1)
    pred_prob = tf.concat(prob_tensors, axis=1)
    if FLAGS.framework == 'tflite':
        pred = (pred_bbox, pred_prob)
    else:
        boxes, pred_conf = filter_boxes(pred_bbox, pred_prob, score_threshold=FLAGS.score_thres, input_shape=tf.constant([FLAGS.input_size, FLAGS.input_size]))
        pred = tf.concat([boxes, pred_conf], axis=-1)
    model = tf.keras.Model(input_layer, pred)
    model.load_weights(FLAGS.input_model_path)
    model.summary()
    # model.save(FLAGS.output_model_path)
    return model
def detect(self, image_data):
    self.interpreter.set_tensor(self.input_details[0]['index'], image_data)
    self.interpreter.invoke()
    pred = [self.interpreter.get_tensor(self.output_details[i]['index']) for i in range(len(self.output_details))]
    # run detections using yolov3 if flag is set
    if FLAGS.model == 'yolov3' and FLAGS.tiny:
        boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25, input_shape=tf.constant([input_size, input_size]))
    else:
        boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant([input_size, input_size]))
    return boxes, pred_conf
def _pred(infer, file_name, original_image, images_data, input_details=None, output_details=None):
    input_size = FLAGS.size
    if FLAGS.framework == 'tflite':
        infer.set_tensor(input_details[0]['index'], images_data)
        infer.invoke()
        pred = [infer.get_tensor(output_details[i]['index']) for i in range(len(output_details))]
        if FLAGS.model == 'yolov3' and FLAGS.tiny:
            boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25, input_shape=tf.constant([input_size, input_size]))
        else:
            boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant([input_size, input_size]))
    else:
        infer = infer.signatures['serving_default']
        batch_data = tf.constant(images_data)
        pred_bbox = infer(batch_data)
        for key, value in pred_bbox.items():
            boxes = value[:, :, 0:4]
            pred_conf = value[:, :, 4:]
    boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
        boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
        scores=tf.reshape(pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
        max_output_size_per_class=50, max_total_size=50,
        iou_threshold=FLAGS.iou, score_threshold=FLAGS.score)
    pred_bbox = [boxes.numpy(), scores.numpy(), classes.numpy(), valid_detections.numpy()]
    image = utils.draw_bbox(original_image, pred_bbox)
    # image = utils.draw_bbox(image_data*255, pred_bbox)
    image = Image.fromarray(image.astype(np.uint8))
    # image.show()
    image = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2RGB)
    cv2.imwrite(os.path.join(FLAGS.output, file_name), image)
    num_detect = pred_bbox[3][0]
    scores = pred_bbox[1][0]
    bboxs = pred_bbox[0][0]
    return num_detect, scores, bboxs
def get_bbox(self, image_bytes):
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = CONFIG.image_size
    cameraId = image_bytes["CameraId"]
    nparr = np.frombuffer(image_bytes["ImageBytes"], np.uint8)
    original_image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
    # print("Image shape: ", original_image.shape)
    image_h, image_w, _ = original_image.shape
    image_data = cv2.resize(original_image, (input_size[0], input_size[1]))
    image_data = image_data / 255.
    images_data = []
    for i in range(1):
        images_data.append(image_data)
    images_data = np.asarray(images_data).astype(np.float32)
    self.interpreter.allocate_tensors()
    input_details = self.interpreter.get_input_details()
    output_details = self.interpreter.get_output_details()
    self.interpreter.set_tensor(input_details[0]['index'], images_data)
    self.interpreter.invoke()
    pred = [self.interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))]
    boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant([input_size[0], input_size[1]]))
    boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
        boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
        scores=tf.reshape(pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
        max_output_size_per_class=50, max_total_size=50,
        iou_threshold=CONFIG.iou, score_threshold=CONFIG.score)
    pred_bbox = [boxes.numpy(), scores.numpy(), classes.numpy(), valid_detections.numpy()]
    detections = utils.bbox_details(original_image, pred_bbox)
    return original_image, detections, classes.numpy()
def save_tf(parameters):
    """Transform a darknet model of YOLO to a TensorFlow model

    Args:
        parameters (dictionary): input parameters
            - weights: path to the darknet weights
            - input_size: input size of the model
            - model: model to transform
            - score_thres: score threshold used to filter boxes
            - weights_tf: path to save the tf weights

    Returns:
        [void]
    """
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(tiny=False, model=parameters['model'])
    input_layer = tf.keras.layers.Input([parameters['input_size'], parameters['input_size'], 3])
    feature_maps = YOLO(input_layer, NUM_CLASS, parameters['model'], False)
    bbox_tensors = []
    prob_tensors = []
    for i, fm in enumerate(feature_maps):
        if i == 0:
            output_tensors = decode(fm, parameters['input_size'] // 8, NUM_CLASS, STRIDES, ANCHORS, i, XYSCALE, 'tf')
        elif i == 1:
            output_tensors = decode(fm, parameters['input_size'] // 16, NUM_CLASS, STRIDES, ANCHORS, i, XYSCALE, 'tf')
        else:
            output_tensors = decode(fm, parameters['input_size'] // 32, NUM_CLASS, STRIDES, ANCHORS, i, XYSCALE, 'tf')
        bbox_tensors.append(output_tensors[0])
        prob_tensors.append(output_tensors[1])
    pred_bbox = tf.concat(bbox_tensors, axis=1)
    pred_prob = tf.concat(prob_tensors, axis=1)
    boxes, pred_conf = filter_boxes(pred_bbox, pred_prob, score_threshold=parameters['score_thres'], input_shape=tf.constant([parameters['input_size'], parameters['input_size']]))
    pred = tf.concat([boxes, pred_conf], axis=-1)
    model = tf.keras.Model(input_layer, pred)
    utils.load_weights(model, parameters['weights'], parameters['model'], False)
    model.summary()
    model.save(parameters['weights_tf'])
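# Hedged usage sketch for save_tf(parameters) above: the paths are placeholders and
# the input size and score threshold are only example values.
def example_save_tf_usage():
    parameters = {
        'weights': './data/yolov4.weights',        # placeholder darknet weights path
        'input_size': 416,                         # example input size
        'model': 'yolov4',
        'score_thres': 0.2,                        # example score threshold
        'weights_tf': './checkpoints/yolov4-416',  # placeholder output path for the tf model
    }
    save_tf(parameters)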
def model_inference(self, image_input, interpreter, input_details, output_details):
    interpreter.set_tensor(input_details[0]['index'], image_input)
    interpreter.invoke()
    pred = [interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))]
    boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant([self.input_size, self.input_size]))
    return boxes, pred_conf
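# Hedged setup sketch for model_inference() above: one way the interpreter and its
# input/output details could be prepared; the .tflite path is a placeholder.
def example_build_tflite_interpreter(model_path='./checkpoints/yolov4-416.tflite'):
    interpreter = tf.lite.Interpreter(model_path=model_path)
    interpreter.allocate_tensors()
    return interpreter, interpreter.get_input_details(), interpreter.get_output_details()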
def main(_argv):
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    video_path = FLAGS.video
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']
    # begin video capture
    try:
        vid = cv2.VideoCapture(int(video_path))
    except:
        vid = cv2.VideoCapture(video_path)
    out = None
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
    # initiating pyttsx3 engine and thread
    engine = pyttsx3.init()
    thread = threading.Thread()
    while True:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
        else:
            print('Video has ended or failed, try a different video format!')
            break
        frame_size = frame.shape[:2]
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        start_time = time.time()
        if FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred = [interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))]
            if FLAGS.model == 'yolov3' and FLAGS.tiny:
                boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25, input_shape=tf.constant([input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant([input_size, input_size]))
        else:
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]
        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=5, max_total_size=10,
            iou_threshold=FLAGS.iou, score_threshold=FLAGS.score)
        pred_bbox = [boxes.numpy(), scores.numpy(), classes.numpy(), valid_detections.numpy()]
        image = utils.draw_bbox(frame, pred_bbox)
        fps = 1.0 / (time.time() - start_time)
        print("FPS: %.2f" % fps)
        result = np.asarray(image)
        cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
        result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        if not FLAGS.dont_show:
            cv2.imshow("result", result)
        if FLAGS.output:
            out.write(result)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            engine.stop()
            break
        # ========== voiceFeedback ==========
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)
        allowed_classes = list(class_names.values())
        valid_items = pred_bbox[3][0]
        valid_classes = pred_bbox[2][0]
        valid_boxes = pred_bbox[0][0]
        # section = (input_size/3)
        (H, W) = frame.shape[:2]
        res = []
        for i in range(valid_items):
            (top, left, bottom, right) = valid_boxes[i]
            centerX = round((right + left) / 2)
            centerY = round((top + bottom) / 2)
            if centerX <= W / 3:
                w_pos = 'left '
            elif centerX <= (W / 3 * 2):
                w_pos = 'center '
            else:
                w_pos = 'right '
            if centerY <= H / 3:
                h_pos = 'top '
            elif centerY <= (H / 3 * 2):
                h_pos = 'mid '
            else:
                h_pos = 'bottom '
            res.append(h_pos + w_pos + allowed_classes[int(valid_classes[i])])
        description = ', '.join(res)
        # Using pyttsx3 to play sound directly without saving the file via a thread
        if not thread.is_alive():
            thread.__init__(name="texToSpeech", target=textToSpeech, args=[engine, description])
            thread.start()
        # ========= endVoiceFeedback ==========
    cv2.destroyAllWindows()
def iterate(lines, model, vid, frame_num):
    tracks = []
    return_value, frame = vid.read()
    if return_value:
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image = Image.fromarray(frame)
    else:
        print('Video has ended or failed, try a different video format!')
        cv2.destroyAllWindows()
        return False, tracks
    frame_size = frame.shape[:2]
    image_data = cv2.resize(frame, (FLAGS.size, FLAGS.size))
    image_data = image_data / 255.
    image_data = image_data[np.newaxis, ...].astype(np.float32)
    start_time = time.time()
    # run detections on tflite if flag is set
    if FLAGS.framework == 'tflite':
        interpreter.set_tensor(input_details[0]['index'], image_data)
        interpreter.invoke()
        pred = [interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))]
        # run detections using yolov3 if flag is set
        if FLAGS.model == 'yolov3' and FLAGS.tiny:
            boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25, input_shape=tf.constant([FLAGS.size, FLAGS.size]))
        else:
            boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant([FLAGS.size, FLAGS.size]))
    else:
        batch_data = tf.constant(image_data)
        pred_bbox = model.signatures['serving_default'](batch_data)
        for key, value in pred_bbox.items():
            boxes = value[:, :, 0:4]
            pred_conf = value[:, :, 4:]
    boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
        boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
        scores=tf.reshape(pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
        max_output_size_per_class=50, max_total_size=50,
        iou_threshold=FLAGS.iou, score_threshold=FLAGS.score)
    # convert data to numpy arrays and slice out unused elements
    num_objects = valid_detections.numpy()[0]
    bboxes = boxes.numpy()[0]
    bboxes = bboxes[0:int(num_objects)]
    scores = scores.numpy()[0]
    scores = scores[0:int(num_objects)]
    classes = classes.numpy()[0]
    classes = classes[0:int(num_objects)]
    # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
    original_h, original_w, _ = frame.shape
    bboxes = utils.format_boxes(bboxes, original_h, original_w)
    # store all predictions in one parameter for simplicity when calling functions
    pred_bbox = [bboxes, scores, classes, num_objects]
    # read in all class names from config
    class_names = utils.read_class_names(cfg.YOLO.CLASSES)
    # by default allow all classes in .names file
    # allowed_classes = list(class_names.values())
    # custom allowed classes (uncomment line below to customize tracker for only people)
    # allowed_classes = ['person']
    allowed_classes = ['car', 'bus', 'truck']
    # loop through objects and use class index to get class name, allow only classes in allowed_classes list
    names = []
    deleted_indx = []
    for i in range(num_objects):
        class_indx = int(classes[i])
        class_name = class_names[class_indx]
        if class_name not in allowed_classes:
            deleted_indx.append(i)
        else:
            names.append(class_name)
    names = np.array(names)
    count = len(names)
    if FLAGS.count:
        cv2.putText(frame, "Objects being tracked: {}".format(count), (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2)
        print("Objects being tracked: {}".format(count))
    # delete detections that are not in allowed_classes
    bboxes = np.delete(bboxes, deleted_indx, axis=0)
    scores = np.delete(scores, deleted_indx, axis=0)
    # encode yolo detections and feed to tracker
    features = encoder(frame, bboxes)
    detections = [Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip(bboxes, scores, names, features)]
    # initialize color map
    cmap = plt.get_cmap('tab20b')
    colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]
    # run non-maxima supression
    boxs = np.array([d.tlwh for d in detections])
    scores = np.array([d.confidence for d in detections])
    classes = np.array([d.class_name for d in detections])
    indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
    detections = [detections[i] for i in indices]
    # Call the tracker
    tracker.predict()
    tracker.update(detections)
    # update tracks
    for track in tracker.tracks:
        if not track.is_confirmed() or track.time_since_update > 1:
            continue
        bbox = track.to_tlbr()
        class_name = track.get_class()
        tracks.append(Rect(track.track_id, (int(bbox[0]), int(bbox[1])), (int(bbox[2]) - int(bbox[0]), int(bbox[3]) - int(bbox[1]))))
        # draw bbox on screen
        color = colors[int(track.track_id) % len(colors)]
        color = [i * 255 for i in color]
        cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2)
        cv2.rectangle(frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1)
        cv2.putText(frame, class_name + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2)
        # if enable info flag then print details about each track
        # if FLAGS.info:
        #     print("Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}".format(str(track.track_id), class_name, (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))))
    for line in lines:
        cv2.line(frame, line.pt1, line.pt2, line.color, 3)
        cv2.line(frame, line.vertor_pt1, line.vertor_pt2, (255, 255, 0), 2)
        cv2.putText(frame, str(line.count), line.center, cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    cv2.putText(frame, str(frame_num), (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2)
    # calculate frames per second of running detections
    fps = 1.0 / (time.time() - start_time)
    print("FPS: %.2f" % fps)
    result = np.asarray(frame)
    result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
    if not FLAGS.dont_show:
        cv2.imshow("Output Video", result)
    # if output flag is set, save video file
    if FLAGS.output:
        out.write(result)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        cv2.destroyAllWindows()
        return False, tracks
    return True, tracks
def main(_argv):
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    images = FLAGS.images
    # load model
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])
    # loop through images in list and run Yolov4 model on each
    for count, image_path in enumerate(images, 1):
        original_image = cv2.imread(image_path)
        original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
        image_data = cv2.resize(original_image, (input_size, input_size))
        image_data = image_data / 255.
        # get image name by using split method
        image_name = image_path.split('/')[-1]
        image_name = image_name.split('.')[0]
        images_data = []
        for i in range(1):
            images_data.append(image_data)
        images_data = np.asarray(images_data).astype(np.float32)
        if FLAGS.framework == 'tflite':
            interpreter.allocate_tensors()
            input_details = interpreter.get_input_details()
            output_details = interpreter.get_output_details()
            interpreter.set_tensor(input_details[0]['index'], images_data)
            interpreter.invoke()
            pred = [interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))]
            if FLAGS.model == 'yolov3' and FLAGS.tiny:
                boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25, input_shape=tf.constant([input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant([input_size, input_size]))
        else:
            infer = saved_model_loaded.signatures['serving_default']
            batch_data = tf.constant(images_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]
        # run non max suppression on detections
        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50, max_total_size=50,
            iou_threshold=FLAGS.iou, score_threshold=FLAGS.score)
        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, xmax, ymax
        original_h, original_w, _ = original_image.shape
        bboxes = utils.format_boxes(boxes.numpy()[0], original_h, original_w)
        # hold all detection data in one variable
        pred_bbox = [bboxes, scores.numpy()[0], classes.numpy()[0], valid_detections.numpy()[0]]
        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)
        # by default allow all classes in .names file
        allowed_classes = list(class_names.values())
        # custom allowed classes (uncomment line below to allow detections for only people)
        # allowed_classes = ['person']
        # if crop flag is enabled, crop each detection and save it as new image
        if FLAGS.crop:
            crop_path = os.path.join(os.getcwd(), 'detections', 'crop', image_name)
            try:
                os.mkdir(crop_path)
            except FileExistsError:
                pass
            crop_objects(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB), pred_bbox, crop_path, allowed_classes)
        # if ocr flag is enabled, perform general text extraction using Tesseract OCR on object detection bounding box
        if FLAGS.ocr:
            ocr(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB), pred_bbox)
        # if count flag is enabled, perform counting of objects
        if FLAGS.count:
            # count objects found
            counted_classes = count_objects(pred_bbox, by_class=False, allowed_classes=allowed_classes)
            # loop through dict and print
            for key, value in counted_classes.items():
                print("Number of {}s: {}".format(key, value))
            image = utils.draw_bbox(original_image, pred_bbox, FLAGS.info, counted_classes, allowed_classes=allowed_classes, read_plate=FLAGS.plate)
        else:
            image = utils.draw_bbox(original_image, pred_bbox, FLAGS.info, allowed_classes=allowed_classes, read_plate=FLAGS.plate)
        image = Image.fromarray(image.astype(np.uint8))
        if not FLAGS.dont_show:
            image.show()
        image = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2RGB)
        cv2.imwrite(FLAGS.output + 'detection' + str(count) + '.png', image)
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.4
    nn_budget = None
    nms_max_overlap = 1.0
    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # calculate cosine distance metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    # initialize tracker
    tracker = Tracker(metric)
    # load configuration for object detector
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    video_path = FLAGS.video
    # load tflite model if flag is set
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    # otherwise load standard tensorflow saved model
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']
    # begin video capture
    try:
        vid = cv2.VideoCapture(int(video_path))
    except:
        vid = cv2.VideoCapture(video_path)
    out = None
    # get video ready to save locally if flag is set
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
    frame_num = 0
    # while video is running
    while True:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
        else:
            print('Video has ended or failed, try a different video format!')
            break
        frame_num += 1
        print('Frame #: ', frame_num)
        frame_size = frame.shape[:2]
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        start_time = time.time()
        # run detections on tflite if flag is set
        if FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred = [interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))]
            # run detections using yolov3 if flag is set
            if FLAGS.model == 'yolov3' and FLAGS.tiny:
                boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25, input_shape=tf.constant([input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant([input_size, input_size]))
        else:
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]
        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50, max_total_size=50,
            iou_threshold=FLAGS.iou, score_threshold=FLAGS.score)
        # convert data to numpy arrays and slice out unused elements
        num_objects = valid_detections.numpy()[0]
        bboxes = boxes.numpy()[0]
        bboxes = bboxes[0:int(num_objects)]
        scores = scores.numpy()[0]
        scores = scores[0:int(num_objects)]
        classes = classes.numpy()[0]
        classes = classes[0:int(num_objects)]
        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)
        # store all predictions in one parameter for simplicity when calling functions
        pred_bbox = [bboxes, scores, classes, num_objects]
        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)
        # by default allow all classes in .names file
        allowed_classes = list(class_names.values())
        # custom allowed classes (uncomment line below to customize tracker for only people)
        # allowed_classes = ['person']
        # loop through objects and use class index to get class name, allow only classes in allowed_classes list
        names = []
        deleted_indx = []
        for i in range(num_objects):
            class_indx = int(classes[i])
            class_name = class_names[class_indx]
            if class_name not in allowed_classes:
                deleted_indx.append(i)
            else:
                names.append(class_name)
        names = np.array(names)
        count = len(names)
        if FLAGS.count:
            cv2.putText(frame, "Objects being tracked: {}".format(count), (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2)
            print("Objects being tracked: {}".format(count))
        # delete detections that are not in allowed_classes
        bboxes = np.delete(bboxes, deleted_indx, axis=0)
        scores = np.delete(scores, deleted_indx, axis=0)
        # encode yolo detections and feed to tracker
        features = encoder(frame, bboxes)
        detections = [Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip(bboxes, scores, names, features)]
        # initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]
        # run non-maxima supression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]
        # Call the tracker
        tracker.predict()
        tracker.update(detections)
        # update tracks
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            # draw bbox on screen
            # names = {'6_d': 'Thomas Delaney', '10_b': 'Leroy Sane', '18_b': 'Leon Goretzka', '25_b': 'Thomas Muller',
            #          '5_d': 'Dan-Axel Zagadou', '12_d': 'Zaragoza', '4_b': 'Niklas Sule', '14_d': 'Nico Schulz',
            #          '11_d': 'Marco Reus', 'Referee': 'Referee', 'ball': 'ball', '10_d': 'Thorgan Hazard',
            #          '6_b': 'Joshua Kimmich ', 'gk_b': 'Ron-Thorben Hoffmann(GK)', '17_b': 'Jérôme Boateng',
            #          '27_b': 'David Alaba', '9_d': 'Erling Haaland', '8_d': 'Mahmoud Dahoud', 'gk_d': 'Luca Unbehaun(GK)',
            #          '19_b': 'Alphonso Davies', '29_b': 'Kingsley Coman', '24_d': 'Marcel Schmelzer',
            #          '9_b': 'Robert Lewandowski', '23_d': 'Emre Can'}
            # if class_name == 'Referee':
            #     color = (0, 0, 0)
            if class_name == 'ball':
                # color = (255, 255, 255)
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 1)
            # else:
            #     try:
            #         colors = {'b': (252, 3, 78), 'd': (250, 247, 80)}
            #         color = colors[str(class_name.split('_')[-1])]
            #     except KeyError:
            #         pass
            #     class_name = names[str(class_name)]
            #     color = (250, 247, 80)
            # color = colors[int(track.track_id) % len(colors)]
            # color = [i * 255 for i in color]
            # cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 1)
            # cv2.rectangle(frame, (int(bbox[0]), int(bbox[1]-30)), (int(bbox[0])+(len(str(class_name)))*17, int(bbox[1])), color, -1)
            cv2.putText(frame, class_name, (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 251, 46), 2)
            # if enable info flag then print details about each track
            if FLAGS.info:
                print("Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}".format(str(track.track_id), class_name, (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))))
        # calculate frames per second of running detections
        fps = 1.0 / (time.time() - start_time)
        print("FPS: %.2f" % fps)
        result = np.asarray(frame)
        result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        if not FLAGS.dont_show:
            cv2.imshow("Output Video", result)
        # if output flag is set, save video file
        if FLAGS.output:
            out.write(result)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cv2.destroyAllWindows()
def main(_argv):
    INPUT_SIZE = FLAGS.size
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    CLASSES = utils.read_class_names(cfg.YOLO.CLASSES)
    predicted_dir_path = './mAP/predicted'
    ground_truth_dir_path = './mAP/ground-truth'
    if os.path.exists(predicted_dir_path):
        shutil.rmtree(predicted_dir_path)
    if os.path.exists(ground_truth_dir_path):
        shutil.rmtree(ground_truth_dir_path)
    if os.path.exists(cfg.TEST.DECTECTED_IMAGE_PATH):
        shutil.rmtree(cfg.TEST.DECTECTED_IMAGE_PATH)
    os.mkdir(predicted_dir_path)
    os.mkdir(ground_truth_dir_path)
    os.mkdir(cfg.TEST.DECTECTED_IMAGE_PATH)
    # Build Model
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']
    num_lines = sum(1 for line in open(FLAGS.annotation_path))
    with open(cfg.TEST.ANNOT_PATH, 'r') as annotation_file:
        for num, line in enumerate(annotation_file):
            annotation = line.strip().split()
            image_path = annotation[0]
            image_name = image_path.split('/')[-1]
            image = cv2.imread(image_path)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            bbox_data_gt = np.array([list(map(int, box.split(','))) for box in annotation[1:]])
            if len(bbox_data_gt) == 0:
                bboxes_gt = []
                classes_gt = []
            else:
                bboxes_gt, classes_gt = bbox_data_gt[:, :4], bbox_data_gt[:, 4]
            ground_truth_path = os.path.join(ground_truth_dir_path, str(num) + '.txt')
            print('=> ground truth of %s:' % image_name)
            num_bbox_gt = len(bboxes_gt)
            with open(ground_truth_path, 'w') as f:
                for i in range(num_bbox_gt):
                    class_name = CLASSES[classes_gt[i]]
                    xmin, ymin, xmax, ymax = list(map(str, bboxes_gt[i]))
                    bbox_mess = ' '.join([class_name, xmin, ymin, xmax, ymax]) + '\n'
                    f.write(bbox_mess)
                    print('\t' + str(bbox_mess).strip())
            print('=> predict result of %s:' % image_name)
            predict_result_path = os.path.join(predicted_dir_path, str(num) + '.txt')
            # Predict Process
            image_size = image.shape[:2]
            # image_data = utils.image_preprocess(np.copy(image), [INPUT_SIZE, INPUT_SIZE])
            image_data = cv2.resize(np.copy(image), (INPUT_SIZE, INPUT_SIZE))
            image_data = image_data / 255.
            image_data = image_data[np.newaxis, ...].astype(np.float32)
            if FLAGS.framework == 'tflite':
                interpreter.set_tensor(input_details[0]['index'], image_data)
                interpreter.invoke()
                pred = [interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))]
                if FLAGS.model == 'yolov4' and FLAGS.tiny:
                    boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25)
                else:
                    boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25)
            else:
                batch_data = tf.constant(image_data)
                pred_bbox = infer(batch_data)
                for key, value in pred_bbox.items():
                    boxes = value[:, :, 0:4]
                    pred_conf = value[:, :, 4:]
            boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
                boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
                scores=tf.reshape(pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
                max_output_size_per_class=50, max_total_size=50,
                iou_threshold=FLAGS.iou, score_threshold=FLAGS.score)
            boxes, scores, classes, valid_detections = [boxes.numpy(), scores.numpy(), classes.numpy(), valid_detections.numpy()]
            # if cfg.TEST.DECTECTED_IMAGE_PATH is not None:
            #     image_result = utils.draw_bbox(np.copy(image), [boxes, scores, classes, valid_detections])
            #     cv2.imwrite(cfg.TEST.DECTECTED_IMAGE_PATH + image_name, image_result)
            with open(predict_result_path, 'w') as f:
                image_h, image_w, _ = image.shape
                for i in range(valid_detections[0]):
                    if int(classes[0][i]) < 0 or int(classes[0][i]) > NUM_CLASS:
                        continue
                    coor = boxes[0][i]
                    coor[0] = int(coor[0] * image_h)
                    coor[2] = int(coor[2] * image_h)
                    coor[1] = int(coor[1] * image_w)
                    coor[3] = int(coor[3] * image_w)
                    score = scores[0][i]
                    class_ind = int(classes[0][i])
                    class_name = CLASSES[class_ind]
                    score = '%.4f' % score
                    ymin, xmin, ymax, xmax = list(map(str, coor))
                    bbox_mess = ' '.join([class_name, score, xmin, ymin, xmax, ymax]) + '\n'
                    f.write(bbox_mess)
                    print('\t' + str(bbox_mess).strip())
            print(num, num_lines)
def main(_argv):
    global NUM_CLASS, STRIDES, ANCHORS, XYSCALE
    INPUT_SIZE = FLAGS.size
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    CLASSES = utils.read_class_names(cfg.YOLO.CLASSES)
    predicted_dir_path = './mAP/predicted'
    ground_truth_dir_path = './mAP/ground-truth'
    if os.path.exists(predicted_dir_path):
        shutil.rmtree(predicted_dir_path)
    if os.path.exists(ground_truth_dir_path):
        shutil.rmtree(ground_truth_dir_path)
    if os.path.exists(cfg.TEST.DECTECTED_IMAGE_PATH):
        shutil.rmtree(cfg.TEST.DECTECTED_IMAGE_PATH)
    os.mkdir(predicted_dir_path)
    os.mkdir(ground_truth_dir_path)
    os.mkdir(cfg.TEST.DECTECTED_IMAGE_PATH)
    # Build Model
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    elif FLAGS.framework == 'tvm':
        ctx = tvm.cpu(0)
        loaded_graph = open(os.path.join(FLAGS.weights, "modelDescription.json")).read()
        loaded_lib = tvm.runtime.load_module(os.path.join(FLAGS.weights, "modelLibrary.so"))
        loaded_params = bytearray(open(os.path.join(FLAGS.weights, "modelParams.params"), "rb").read())
        # Get rid of the leip key
        graphjson = json.loads(loaded_graph)
        if 'leip' in list(graphjson.keys()):
            del graphjson['leip']
        loaded_graph = json.dumps(graphjson)
        m = graph_runtime.create(loaded_graph, loaded_lib, ctx)
        m.load_params(loaded_params)
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']
    num_lines = sum(1 for line in open(FLAGS.annotation_path))
    with open(cfg.TEST.ANNOT_PATH, 'r') as annotation_file:
        for num, line in enumerate(annotation_file):
            annotation = line.strip().split()
            image_path = annotation[0]
            image_name = image_path.split('/')[-1]
            image = cv2.imread(image_path)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            bbox_data_gt = np.array([list(map(int, box.split(','))) for box in annotation[1:]])
            if len(bbox_data_gt) == 0:
                bboxes_gt = []
                classes_gt = []
            else:
                bboxes_gt, classes_gt = bbox_data_gt[:, :4], bbox_data_gt[:, 4]
            ground_truth_path = os.path.join(ground_truth_dir_path, str(num) + '.txt')
            print('=> ground truth of %s:' % image_name)
            num_bbox_gt = len(bboxes_gt)
            with open(ground_truth_path, 'w') as f:
                for i in range(num_bbox_gt):
                    class_name = CLASSES[classes_gt[i]]
                    xmin, ymin, xmax, ymax = list(map(str, bboxes_gt[i]))
                    bbox_mess = ' '.join([class_name, xmin, ymin, xmax, ymax]) + '\n'
                    f.write(bbox_mess)
                    print('\t' + str(bbox_mess).strip())
            print('=> predict result of %s:' % image_name)
            predict_result_path = os.path.join(predicted_dir_path, str(num) + '.txt')
            # Predict Process
            image_size = image.shape[:2]
            # image_data = utils.image_preprocess(np.copy(image), [INPUT_SIZE, INPUT_SIZE])
            image_data = cv2.resize(np.copy(image), (INPUT_SIZE, INPUT_SIZE))
            if FLAGS.framework == 'tflite':
                image_data = image_data / 255.
                image_data = image_data[np.newaxis, ...].astype(np.float32)
                image_data_casted = image_data.astype(np.uint8)
                interpreter.set_tensor(input_details[0]['index'], image_data_casted)
                interpreter.invoke()
                # pred = [interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))]
                # if FLAGS.model == 'yolov4' and FLAGS.tiny == True:
                #     boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25)
                # else:
                #     boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25)
                fm1 = interpreter.get_tensor(output_details[0]['index']).astype(np.float32)
                fm2 = interpreter.get_tensor(output_details[1]['index']).astype(np.float32)
                fm3 = interpreter.get_tensor(output_details[2]['index']).astype(np.float32)
                print(fm1.shape)
                print(fm2.shape)
                print(fm3.shape)
                fm1 = my_dequantize(fm1.astype(np.float32), 1.1345850229263306, 223)
                fm2 = my_dequantize(fm2.astype(np.float32), 2.054811954498291, 242)
                fm3 = my_dequantize(fm3.astype(np.float32), 8.428282737731934, 248)
                pred = my_decode([fm1, fm2, fm3])  # these need to be ordered biggest tensor to smallest I think
                boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=FLAGS.score)
            elif FLAGS.framework == 'tvm':
                # image_data = image_data / 255.  # DO NOT DIVIDE by 255 for uint8 eval!
                image_data = image_data[np.newaxis, ...].astype(np.float32)
                image_data_casted = image_data.astype(np.uint8)
                m.set_input("input_1", tvm.nd.array(image_data_casted))
                ftimer = m.module.time_evaluator("run", ctx, number=1, repeat=1)
                prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
                fm1 = m.get_output(0).asnumpy()
                fm2 = m.get_output(1).asnumpy()
                fm3 = m.get_output(2).asnumpy()
                print(fm1.shape)
                print(fm2.shape)
                print(fm3.shape)
                fm1 = my_dequantize(fm1.astype(np.float32), 1.1345850229263306, 223)
                fm2 = my_dequantize(fm2.astype(np.float32), 2.054811954498291, 242)
                fm3 = my_dequantize(fm3.astype(np.float32), 8.428282737731934, 248)
                pred = my_decode([fm1, fm2, fm3])  # these need to be ordered biggest tensor to smallest I think
                boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=FLAGS.score)
                # exit()
            else:
                image_data = image_data / 255.
                image_data = image_data[np.newaxis, ...].astype(np.float32)
                batch_data = tf.constant(image_data)
                pred_bbox = infer(batch_data)
                for key, value in pred_bbox.items():
                    boxes = value[:, :, 0:4]
                    pred_conf = value[:, :, 4:]
            boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
                boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
                scores=tf.reshape(pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
                max_output_size_per_class=50, max_total_size=50,
                iou_threshold=FLAGS.iou, score_threshold=FLAGS.score)
            boxes, scores, classes, valid_detections = [boxes.numpy(), scores.numpy(), classes.numpy(), valid_detections.numpy()]
            # if cfg.TEST.DECTECTED_IMAGE_PATH is not None:
            #     image_result = utils.draw_bbox(np.copy(image), [boxes, scores, classes, valid_detections])
            #     cv2.imwrite(cfg.TEST.DECTECTED_IMAGE_PATH + image_name, image_result)
            with open(predict_result_path, 'w') as f:
                image_h, image_w, _ = image.shape
                for i in range(valid_detections[0]):
                    if int(classes[0][i]) < 0 or int(classes[0][i]) > NUM_CLASS:
                        continue
                    coor = boxes[0][i]
                    coor[0] = int(coor[0] * image_h)
                    coor[2] = int(coor[2] * image_h)
                    coor[1] = int(coor[1] * image_w)
                    coor[3] = int(coor[3] * image_w)
                    score = scores[0][i]
                    class_ind = int(classes[0][i])
                    class_name = CLASSES[class_ind]
                    score = '%.4f' % score
                    ymin, xmin, ymax, xmax = list(map(str, coor))
                    bbox_mess = ' '.join([class_name, score, xmin, ymin, xmax, ymax]) + '\n'
                    f.write(bbox_mess)
                    print('\t' + str(bbox_mess).strip())
            print(num, num_lines)
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.4
    nn_budget = None
    nms_max_overlap = 1.0
    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # calculate cosine distance metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    # initialize tracker
    tracker = Tracker(metric)
    # load configuration for object detector
    config = ConfigProto()
    config.gpu_options.allow_growth = False
    config.gpu_options.per_process_gpu_memory_fraction = 0.1
    _ = InteractiveSession(config=config)
    utils.load_config(FLAGS)
    input_size = FLAGS.size
    video_path = FLAGS.video
    # load tflite model if flag is set
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=f'{FLAGS.weights}_{FLAGS.size}')
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    # otherwise load standard tensorflow saved model
    else:
        saved_model_loaded = tf.saved_model.load(f'{FLAGS.weights}_{FLAGS.size}', tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']
    # begin video capture
    try:
        vid = cv2.VideoCapture(int(video_path))
    except:
        vid = cv2.VideoCapture(video_path)
    out = None
    # get video ready to save locally if flag is set
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
    all_start_time = None
    frame_num = 0
    # while video is running
    while True:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            Image.fromarray(frame)
        else:
            fps = float(frame_num) / (time.time() - all_start_time)
            print("fps=%.2f size=%d frames=%d deep=%s output=%s" % (fps, FLAGS.size, frame_num, "true" if FLAGS.deep else "false", FLAGS.output))
            break
        frame_num += 1
        if FLAGS.info:
            print("frame_num=%d" % frame_num)
        start_time = time.time()
        if all_start_time is None:
            all_start_time = time.time()
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        # run detections on tflite if flag is set
        if FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred = [interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))]
            # run detections using yolov3 if flag is set
            if FLAGS.model == 'yolov3' and FLAGS.tiny:
                boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25, input_shape=tf.constant([input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant([input_size, input_size]))
        else:
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for _, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]
        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50, max_total_size=50,
            iou_threshold=FLAGS.iou, score_threshold=FLAGS.score)
        # convert data to numpy arrays and slice out unused elements
        num_objects = valid_detections.numpy()[0]
        bboxes = boxes.numpy()[0]
        bboxes = bboxes[0:int(num_objects)]
        scores = scores.numpy()[0]
        scores = scores[0:int(num_objects)]
        classes = classes.numpy()[0]
        classes = classes[0:int(num_objects)]
        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)
        # store all predictions in one parameter for simplicity when calling functions
        pred_bbox = [bboxes, scores, classes, num_objects]
        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)
        # by default allow all classes in .names file
        allowed_classes = list(class_names.values())
        # custom allowed classes (uncomment line below to customize tracker for only people)
        allowed_classes = ['person']
        # loop through objects and use class index to get class name, allow only classes in allowed_classes list
        names = []
        deleted_indx = []
        for i in range(num_objects):
            class_indx = int(classes[i])
            class_name = class_names[class_indx]
            if class_name not in allowed_classes:
                deleted_indx.append(i)
            else:
                names.append(class_name)
        names = np.array(names)
        count = len(names)
        if FLAGS.count:
            cv2.putText(frame, "Objects being tracked: {}".format(count), (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2)
            print("Objects being tracked: {}".format(count))
        # delete detections that are not in allowed_classes
        bboxes = np.delete(bboxes, deleted_indx, axis=0)
        scores = np.delete(scores, deleted_indx, axis=0)
        # encode yolo detections and feed to tracker
        if FLAGS.deep:
            features = encoder(frame, bboxes)
        else:
            features = np.empty((len(bboxes), 0), np.float32)
        detections = [Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip(bboxes, scores, names, features)]
        # initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]
        # run non-maxima supression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]
        # Call the tracker
        tracker.predict()
        tracker.update(detections)
        # update tracks
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            # draw bbox on screen
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1)
            cv2.putText(frame, class_name + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2)
            # if enable info flag then print details about each track
            if FLAGS.info:
                print("Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}".format(str(track.track_id), class_name, (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))))
        result = np.asarray(frame)
        result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        if not FLAGS.dont_show:
            cv2.imshow("Output Video", result)
        # if output flag is set, save video file
        if FLAGS.output:
            out.write(result)
        # calculate frames per second of running detections
        if FLAGS.info:
            fps = 1.0 / (time.time() - start_time)
            print("fps=%.2f" % fps)
        if not FLAGS.dont_show:
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    if not FLAGS.dont_show:
        cv2.destroyAllWindows()
def inference(preprocess_queue, inference_queue):
    import tensorflow as tf
    import core.utils as utils
    from tensorflow.python.saved_model import tag_constants
    from tensorflow.compat.v1 import InteractiveSession
    from tensorflow.compat.v1 import ConfigProto
    from core.functions import count_objects, crop_objects
    from core.config import cfg
    from core.utils import read_class_names
    import os
    import random
    from core.yolov4 import filter_boxes
    # used below; may already be imported at module level
    import cv2
    import numpy as np
    from PIL import Image

    tf.keras.backend.clear_session()

    input_size = Parameters.input_size
    model = OutsourceContract.model
    framework = Parameters.framework
    tiny = OutsourceContract.tiny
    weights = Parameters.weights
    iou = Parameters.iou
    score = Parameters.score

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    try:
        if len(physical_devices) > 0:
            tf.config.experimental.set_memory_growth(physical_devices[0], True)
    except:
        pass

    # configure gpu usage
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)

    # load model
    if framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=weights)
    else:
        saved_model_loaded = tf.saved_model.load(weights, tags=[tag_constants.SERVING])

    # read in all class names from config
    class_names = utils.read_class_names(cfg.YOLO.CLASSES)

    count = Parameters.count
    info = Parameters.info
    crop = Parameters.crop

    while True:
        if not preprocess_queue.empty():
            # drain the queue so only the newest frame is processed
            queueData = preprocess_queue.get()
            while not preprocess_queue.empty():
                queueData = preprocess_queue.get()
            #preprocess_queue.task_done()
            images_data = queueData[0]
            name = queueData[1]
            original_image = queueData[2]
            #preprocess_queue.task_done()

            if framework == 'tflite':
                interpreter.allocate_tensors()
                input_details = interpreter.get_input_details()
                output_details = interpreter.get_output_details()
                interpreter.set_tensor(input_details[0]['index'], images_data)
                interpreter.invoke()
                pred = [
                    interpreter.get_tensor(output_details[i]['index'])
                    for i in range(len(output_details))
                ]
                if model == 'yolov3' and tiny == True:
                    boxes, pred_conf = filter_boxes(
                        pred[1], pred[0], score_threshold=0.25,
                        input_shape=tf.constant([input_size, input_size]))
                else:
                    boxes, pred_conf = filter_boxes(
                        pred[0], pred[1], score_threshold=0.25,
                        input_shape=tf.constant([input_size, input_size]))
            else:
                infer = saved_model_loaded.signatures['serving_default']
                batch_data = tf.constant(images_data)
                pred_bbox = infer(batch_data)
                for key, value in pred_bbox.items():
                    boxes = value[:, :, 0:4]
                    pred_conf = value[:, :, 4:]

            boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
                boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
                scores=tf.reshape(
                    pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
                max_output_size_per_class=50,
                max_total_size=50,
                iou_threshold=iou,
                score_threshold=score)  # 1.2ms

            # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, xmax, ymax
            original_h, original_w, _ = original_image.shape
            bboxes = utils.format_boxes(boxes.numpy()[0], original_h, original_w)  # 1ms -> no tf needed

            # hold all detection data in one variable
            pred_bbox = [
                bboxes,
                scores.numpy()[0],
                classes.numpy()[0],
                valid_detections.numpy()[0]
            ]

            # by default allow all classes in .names file
            allowed_classes = list(class_names.values())

            # custom allowed classes (uncomment line below to allow detections for only people)
            # allowed_classes = ['person']

            # if crop flag is enabled, crop each detection and save it as new image
            if crop:
                crop_path = os.path.join(os.getcwd(), 'detections', 'crop', name)
                try:
                    os.mkdir(crop_path)
                except FileExistsError:
                    pass
                crop_objects(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB),
                             pred_bbox, crop_path, allowed_classes)

            if count:
                # count objects found
                counted_classes = count_objects(
                    pred_bbox, by_class=False, allowed_classes=allowed_classes)
                # loop through dict and print
                for key, value in counted_classes.items():
                    print("Number of {}s: {}".format(key, value))
                boxtext, image = utils.draw_bbox(
                    original_image, pred_bbox, info, counted_classes,
                    allowed_classes=allowed_classes)
            else:
                boxtext, image = utils.draw_bbox(
                    original_image, pred_bbox, info,
                    allowed_classes=allowed_classes)  # 0.5ms

            image = Image.fromarray(image.astype(np.uint8))  # 0.3ms

            inference_queue.put((boxtext, image, name))
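# inference() above is written as a queue-draining worker: it keeps only the newest item
# from preprocess_queue and pushes (boxtext, image, name) onto inference_queue. A minimal
# sketch of how such a worker might be wired up with multiprocessing; the preprocess and
# postprocess workers and the queue payloads are assumptions, they are not defined in this file.
import multiprocessing as mp

def run_pipeline(preprocess_worker, inference_worker, postprocess_worker):
    """Connect three stage functions with two bounded queues; each worker loops forever."""
    preprocess_queue = mp.Queue(maxsize=4)   # carries (images_data, name, original_image)
    inference_queue = mp.Queue(maxsize=4)    # carries (boxtext, annotated_image, name)

    stages = [
        mp.Process(target=preprocess_worker, args=(preprocess_queue,), daemon=True),
        mp.Process(target=inference_worker, args=(preprocess_queue, inference_queue), daemon=True),
        mp.Process(target=postprocess_worker, args=(inference_queue,), daemon=True),
    ]
    for p in stages:
        p.start()
    for p in stages:
        p.join()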
def main(_argv): config = ConfigProto() config.gpu_options.allow_growth = True session = InteractiveSession(config=config) STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS) input_size = FLAGS.size images = FLAGS.images # load model if FLAGS.framework == 'tflite': interpreter = tf.lite.Interpreter(model_path=FLAGS.weights) else: saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING]) # loop through images in list and run Yolov4 model on each for count, image_path in enumerate(images, 1): original_image = cv2.imread(image_path) original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB) image_data = cv2.resize(original_image, (input_size, input_size)) image_data = image_data / 255. images_data = [] for i in range(1): images_data.append(image_data) images_data = np.asarray(images_data).astype(np.float32) if FLAGS.framework == 'tflite': interpreter.allocate_tensors() input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() print(input_details) print(output_details) interpreter.set_tensor(input_details[0]['index'], images_data) interpreter.invoke() pred = [interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))] if FLAGS.model == 'yolov3' and FLAGS.tiny == True: boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25, input_shape=tf.constant([input_size, input_size])) else: boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant([input_size, input_size])) else: infer = saved_model_loaded.signatures['serving_default'] batch_data = tf.constant(images_data) pred_bbox = infer(batch_data) for key, value in pred_bbox.items(): boxes = value[:, :, 0:4] pred_conf = value[:, :, 4:] boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression( boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)), scores=tf.reshape( pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])), max_output_size_per_class=50, max_total_size=50, iou_threshold=FLAGS.iou, score_threshold=FLAGS.score ) pred_bbox = [boxes.numpy(), scores.numpy(), classes.numpy(), valid_detections.numpy()] image = utils.draw_bbox(original_image, pred_bbox) # image = utils.draw_bbox(image_data*255, pred_bbox) image = Image.fromarray(image.astype(np.uint8)) if not FLAGS.dont_show: image.show() image = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2RGB) cv2.imwrite(FLAGS.output + 'detection' + str(count) + '.png', image)
def main(_argv): # Definition of the parameters max_cosine_distance = 0.4 nn_budget = None nms_max_overlap = 1.0 # initialize deep sort model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) # calculate cosine distance metric metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) # initialize tracker tracker = Tracker(metric) # load configuration for object detector config = ConfigProto() config.gpu_options.allow_growth = True session = InteractiveSession(config=config) STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS) input_size = FLAGS.size #images = FLAGS.images video_path = FLAGS.video # load tflite model if flag is set if FLAGS.framework == 'tflite': interpreter = tf.lite.Interpreter(model_path=FLAGS.weights) interpreter.allocate_tensors() input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() print(input_details) print(output_details) # otherwise load standard tensorflow saved model else: saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING]) infer = saved_model_loaded.signatures['serving_default'] # begin video capture try: vid = cv2.VideoCapture(int(video_path)) except: vid = cv2.VideoCapture(video_path) out = None # get video ready to save locally if flag is set if FLAGS.output: # by default VideoCapture returns float instead of int width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = int(vid.get(cv2.CAP_PROP_FPS)) codec = cv2.VideoWriter_fourcc(*FLAGS.output_format) out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height)) frame_num = 0 if FLAGS.shirt: allowed_classes = ['Shirt'] #ROI = if FLAGS.trouser: allowed_classes = ['Trousers'] if FLAGS.jeans: allowed_classes = ['Jeans'] if FLAGS.dress: allowed_classes = ['Dress'] if FLAGS.footwear: allowed_classes = ['Footwear'] if FLAGS.jacket: allowed_classes = ['Jacket'] if FLAGS.skirt: allowed_classes = ['Skirt'] if FLAGS.suit: allowed_classes = ['Suit'] # while video is running while True: return_value, frame = vid.read() if return_value: frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) image = Image.fromarray(frame) else: print('Video has ended or failed, try a different video format!') break frame_num += 1 print('Frame #: ', frame_num) frame_size = frame.shape[:2] image_data = cv2.resize(frame, (input_size, input_size)) image_data = image_data / 255. 
image_data = image_data[np.newaxis, ...].astype(np.float32) start_time = time.time() # run detections on tflite if flag is set if FLAGS.framework == 'tflite': interpreter.set_tensor(input_details[0]['index'], image_data) interpreter.invoke() pred = [ interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details)) ] # run detections using yolov3 if flag is set if FLAGS.model == 'yolov3' and FLAGS.tiny == True: boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25, input_shape=tf.constant( [input_size, input_size])) else: boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant( [input_size, input_size])) else: batch_data = tf.constant(image_data) pred_bbox = infer(batch_data) for key, value in pred_bbox.items(): boxes = value[:, :, 0:4] pred_conf = value[:, :, 4:] boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression( boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)), scores=tf.reshape( pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])), max_output_size_per_class=50, max_total_size=50, iou_threshold=FLAGS.iou, score_threshold=FLAGS.score) # convert data to numpy arrays and slice out unused elements num_objects = valid_detections.numpy()[0] bboxes = boxes.numpy()[0] bboxes = bboxes[0:int(num_objects)] scores = scores.numpy()[0] scores = scores[0:int(num_objects)] classes = classes.numpy()[0] classes = classes[0:int(num_objects)] # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height original_h, original_w, _ = frame.shape bboxes = utils.format_boxes(bboxes, original_h, original_w) # store all predictions in one parameter for simplicity when calling functions pred_bbox = [bboxes, scores, classes, num_objects] # read in all class names from config class_names = utils.read_class_names(cfg.YOLO.CLASSES) # by default allow all classes in .names file allowed_classes = list(class_names.values()) # custom allowed classes (uncomment line below to customize tracker for only people) #allowed_classes = ['person'] # loop through objects and use class index to get class name, allow only classes in allowed_classes list names = [] deleted_indx = [] for i in range(num_objects): class_indx = int(classes[i]) class_name = class_names[class_indx] if class_name not in allowed_classes: deleted_indx.append(i) else: names.append(class_name) names = np.array(names) count = len(names) if FLAGS.count: cv2.putText(frame, "Objects being tracked: {}".format(count), (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2) print("Objects being tracked: {}".format(count)) # delete detections that are not in allowed_classes bboxes = np.delete(bboxes, deleted_indx, axis=0) scores = np.delete(scores, deleted_indx, axis=0) # encode yolo detections and feed to tracker features = encoder(frame, bboxes) detections = [ Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip( bboxes, scores, names, features) ] #initialize color map cmap = plt.get_cmap('tab20b') colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)] # run non-maxima supression boxs = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) classes = np.array([d.class_name for d in detections]) indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) # update tracks for track in tracker.tracks: if not 
track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() class_name = track.get_class() # draw bbox on screen color = colors[int(track.track_id) % len(colors)] color = [i * 255 for i in color] #cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) #cv2.rectangle(frame, (int(bbox[0]), int(bbox[1]-30)), (int(bbox[0])+(len(class_name)+len(str(track.track_id)))*17, int(bbox[1])), color, -1) #cv2.putText(frame, class_name + "-" + str(track.track_id),(int(bbox[0]), int(bbox[1]-10)),0, 0.75, (255,255,255),2) # if enable info flag then print details about each track if FLAGS.info: print( "Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}" .format(str(track.track_id), class_name, (int( bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])))) if FLAGS.color: PATH = './training.data' #(int(bbox[0])):(int(bbox[2])), (int(bbox[1])):(int(bbox[3])) #ROI = frame[(int(bbox[0]) +50) :(int(bbox[2]) - 50), (int(bbox[1])+ 50):(int(bbox[3])-50)] #ROI = frame[(int(bbox[1])) +15 :(int(bbox[3])-15),(int(bbox[0])+15):(int(bbox[2])-15)] ROI = frame[int((int(bbox[1]) + int(bbox[3])) / 2):int((int(bbox[1]) + int(bbox[3])) / 2) + 1, int((int(bbox[0]) + int(bbox[2])) / 2):int((int(bbox[0]) + int(bbox[2])) / 2) + 1] #ROI = frame[(int(bbox[1])):(int(bbox[3])),(int(bbox[0])):(int(bbox[2]))] #ROI = frame[int(0.5* (int(bbox[1] - 50)+ int(bbox[3] + 50))),int(0.5*(int(bbox[0] - 50) +int(bbox[2] + 50 )))] #print(ROI) color_histogram_feature_extraction.color_histogram_of_test_image( ROI) prediction = knn_classifier.main('training.data', 'test.data') #prediction = 'red' red = load_red('test.data') Red = str(red) #Red = str(Red_1) print('this is the variable of the red:- ' + str(Red)) green = load_green('test.data') Green = str(green) #Green = str(Green_1) print('this is the variable of the green:- ' + str(Green)) blue = load_blue('test.data') #Blue_1 = int(blue) Blue = str(blue) print('this is the variable of the blue:- ' + str(Blue)) #hsv = rgb_to_hsv(red,green,blue) #print("HSV: " + str(hsv)) if red and blue and green != None: HLS = colorsys.rgb_to_hls(red, green, blue) HUE = int(HLS[0]) Light = int(HLS[1]) Saturation = int(HLS[2]) print("HLS is equal to", HLS) print('HUE: ', HUE) print('LIGHT: ', Light) print('Saturation', Saturation) if red and blue and green != None: HSV = rgb_to_hsv(red, green, blue) HUE_1 = int(HSV[0]) Saturation_1 = int(HSV[1]) Value = int(HSV[2]) print("HSV is equal to", HSV) print('Hue: ', HUE_1) print('saturation: ', Saturation_1) print('value', Value) print(str(prediction) + " " + str(class_name)) if FLAGS.Fuzzy_black: #if str(59.7) <= Red < str(200.9) and str(74) <= Blue < str(207) and str(70) <= Green < str(203): if 0 <= HUE_1 < 210 and 0 <= Saturation_1 < 41 and 0 <= Value < 86: print("THIS IS THE black COLOR yaaaaaaaaaaaaaaaaaaaa") cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle( frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText( frame, class_name + " " + "BLACK" + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) if FLAGS.Fuzzy_red: #if str(139) <= Red < str(255) and str(0) <= Green < str(160) and str(0) <= Blue < str(128): if 0 <= HUE_1 < 348 and 47 <= Saturation_1 < 100 and 55 <= Value < 100: print( "THIS IS THE red COLOR redddddddddddddddddddddddddddddddddddd" ) cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), 
(int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle( frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText( frame, class_name + " " + "RED" + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) if FLAGS.Fuzzy_orange: #if str(255) <= Red < str(255) and str(69) <= Green < str(165) and str(0) <= Blue < str(80): if 9 <= HUE_1 < 39 and 69 <= Saturation_1 < 100 and Value == 100: print( "THIS IS THE ORANGE COLOR orangeeeeeeeeeeeeeeeeeeeeeeee" ) cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle( frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText( frame, class_name + " " + "ORANGE" + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) if FLAGS.Fuzzy_yellow: #if str(189) <= Red < str(255) and str(183) <= Green < str(255) and str(0) <= Blue < str(224): if 0 <= HUE_1 < 56 and 12 <= Saturation_1 < 100 and 74 <= Value < 100: print("THIS IS THE YELLOW COLOR") cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle( frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText( frame, class_name + " " + "YELLOW" + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) if FLAGS.Fuzzy_blue: #if str(0) <= Red < str(176) and str(0) <= Green < str(244) and str(112) <= Blue < str(255): if 187 <= HUE_1 < 240 and 21 <= Saturation_1 < 100 and 44 <= Value < 100: print("THIS IS THE BLUE COLOR") cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle( frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText( frame, class_name + " " + "BLUE" + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) if FLAGS.Fuzzy_white: #if str(240) <= Red < str(255) and str(228) <= Green < str(255) and str(215) <= Blue < str(255): if 0 <= HUE_1 < 340 and 0 <= Saturation_1 < 14 and 96 <= Value < 100: print("THIS IS THE WHITE COLOR") cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle( frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText( frame, class_name + " " + "WHITE" + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) if FLAGS.Fuzzy_purple: #if str(72) <= Red < str(255) and str(0) <= Green < str(230) and str(128) <= Blue < str(255): if 0 <= HUE_1 < 302 and 8 <= Saturation_1 < 100 and 50 <= Value < 100: print("THIS IS THE PURPLE COLOR") cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle( frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText( frame, class_name + " " + "PURPLE" + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) if FLAGS.Fuzzy_green: #if str(0) <= Red < str(173) and str(100) <= Green < str(255) and str(0) <= Blue < str(170): if 0 <= HUE_1 < 160 and 24 <= Saturation_1 < 100 and 39 <= Value < 
100: print("THIS IS THE green COLOR") cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle( frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText( frame, class_name + " " + "GREEN" + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) if FLAGS.Fuzzy_brown: #if str(128) <= Red < str(255) and str(0) <= Green < str(248) and str(0) <= Blue < str(288): if 0 <= HUE_1 < 48 and 14 <= Saturation_1 < 100 and 50 <= Value < 100: print("THIS IS THE BROWN COLOR") cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle( frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText( frame, class_name + " " + "BROWN" + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) if FLAGS.Fuzzy_cyan: #if str(0) <= Red < str(244) and str(128) <= Green < str(255) and str(128) <= Blue < str(255): if 0 <= HUE_1 < 182 and 12 <= Saturation_1 < 100 and 50 <= Value < 100: print("THIS IS THE CYAN COLOR") cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle( frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText( frame, class_name + " " + "CYAN" + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) if FLAGS.Fuzzy_pink: #if str(199) <= Red < str(255) and str(20) <= Green < str(192) and str(133) <= Blue < str(203): if 322 <= HUE_1 < 351 and 25 <= Saturation_1 < 92 and 78 <= Value < 100: print("THIS IS THE PINK COLOR") cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle( frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText( frame, class_name + " " + "PINK" + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) if FLAGS.black: if prediction == 'black': #ROI = frame[int((int(bbox[1]) + int(bbox[3]))/2):int((int(bbox[1]) + int(bbox[3]))/2)+1,int((int(bbox[0]) + int(bbox[2]))/2):int((int(bbox[0]) + int(bbox[2]))/2)+1] #color_histogram_feature_extraction.color_histogram_of_test_image(ROI) #prediction = knn_classifier.main('training.data','test.data') cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle( frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText( frame, class_name + " " + str(prediction) + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) if FLAGS.blue: if prediction == 'blue': cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle( frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText( frame, class_name + " " + str(prediction) + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) if FLAGS.red: if prediction == 'red': cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle( frame, (int(bbox[0]), 
int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText( frame, class_name + " " + str(prediction) + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) if FLAGS.yellow: if prediction == 'yellow': cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle( frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText( frame, class_name + " " + str(prediction) + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) if FLAGS.orange: if prediction == 'orange': cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle( frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText( frame, class_name + " " + str(prediction) + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) if FLAGS.violet: if prediction == 'violet': cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle( frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText( frame, class_name + " " + str(prediction) + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) if FLAGS.white: if prediction == 'white': cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle( frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText( frame, class_name + " " + str(prediction) + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) if FLAGS.green: if prediction == 'green': cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) cv2.rectangle( frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])), color, -1) cv2.putText( frame, class_name + " " + str(prediction) + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2) #cv2.putText(frame, class_name + " " + str(prediction) + "-" + str(track.track_id),(int(bbox[0]), int(bbox[1]-10)),0, 0.75, (255,255,255),2) #print('ferture data:' +" " + feature_data) #result_1 = np.asarray(frame) #result_1 = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) #cv2.imshow('color classifier', result_1) #print(color_histogram_feature_extraction.feature_data) # calculate frames per second of running detections fps = 1.0 / (time.time() - start_time) print("FPS: %.2f" % fps) result = np.asarray(frame) result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) if not FLAGS.dont_show: cv2.imshow("Output Video", result) # if output flag is set, save video file if FLAGS.output: out.write(result) if cv2.waitKey(1) & 0xFF == ord('q'): break cv2.destroyAllWindows()
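# The colour branch above samples a single centre pixel from each track's box, runs the
# k-NN classifier on training.data, and additionally applies hand-tuned HSV ranges for the
# Fuzzy_* flags. A small self-contained sketch of that idea using colorsys (which expects
# channel values scaled to [0, 1]); the thresholds below are illustrative examples, not the
# exact ranges used in the code above.
import colorsys

def center_pixel_color_name(frame_rgb, bbox):
    """Classify the colour of a track by its centre pixel using coarse HSV ranges.

    frame_rgb: H x W x 3 RGB image; bbox: (xmin, ymin, xmax, ymax) in pixels.
    """
    cx = int((bbox[0] + bbox[2]) / 2)
    cy = int((bbox[1] + bbox[3]) / 2)
    r, g, b = frame_rgb[cy, cx]

    # colorsys works on [0, 1] floats; rescale hue to degrees, s/v to percent
    h, s, v = colorsys.rgb_to_hsv(r / 255.0, g / 255.0, b / 255.0)
    h, s, v = h * 360.0, s * 100.0, v * 100.0

    if v < 25:
        return "black"
    if s < 15 and v > 85:
        return "white"
    if h < 20 or h >= 340:
        return "red"
    if h < 65:
        return "yellow"
    if h < 170:
        return "green"
    if h < 260:
        return "blue"
    return "purple"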
def Score(self, cvImage):
    """Use tflite interpreter to predict bounding boxes and confidence score."""
    with self._lock:
        timestamp = datetime.datetime.now()

        # Predict
        try:
            image_data = self.Preprocess(cvImage)
            self.interpreter.set_tensor(self.input_details[0]['index'], image_data)
            self.interpreter.invoke()
            pred = [
                self.interpreter.get_tensor(self.output_details[i]['index'])
                for i in range(len(self.output_details))
            ]
        except Exception as err:
            return [{'[ERROR]': 'Error during prediction: {}'.format(repr(err))}]

        # Filter and NMS
        try:
            boxes, pred_conf = filter_boxes(
                pred[0], pred[1], score_threshold=0.25,
                input_shape=tf.constant([self.input_size, self.input_size]))
            boxes, scores, indices, valid_detections = tf.image.combined_non_max_suppression(
                boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
                scores=tf.reshape(
                    pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
                max_output_size_per_class=50,
                max_total_size=50,
                iou_threshold=FLAGS.iou,
                score_threshold=FLAGS.score)
        except Exception as err:
            return [{'[ERROR]': 'Error during filter and NMS: {}'.format(repr(err))}]

        try:
            # Save image w/ annotations to Blob Storage (through IoT module
            # and then to cloud Azure Storage pending connectivity)
            pred_bbox = [
                boxes.numpy(),
                scores.numpy(),
                indices.numpy(),
                valid_detections.numpy()
            ]
            image = cv2.cvtColor(cvImage, cv2.COLOR_BGR2RGB)
            image_annot = utils.draw_bbox(image, pred_bbox)
            #image = cv2.cvtColor(image.astype(np.uint8), cv2.COLOR_BGR2RGB)
            pil_image = Image.fromarray(image_annot.astype(np.uint8))

            # Save annotated image to buffer
            bytes_io_annot = io.BytesIO()
            pil_image.save(bytes_io_annot, format='JPEG')
            bytes_im_annot = bytes_io_annot.getvalue()

            # Unannotated image to buffer
            pil_image = Image.fromarray(image.astype(np.uint8))
            bytes_io_unannot = io.BytesIO()
            pil_image.save(bytes_io_unannot, format='JPEG')
            bytes_im_unannot = bytes_io_unannot.getvalue()

            # To check if there are bboxes
            indices_check = np.squeeze(indices.numpy(), axis=0)
            scores_check = np.squeeze(scores.numpy(), axis=0)

            # If any score clears the threshold, store the annotated frame
            if (scores_check > FLAGS.score).any():
                # Name in blob to use
                blob_name = str(timestamp.strftime("%d-%b-%Y-%H-%M-%S.%f")) + "_annotated.jpg"
                blob_metadata = {
                    'timestamp': str(timestamp.strftime("%d-%b-%Y-%H-%M-%S.%f")),
                    'objects': ','.join(
                        set([
                            self._labelList[int(indices_check[i])]
                            for i in range(len(indices_check))
                            if scores_check[i] > FLAGS.score
                        ]))
                }
                try:
                    container_client = self.blob_service_client.get_container_client(
                        self.local_container_name_annotated)
                    props = container_client.get_container_properties()
                except Exception as err:
                    # Local container needs to be created if not present
                    container_client.create_container()
                # Upload pil image as buffer
                container_client.upload_blob(blob_name, bytes_im_annot, metadata=blob_metadata)

            # If all scores are below threshold let's store the frames for later use
            if (scores_check < FLAGS.score).all():
                # Name in blob to use
                blob_name = str(timestamp.strftime("%d-%b-%Y-%H-%M-%S.%f")) + "_lowconf.jpg"
                blob_metadata = {
                    'timestamp': str(timestamp.strftime("%d-%b-%Y-%H-%M-%S.%f")),
                    'objects': ','.join(
                        set([
                            self._labelList[int(indices_check[i])]
                            for i in range(len(indices_check))
                        ]))
                }
                try:
                    container_client = self.blob_service_client.get_container_client(
                        self.local_container_name_lowconf)
                    props = container_client.get_container_properties()
                except Exception as err:
                    # Local container needs to be created if not present
                    container_client.create_container()
                # Upload pil image as buffer
                container_client.upload_blob(blob_name, bytes_im_unannot, metadata=blob_metadata)
        except Exception as err:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            return [{
                '[ERROR]': 'Error sending image to local blob storage: {}'.format(
                    repr(traceback.format_exception(exc_type, exc_value, exc_traceback)))
            }]

        # Postprocess
        try:
            boxes = np.squeeze(boxes.numpy(), axis=0)
            scores = np.squeeze(scores.numpy(), axis=0)
            indices = np.squeeze(indices.numpy(), axis=0)
            results = self.Postprocess(boxes, scores, indices)
        except Exception as err:
            return [{'[ERROR]': 'Error during postprocess: {}'.format(repr(err))}]

        return results
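# Score() above buffers JPEG bytes in memory and pushes them to a local blob container.
# A minimal, self-contained sketch of that upload path, assuming the azure-storage-blob
# v12 SDK; the connection string, container name and metadata values here are
# placeholders, not taken from this project.
import io
import datetime
from PIL import Image
from azure.storage.blob import BlobServiceClient

def upload_annotated_jpeg(connection_string, image_array, container_name="detections"):
    """Encode an RGB numpy array as JPEG in memory and upload it with metadata."""
    buf = io.BytesIO()
    Image.fromarray(image_array.astype("uint8")).save(buf, format="JPEG")

    service = BlobServiceClient.from_connection_string(connection_string)
    container = service.get_container_client(container_name)
    try:
        container.get_container_properties()
    except Exception:
        container.create_container()  # create the container on first use

    timestamp = datetime.datetime.now().strftime("%d-%b-%Y-%H-%M-%S.%f")
    container.upload_blob(
        name=timestamp + "_annotated.jpg",
        data=buf.getvalue(),
        metadata={"timestamp": timestamp},
    )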
def main(_argv): config = ConfigProto() config.gpu_options.allow_growth = True session = InteractiveSession(config=config) STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS) input_size = FLAGS.size video_path = FLAGS.video # get video name by using split method video_name = video_path.split('/')[-1] video_name = video_name.split('.')[0] if FLAGS.framework == 'tflite': interpreter = tf.lite.Interpreter(model_path=FLAGS.weights) interpreter.allocate_tensors() input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() print(input_details) print(output_details) else: saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING]) infer = saved_model_loaded.signatures['serving_default'] # begin video capture try: vid = cv2.VideoCapture(int(video_path)) except: vid = cv2.VideoCapture(video_path) out = None if FLAGS.output: # by default VideoCapture returns float instead of int width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = int(vid.get(cv2.CAP_PROP_FPS)) codec = cv2.VideoWriter_fourcc(*FLAGS.output_format) out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height)) frame_num = 0 while True: return_value, frame = vid.read() if return_value: frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) frame_num += 1 image = Image.fromarray(frame) else: print('Video has ended or failed, try a different video format!') break frame_size = frame.shape[:2] image_data = cv2.resize(frame, (input_size, input_size)) image_data = image_data / 255. image_data = image_data[np.newaxis, ...].astype(np.float32) start_time = time.time() if FLAGS.framework == 'tflite': interpreter.set_tensor(input_details[0]['index'], image_data) interpreter.invoke() pred = [interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))] if FLAGS.model == 'yolov3' and FLAGS.tiny == True: boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25, input_shape=tf.constant([input_size, input_size])) else: boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant([input_size, input_size])) else: batch_data = tf.constant(image_data) pred_bbox = infer(batch_data) for key, value in pred_bbox.items(): boxes = value[:, :, 0:4] pred_conf = value[:, :, 4:] boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression( boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)), scores=tf.reshape( pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])), max_output_size_per_class=50, max_total_size=50, iou_threshold=FLAGS.iou, score_threshold=FLAGS.score ) # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, xmax, ymax original_h, original_w, _ = frame.shape bboxes = utils.format_boxes(boxes.numpy()[0], original_h, original_w) pred_bbox = [bboxes, scores.numpy()[0], classes.numpy()[0], valid_detections.numpy()[0]] # read in all class names from config class_names = utils.read_class_names(cfg.YOLO.CLASSES) # by default allow all classes in .names file allowed_classes = list(class_names.values()) # custom allowed classes (uncomment line below to allow detections for only people) #allowed_classes = ['person'] # if crop flag is enabled, crop each detection and save it as new image if FLAGS.crop: crop_rate = 150 # capture images every so many frames (ex. 
crop photos every 150 frames) crop_path = os.path.join(os.getcwd(), 'detections', 'crop', video_name) try: os.mkdir(crop_path) except FileExistsError: pass if frame_num % crop_rate == 0: final_path = os.path.join(crop_path, 'frame_' + str(frame_num)) try: os.mkdir(final_path) except FileExistsError: pass crop_objects(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), pred_bbox, final_path, allowed_classes) else: pass if FLAGS.count: # count objects found counted_classes = count_objects(pred_bbox, by_class = True, allowed_classes=allowed_classes) # loop through dict and print for key, value in counted_classes.items(): print("Number of {}s: {}".format(key, value)) image = utils.draw_bbox(frame, pred_bbox, FLAGS.info, counted_classes, allowed_classes=allowed_classes, read_plate=FLAGS.plate) else: image = utils.draw_bbox(frame, pred_bbox, FLAGS.info, allowed_classes=allowed_classes, read_plate=FLAGS.plate) fps = 1.0 / (time.time() - start_time) print("FPS: %.2f" % fps) result = np.asarray(image) # cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE) result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # if not FLAGS.dont_show: # cv2.imshow("result", result) if FLAGS.output: out.write(result)
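# The video main above calls crop_objects from core.functions every crop_rate frames to
# save each allowed detection as its own image. That helper is not shown in this file;
# the sketch below is only an assumption of what it does, and the extra class_names
# argument is a simplification (the project's helper presumably reads names from its config).
import os
import cv2

def crop_objects_sketch(img, pred_bbox, path, allowed_classes, class_names):
    """Crop every allowed detection out of img (BGR) and save it under path.

    pred_bbox is [bboxes_in_pixels, scores, classes, num_objects] as built above.
    """
    boxes, scores, classes, num_objects = pred_bbox
    counts = {}
    for i in range(int(num_objects)):
        class_name = class_names[int(classes[i])]
        if class_name not in allowed_classes:
            continue
        counts[class_name] = counts.get(class_name, 0) + 1
        xmin, ymin, xmax, ymax = [int(v) for v in boxes[i]]
        cropped = img[ymin:ymax, xmin:xmax]
        if cropped.size == 0:
            continue
        cv2.imwrite(os.path.join(path, "{}_{}.png".format(class_name, counts[class_name])), cropped)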
def main(_argv): config = ConfigProto() config.gpu_options.allow_growth = True session = InteractiveSession(config=config) STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS) input_size = FLAGS.size video_path = FLAGS.video # get video name by using split method video_name = video_path.split('/')[-1] video_name = video_name.split('.')[0] if FLAGS.framework == 'tflite': interpreter = tf.lite.Interpreter(model_path=FLAGS.weights) interpreter.allocate_tensors() input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() print(input_details) print(output_details) else: saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING]) infer = saved_model_loaded.signatures['serving_default'] # begin video capture try: vid = cv2.VideoCapture(int(video_path)) except: vid = cv2.VideoCapture(video_path) out = None if FLAGS.output: # by default VideoCapture returns float instead of int width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = int(vid.get(cv2.CAP_PROP_FPS)) codec = cv2.VideoWriter_fourcc(*FLAGS.output_format) out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height)) firstFrame = True frame_num = 0 while True: return_value, frame_1 = vid.read() pts = [] aa = [] bb = [] cc = [] dd = [] while firstFrame: def click_event(event, x, y, flags, param): global pts if event == cv2.EVENT_LBUTTONDOWN: pts.append((x, y)) cv2.circle(frame_1, center=(x, y), radius=5, color=(0, 0, 255), thickness=-1) strXY = str(x) + " " + str(y) font = cv2.FONT_HERSHEY_SIMPLEX cv2.putText(frame_1, strXY, (x, y), font, 0.5, (255, 255, 0), 2) elif event == cv2.EVENT_RBUTTONDOWN: if pts: pts.pop() cv2.imshow('bobur', frame_1) cv2.imshow('bobur', frame_1) cv2.setMouseCallback('bobur', click_event) if cv2.waitKey(1) & 0xFF == ord('c'): firstFrame = False break if len(pts) >= 4: aa.append(pts[0]) bb.append(pts[1]) cc.append(pts[2]) dd.append(pts[3]) print(aa, bb, cc, dd) a, b, c, d, e, f, g, h = [209, 1040], [331, 197], [1124, 197], [ 1907, 850 ], [0, 0], [1920, 0], [1920, 1080], [0, 1080] # e,f,g,h = [0,0],[1920,0],[1920,1080],[0,1080] external_poly = [ np.array([e, b, c, f]), np.array([f, c, d, g]), np.array([g, d, a, h]), np.array([h, a, b, e]) ] frame = cv2.fillPoly(frame_1, external_poly, (0, 0, 0)) # cv2.line(frame,(209, 1040),(331,197),(255,0,0),2) # cv2.line(frame,(331, 197), (1124,197),(255,0,0),2) # cv2.line(frame,(1124,197),(1907,850),(255,0,0),2) # cv2.line(frame,(209, 1040),(1907,850),(255,0,0),2) # cv2.line(frame,a,b,(255,0,0),2) # cv2.line(frame,b,c,(255,0,0),2) # cv2.line(frame,c,d,(255,0,0),2) # cv2.line(frame,a,d,(255,0,0),2) if return_value: # frame = cv2.rotate(frame, cv2.ROTATE_90_CLOCKWISE) #rotate the video for mobile videos frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) frame_num += 1 image = Image.fromarray(frame) else: print('Video has ended or failed, try a different video format!') break if frame_num % 15 == 0: frame_size = frame.shape[:2] image_data = cv2.resize(frame, (input_size, input_size)) image_data = image_data / 255. 
image_data = image_data[np.newaxis, ...].astype(np.float32) start_time = time.time() if FLAGS.framework == 'tflite': interpreter.set_tensor(input_details[0]['index'], image_data) interpreter.invoke() pred = [ interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details)) ] if FLAGS.model == 'yolov3' and FLAGS.tiny == True: boxes, pred_conf = filter_boxes( pred[1], pred[0], score_threshold=0.25, input_shape=tf.constant([input_size, input_size])) else: boxes, pred_conf = filter_boxes( pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant([input_size, input_size])) else: batch_data = tf.constant(image_data) pred_bbox = infer(batch_data) for key, value in pred_bbox.items(): boxes = value[:, :, 0:4] pred_conf = value[:, :, 4:] boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression( boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)), scores=tf.reshape( pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])), max_output_size_per_class=100, max_total_size=100, iou_threshold=FLAGS.iou, score_threshold=FLAGS.score) # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, xmax, ymax original_h, original_w, _ = frame.shape bboxes = utils.format_boxes(boxes.numpy()[0], original_h, original_w) pred_bbox = [ bboxes, scores.numpy()[0], classes.numpy()[0], valid_detections.numpy()[0] ] # print(pred_bbox[2]) out_boxes, out_scores, out_classes, num_boxes = pred_bbox # read in all class names from config class_names = utils.read_class_names(cfg.YOLO.CLASSES) # by default allow all classes in .names file # allowed_classes = list(class_names.values()) # custom allowed classes (uncomment line below to allow detections for only SELECTED DETECTION CLASSES) allowed_classes = ['person', 'car', 'truck', 'bus', 'motorbike'] # allowed_classes = ['car'] # if crop flag is enabled, crop each detection and save it as new image if FLAGS.crop: crop_rate = 150 # capture images every so many frames (ex. crop photos every 150 frames) crop_path = os.path.join(os.getcwd(), 'detections', 'crop', video_name) try: os.mkdir(crop_path) except FileExistsError: pass if frame_num % crop_rate == 0: final_path = os.path.join(crop_path, 'frame_' + str(frame_num)) try: os.mkdir(final_path) except FileExistsError: pass crop_objects(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), pred_bbox, final_path, allowed_classes) else: pass if FLAGS.count: # count objects found counted_classes = count_objects( pred_bbox, by_class=True, allowed_classes=allowed_classes) # loop through dict and print for key, value in counted_classes.items(): print("Number of {}s: {}".format(key, value)) image = utils.draw_bbox(frame, pred_bbox, FLAGS.info, counted_classes, allowed_classes=allowed_classes, read_plate=FLAGS.plate) else: image = utils.draw_bbox(frame, pred_bbox, FLAGS.info, allowed_classes=allowed_classes, read_plate=FLAGS.plate) fps = 1.0 / (time.time() - start_time) print("FPS: %.2f" % fps) result = np.asarray(image) cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE) result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) if not FLAGS.dont_show: cv2.imshow("result", result) if FLAGS.output: out.write(result) if cv2.waitKey(1) & 0xFF == ord('q'): break vid.release() cv2.destroyAllWindows()
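# The road-monitoring main above blanks out everything outside a user-clicked quadrilateral
# by filling the four surrounding polygons (external_poly) with black before detection.
# A compact sketch of that masking step on its own, using an explicit mask instead of the
# four outer polygons; same effect, shown as an alternative rather than the code's exact method.
import numpy as np
import cv2

def mask_outside_roi(frame, roi_points):
    """Zero out all pixels outside a convex quadrilateral region of interest.

    roi_points: four (x, y) tuples in clockwise or counter-clockwise order.
    """
    mask = np.zeros(frame.shape[:2], dtype=np.uint8)
    cv2.fillPoly(mask, [np.array(roi_points, dtype=np.int32)], 255)
    return cv2.bitwise_and(frame, frame, mask=mask)

# Example with the hard-coded corners used above
# roi = [(209, 1040), (331, 197), (1124, 197), (1907, 850)]
# masked = mask_outside_roi(frame, roi)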
def main(_argv): config = ConfigProto() config.gpu_options.allow_growth = True session = InteractiveSession(config=config) STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS) input_size = FLAGS.size video_path = FLAGS.video if FLAGS.framework == 'tflite': interpreter = tf.lite.Interpreter(model_path=FLAGS.weights) interpreter.allocate_tensors() input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() print(input_details) print(output_details) else: saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING]) infer = saved_model_loaded.signatures['serving_default'] # begin video capture try: vid = cv2.VideoCapture(int(video_path)) except: vid = cv2.VideoCapture(video_path) out = None if FLAGS.output: # by default VideoCapture returns float instead of int width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = int(vid.get(cv2.CAP_PROP_FPS)) codec = cv2.VideoWriter_fourcc(*FLAGS.output_format) out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height)) while True: return_value, frame = vid.read() if return_value: frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) image = Image.fromarray(frame) else: print('Video has ended or failed, try a different video format!') break frame_size = frame.shape[:2] image_data = cv2.resize(frame, (input_size, input_size)) image_data = image_data / 255. image_data = image_data[np.newaxis, ...].astype(np.float32) start_time = time.time() if FLAGS.framework == 'tflite': interpreter.set_tensor(input_details[0]['index'], image_data) interpreter.invoke() pred = [ interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details)) ] if FLAGS.model == 'yolov3' and FLAGS.tiny == True: boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25, input_shape=tf.constant( [input_size, input_size])) else: boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant( [input_size, input_size])) else: batch_data = tf.constant(image_data) pred_bbox = infer(batch_data) for key, value in pred_bbox.items(): boxes = value[:, :, 0:4] pred_conf = value[:, :, 4:] boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression( boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)), scores=tf.reshape( pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])), max_output_size_per_class=50, max_total_size=50, iou_threshold=FLAGS.iou, score_threshold=FLAGS.score) pred_bbox = [ boxes.numpy(), scores.numpy(), classes.numpy(), valid_detections.numpy() ] image = utils.draw_bbox(frame, pred_bbox) fps = 1.0 / (time.time() - start_time) print("FPS: %.2f" % fps) result = np.asarray(image) cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE) result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) cv2.imshow("result", result) if FLAGS.output: out.write(result) if cv2.waitKey(1) & 0xFF == ord('q'): break cv2.destroyAllWindows()
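# Every main in this file repeats the same frame preprocessing before inference: resize to
# the square model input, scale to [0, 1], add a batch axis, cast to float32. A small
# refactor sketch, not present in the source, that captures that contract in one helper.
import numpy as np
import cv2

def preprocess_frame(frame_rgb, input_size):
    """Resize an RGB frame to the model input, normalize to [0, 1], add a batch dimension."""
    image_data = cv2.resize(frame_rgb, (input_size, input_size))
    image_data = image_data / 255.0
    return image_data[np.newaxis, ...].astype(np.float32)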
def main(_argv): with open("./config_birdview.yml", "r") as ymlfile: bird_view_cfg = yaml.load(ymlfile) width_og, height_og = 0, 0 corner_points = [] for section in bird_view_cfg: corner_points.append(bird_view_cfg["image_parameters"]["p1"]) corner_points.append(bird_view_cfg["image_parameters"]["p2"]) corner_points.append(bird_view_cfg["image_parameters"]["p3"]) corner_points.append(bird_view_cfg["image_parameters"]["p4"]) width_og = int(bird_view_cfg["image_parameters"]["width_og"]) height_og = int(bird_view_cfg["image_parameters"]["height_og"]) img_path = bird_view_cfg["image_parameters"]["img_path"] size_height = bird_view_cfg["image_parameters"]["size_height"] size_width = bird_view_cfg["image_parameters"]["size_width"] tr = np.array([ bird_view_cfg["image_parameters"]["p4"][0], bird_view_cfg["image_parameters"]["p4"][1], ]) tl = np.array([ bird_view_cfg["image_parameters"]["p2"][0], bird_view_cfg["image_parameters"]["p2"][1], ]) br = np.array([ bird_view_cfg["image_parameters"]["p3"][0], bird_view_cfg["image_parameters"]["p3"][1], ]) bl = np.array([ bird_view_cfg["image_parameters"]["p1"][0], bird_view_cfg["image_parameters"]["p1"][1], ]) widthA = np.sqrt(((br[0] - bl[0])**2) + ((br[1] - bl[1])**2)) widthB = np.sqrt(((tr[0] - tl[0])**2) + ((tr[1] - tl[1])**2)) maxWidth = max(int(widthA), int(widthB)) heightA = np.sqrt(((tr[0] - br[0])**2) + ((tr[1] - br[1])**2)) heightB = np.sqrt(((tl[0] - bl[0])**2) + ((tl[1] - bl[1])**2)) maxHeight = max(int(heightA), int(heightB)) matrix, imgOutput = compute_perspective_transform(corner_points, maxWidth, maxHeight, cv2.imread(img_path)) height, width, _ = imgOutput.shape dim = (width, height) # Definition of the parameters max_cosine_distance = 0.4 nn_budget = None nms_max_overlap = 1.0 # initialize deep sort model_filename = "model_data/mars-small128.pb" encoder = gdet.create_box_encoder(model_filename, batch_size=1) # calculate cosine distance metric metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) # initialize tracker tracker = Tracker(metric) # load configuration for object detector config = ConfigProto() config.gpu_options.allow_growth = True session = InteractiveSession(config=config) STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS) input_size = FLAGS.size video_path = FLAGS.video # load tflite model if flag is set if FLAGS.framework == "tflite": interpreter = tf.lite.Interpreter(model_path=FLAGS.weights) interpreter.allocate_tensors() input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() print(input_details) print(output_details) # otherwise load standard tensorflow saved model else: saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING]) infer = saved_model_loaded.signatures["serving_default"] # begin video capture try: vid = cv2.VideoCapture(int(video_path)) except: vid = cv2.VideoCapture(video_path) output_video_1, output_video_2 = None, None # get video ready to save locally if flag is set if FLAGS.output: # by default VideoCapture returns float instead of int """ width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) """ fps = int(vid.get(cv2.CAP_PROP_FPS)) codec = cv2.VideoWriter_fourcc(*FLAGS.output_format) out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height)) frame_num = 0 # while video is running while True: black_img = cv2.imread("./black_bg.png") black_img = cv2.resize(black_img, dim, interpolation=cv2.INTER_AREA) return_value, frame = vid.read() 
if return_value: frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) image = Image.fromarray(frame) else: print("Video has ended or failed, try a different video format!") break frame_num += 1 print("Frame #: ", frame_num) frame_size = frame.shape[:2] image_data = cv2.resize(frame, (input_size, input_size)) image_data = image_data / 255.0 image_data = image_data[np.newaxis, ...].astype(np.float32) start_time = time.time() # run detections on tflite if flag is set if FLAGS.framework == "tflite": interpreter.set_tensor(input_details[0]["index"], image_data) interpreter.invoke() pred = [ interpreter.get_tensor(output_details[i]["index"]) for i in range(len(output_details)) ] # run detections using yolov3 if flag is set if FLAGS.model == "yolov3" and FLAGS.tiny == True: boxes, pred_conf = filter_boxes( pred[1], pred[0], score_threshold=0.25, input_shape=tf.constant([input_size, input_size]), ) else: boxes, pred_conf = filter_boxes( pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant([input_size, input_size]), ) else: batch_data = tf.constant(image_data) pred_bbox = infer(batch_data) for key, value in pred_bbox.items(): boxes = value[:, :, 0:4] pred_conf = value[:, :, 4:] ( boxes, scores, classes, valid_detections, ) = tf.image.combined_non_max_suppression( boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)), scores=tf.reshape( pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])), max_output_size_per_class=50, max_total_size=50, iou_threshold=FLAGS.iou, score_threshold=FLAGS.score, ) # convert data to numpy arrays and slice out unused elements num_objects = valid_detections.numpy()[0] bboxes = boxes.numpy()[0] bboxes = bboxes[0:int(num_objects)] scores = scores.numpy()[0] scores = scores[0:int(num_objects)] classes = classes.numpy()[0] classes = classes[0:int(num_objects)] # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height original_h, original_w, _ = frame.shape bboxes = utils.format_boxes(bboxes, original_h, original_w) # store all predictions in one parameter for simplicity when calling functions pred_bbox = [bboxes, scores, classes, num_objects] # read in all class names from config class_names = utils.read_class_names(cfg.YOLO.CLASSES) # by default allow all classes in .names file # allowed_classes = list(class_names.values()) # custom allowed classes (uncomment line below to customize tracker for only people) allowed_classes = ["person"] # loop through objects and use class index to get class name, allow only classes in allowed_classes list names = [] deleted_indx = [] for i in range(num_objects): class_indx = int(classes[i]) class_name = class_names[class_indx] if class_name not in allowed_classes: deleted_indx.append(i) else: names.append(class_name) names = np.array(names) count = len(names) if FLAGS.count: cv2.putText( frame, "Objects being tracked: {}".format(count), (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2, ) print("Objects being tracked: {}".format(count)) # delete detections that are not in allowed_classes bboxes = np.delete(bboxes, deleted_indx, axis=0) scores = np.delete(scores, deleted_indx, axis=0) # encode yolo detections and feed to tracker features = encoder(frame, bboxes) detections = [ Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip( bboxes, scores, names, features) ] # initialize color map cmap = plt.get_cmap("tab20b") colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)] # run non-maxima supression boxs = np.array([d.tlwh for d in detections]) 
scores = np.array([d.confidence for d in detections]) classes = np.array([d.class_name for d in detections]) indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores) detections = [detections[i] for i in indices] # Call the tracker tracker.predict() tracker.update(detections) bbox_array = [] # update tracks for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() bbox_array.append( (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))) class_name = track.get_class() # draw bbox on screen color = colors[int(track.track_id) % len(colors)] color = [i * 255 for i in color] cv2.rectangle( frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2, ) cv2.rectangle( frame, (int(bbox[0]), int(bbox[1] - 30)), ( int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1]), ), color, -1, ) cv2.putText( frame, class_name + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2, ) # if enable info flag then print details about each track if FLAGS.info: print( "Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}" .format( str(track.track_id), class_name, (int(bbox[0]), int(bbox[1]), int(bbox[2]), int( bbox[3])), )) if len(bbox_array) >= 1: array_centroids, array_groundpoints = get_centroids_and_groundpoints( bbox_array) transformed_downoids = compute_point_perspective_transformation( matrix, array_centroids) # Show every point on the top view image for point in transformed_downoids: x, y = point cv2.circle(black_img, (x, y), 60, (0, 255, 0), 2) cv2.circle(black_img, (x, y), 3, (0, 255, 0), -1) # calculate frames per second of running detections fps = 1.0 / (time.time() - start_time) print("FPS: %.2f" % fps) result = np.asarray(frame) # result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) if not FLAGS.dont_show: cv2.imshow("Output Video", result) # if output flag is set, save video file if FLAGS.output: if output_video_1 is None and output_video_2 is None: fourcc1 = cv2.VideoWriter_fourcc(*"MJPG") output_video_1 = cv2.VideoWriter( "./video.avi", fourcc1, 25, (frame.shape[1], frame.shape[0]), True) fourcc2 = cv2.VideoWriter_fourcc(*"MJPG") output_video_2 = cv2.VideoWriter( "./bird_view.avi", fourcc2, 25, (black_img.shape[1], black_img.shape[0]), True, ) elif output_video_1 is not None and output_video_2 is not None: output_video_1.write(frame) output_video_2.write(black_img) if cv2.waitKey(1) & 0xFF == ord("q"): break cv2.destroyAllWindows()
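# The bird's-eye-view main relies on two helpers, compute_perspective_transform and
# compute_point_perspective_transformation, to map ground points from the camera view onto
# the top-down image. They are not defined in this section; the sketch below shows what
# they are assumed to do with OpenCV's homography utilities. The corner ordering must match
# the convention used in config_birdview.yml, so treat the dst layout as illustrative.
import numpy as np
import cv2

def compute_perspective_transform_sketch(corner_points, width, height, image):
    """Estimate the homography from four image-plane corners to a width x height top view."""
    src = np.float32(corner_points)
    dst = np.float32([[0, 0], [width, 0], [0, height], [width, height]])
    matrix = cv2.getPerspectiveTransform(src, dst)
    warped = cv2.warpPerspective(image, matrix, (width, height))
    return matrix, warped

def compute_point_perspective_transformation_sketch(matrix, points):
    """Project a list of (x, y) ground points through the homography."""
    pts = np.float32(points).reshape(-1, 1, 2)
    transformed = cv2.perspectiveTransform(pts, matrix)
    return [(int(p[0][0]), int(p[0][1])) for p in transformed]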
def main(_argv):
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    images = FLAGS.images

    # load model
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])

    # loop through images in list and run Yolov4 model on each
    for count, image_path in enumerate(images, 1):
        original_image = cv2.imread(image_path)
        original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)

        image_data = cv2.resize(original_image, (input_size, input_size))
        image_data = image_data / 255.

        images_data = []
        for i in range(1):
            images_data.append(image_data)
        images_data = np.asarray(images_data).astype(np.float32)

        if FLAGS.framework == 'tflite':
            interpreter.allocate_tensors()
            input_details = interpreter.get_input_details()
            output_details = interpreter.get_output_details()
            print(input_details)
            print(output_details)
            interpreter.set_tensor(input_details[0]['index'], images_data)
            interpreter.invoke()
            pred = [interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))]
            if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
                boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
        else:
            infer = saved_model_loaded.signatures['serving_default']
            batch_data = tf.constant(images_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=5,
            max_total_size=10,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score,
        )
        pred_bbox = [boxes.numpy(), scores.numpy(), classes.numpy(), valid_detections.numpy()]

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        allowed_classes = list(class_names.values())

        # custom allowed classes (uncomment line below to allow detections for only people)
        #allowed_classes = ['person']

        image = utils.draw_bbox(original_image, pred_bbox, allowed_classes=allowed_classes)
        image = Image.fromarray(image.astype(np.uint8))
        if not FLAGS.dont_show:
            image.show()
        image = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2RGB)
        cv2.imwrite(FLAGS.output + 'detection' + str(count) + '.png', image)

        # ========== voiceFeedback ==========
        valid_items = pred_bbox[3][0]
        valid_classes = pred_bbox[2][0]
        valid_boxes = pred_bbox[0][0]
        # section = (input_size/3)
        (H, W) = original_image.shape[:2]

        res = []
        for i in range(valid_items):
            # boxes are normalized (ymin, xmin, ymax, xmax); scale the center back to pixels
            # before comparing it against thirds of the original image size
            (top, left, bottom, right) = valid_boxes[i]
            centerX = round((right + left) / 2 * W)
            centerY = round((top + bottom) / 2 * H)

            if centerX <= W / 3:
                w_pos = 'left '
            elif centerX <= (W / 3 * 2):
                w_pos = 'center '
            else:
                w_pos = 'right '

            if centerY <= H / 3:
                h_pos = 'top '
            elif centerY <= (H / 3 * 2):
                h_pos = 'mid '
            else:
                h_pos = 'bottom '

            res.append(h_pos + w_pos + allowed_classes[int(valid_classes[i])])

        description = ', '.join(res)
        tts = gTTS(text=description, lang="en", slow=False)
        filename = f'./detections/voice{count}.mp3'
        tts.save(filename)
        playsound.playsound(filename)
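# The voice feedback in main() maps each detection's center onto a 3x3 grid of the frame
# ("top/mid/bottom" by "left/center/right") before handing the sentence to gTTS. The helper below
# is a small, self-contained sketch of that mapping for reuse or testing; the name
# describe_position is hypothetical and not part of the original code.
def describe_position(center_x, center_y, frame_w, frame_h):
    # horizontal third of the frame
    if center_x <= frame_w / 3:
        w_pos = 'left'
    elif center_x <= frame_w / 3 * 2:
        w_pos = 'center'
    else:
        w_pos = 'right'
    # vertical third of the frame
    if center_y <= frame_h / 3:
        h_pos = 'top'
    elif center_y <= frame_h / 3 * 2:
        h_pos = 'mid'
    else:
        h_pos = 'bottom'
    return h_pos + ' ' + w_pos

# Example: describe_position(100, 400, 1280, 720) returns 'mid left'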
def startRecording_YOLO():
    date_and_time = time.strftime("%Y%m%d-%H-%M-%S")  #stores the current date and time in YYYYMMDD-HH-MM-SS format
    vid_out_path = os.path.join(PROJECT_DIR, 'YoloV4', 'outputs', date_and_time + '.avi')

    #vid = cv2.VideoCapture(test_drive)  #0 for webcam/Raspberry Pi Cam
    videothread = VideoThread(resolution=(640, 480), framerate=30).start()
    width = int(videothread.stream.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(videothread.stream.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(videothread.stream.get(cv2.CAP_PROP_FPS))
    codec = cv2.VideoWriter_fourcc(*'XVID')
    output_video = cv2.VideoWriter(vid_out_path, codec, fps, (width, height))

    #width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    #height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    #fps = int(vid.get(cv2.CAP_PROP_FPS))
    #codec = cv2.VideoWriter_fourcc(*'XVID')
    #output_video = cv2.VideoWriter(vid_out_path, codec, fps, (width,height))

    frame_number = 0
    freq = cv2.getTickFrequency()
    avg_fps = 0

    #while video is running/recording
    while True:
        return_val, frame = videothread.read()
        #return_val, frame = vid.read()
        if return_val:
            #frame = cv2.flip(frame, -1)
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
        else:
            print('Video error, try another format')
            break

        frame_number += 1
        #print('Frame #: ', frame_number)
        frame_size = frame.shape[:2]
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        #image_data = np.expand_dims(frame_resized, axis=0)
        #if floating_model:
        #    image_data = (np.float32(image_data) - 127.5) / 127.5
        image_data = image_data[np.newaxis, ...].astype(np.float32)  #converts image data to a float32 type
        start_time = time.time()

        #TFLite detections
        interpreter.set_tensor(input_details[0]['index'], image_data)
        interpreter.invoke()
        prediction = [interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))]
        #box = interpreter.get_tensor(output_details[0]['index'])[0]
        #scores = interpreter.get_tensor(output_details[2]['index'])[0]
        boxes, prediction_conf = filter_boxes(prediction[0], prediction[1], score_threshold=0.4,
                                              input_shape=tf.constant([input_size, input_size]))

        #Reshape = returns a new tensor that has the same values as the input in the same order, but with a new shape
        #Shape = returns a 1-D integer tensor representing the shape of the input
        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(prediction_conf, (tf.shape(prediction_conf)[0], -1, tf.shape(prediction_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=0.45,
            score_threshold=0.5
        )

        #convert the received data into numpy arrays, then slice out unused elements
        number_of_objects = valid_detections.numpy()[0]
        bboxes = boxes.numpy()[0]
        bboxes = bboxes[0:int(number_of_objects)]
        scores = scores.numpy()[0]
        scores = scores[0:int(number_of_objects)]
        classes = classes.numpy()[0]
        classes = classes[0:int(number_of_objects)]

        #format bounding boxes with normalized minimums and maximums of x and y
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)
        prediction_bbox = [bboxes, scores, classes, number_of_objects]

        #read in all the class names from config and only allow certain ones to be detected (eases computation power)
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)
        allowed_classes = ['traffic light', 'person', 'car', 'stop sign']

        #loop through objects and get classification name, using only the ones allowed in allowed_classes
        names = []
        deleted_indx = []
        for i in range(number_of_objects):
            classification_index = int(classes[i])
            class_name = class_names[classification_index]
            if class_name not in allowed_classes:
                deleted_indx.append(i)
            else:
                names.append(class_name)
        names = np.array(names)
        count = len(names)

        #delete irrelevant detections (not in allowed_classes)
        bboxes = np.delete(bboxes, deleted_indx, axis=0)
        scores = np.delete(scores, deleted_indx, axis=0)

        #feed the tracker with encoded yolo detections
        detections_features = encoder(frame, bboxes)
        detections = [Detection(bbox, score, class_name, detection_feature)
                      for bbox, score, class_name, detection_feature
                      in zip(bboxes, scores, names, detections_features)]

        #initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        #run non-maxima suppression (reduces the number of overlapping detections to as few as possible)
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        #call the tracker
        tracker.predict()
        tracker.update(detections)

        #update tracks
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()

            #if class_name == 'person': print('person found')
            #change frame to that which showcases the lane detection
            #frame = lane_detect.detect_edges(frame)  #COMMENT OUT IF/WHEN ERROR OCCURS

            #distance approximation (barebones, needs more adjusting)
            cam_parameter = 18  #change with different cameras; gets the detected distance closer to the actual distance
            distance = (np.pi) / (bbox[2].item() + bbox[3].item()) * 1000 + cam_parameter
            det_dest = str(int(distance))

            #draw bounded box on screen
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) + (len(class_name) + len(det_dest)) * 18, int(bbox[1])), color, -1)
            #cv2.putText(frame, class_name + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2)
            cv2.putText(frame, class_name + ": " + str(int(distance)),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2)

        #calculate fps of running detections
        fps = 1.0 / (time.time() - start_time)
        avg_fps = avg_fps + fps
        #print("FPS: %.2f" % fps)
        cv2.putText(frame, "FPS: " + str(int(fps)), (width - 100, height - 20), 0, 0.75, (255, 255, 255), 2)

        result = np.asarray(frame)
        result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        cv2.imshow("Output Video", result)
        output_video.write(result)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cv2.destroyAllWindows()
    print('Average FPS: ', (avg_fps / frame_number))
    print('Number of Frames: ', frame_number)
    videothread.stop()
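# startRecording_YOLO() relies on a VideoThread class that is not shown here. The sketch below is
# one plausible shape for it, assuming the common "threaded VideoCapture" pattern: frames are
# grabbed on a background thread so detection never blocks on camera I/O. The attribute and method
# names (.stream, .read(), .start(), .stop()) mirror how the function above uses them, but the
# body and the _Sketch class name are assumptions, not the project's actual implementation.
from threading import Thread

class VideoThreadSketch:
    def __init__(self, resolution=(640, 480), framerate=30, src=0):
        # open the camera and request the desired capture properties
        self.stream = cv2.VideoCapture(src)
        self.stream.set(cv2.CAP_PROP_FRAME_WIDTH, resolution[0])
        self.stream.set(cv2.CAP_PROP_FRAME_HEIGHT, resolution[1])
        self.stream.set(cv2.CAP_PROP_FPS, framerate)
        self.grabbed, self.frame = self.stream.read()
        self.stopped = False

    def start(self):
        # grab frames on a daemon thread so read() always returns the most recent frame
        Thread(target=self._update, daemon=True).start()
        return self

    def _update(self):
        while not self.stopped:
            self.grabbed, self.frame = self.stream.read()
        self.stream.release()

    def read(self):
        # mirrors cv2.VideoCapture.read(): (success flag, latest frame)
        return self.grabbed, self.frame

    def stop(self):
        self.stopped = True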