def socket_start(port=6066):
    """Serve object-detection requests over a loopback TCP socket.

    An ``ObjectDetector`` is created once, then the function listens on
    ``127.0.0.1:port`` and handles one request per accepted connection:
    the client sends an image path (UTF-8, read with a single ``recv`` of at
    most 1024 bytes) and receives a JSON document with the keys ``rects``,
    ``cost_time`` and ``detect_msg``.

    Parameters
    ----------
    port
        TCP port to listen on.

    Returns
    -------
    None
        The accept loop has no exit condition; the function only ends when an
        exception propagates, in which case the listening socket is closed.
    """
    demo = ObjectDetector()
    print("load model success!\n")

    fdSocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        # struct linger {l_onoff=1, l_linger=0}.
        fdSocket.setsockopt(socket.SOL_SOCKET, socket.SO_LINGER,
                            struct.pack('ii', 1, 0))
        fdSocket.bind(('127.0.0.1', port))
        fdSocket.listen(10)

        while True:
            conn, _ = fdSocket.accept()
            # Close every accepted connection deterministically, even when
            # handling the request raises.
            # NOTE(review): accepted sockets usually inherit SO_LINGER from
            # the listener, so this close is presumably abortive - confirm
            # that clients read the reply before relying on EOF.
            with conn:
                # The socket is a connected stream: recv/sendall are the
                # matching calls (recvfrom/sendto are datagram-style, and
                # sendto gives no guarantee the whole reply is written).
                data = conn.recv(1024).decode('utf-8')

                start_time = time.time()
                image = cv2.imread(data)
                if image is None:
                    # cv2.imread signals an unreadable path by returning None.
                    detect_msg = "image path error"
                    rects = []
                else:
                    rects = demo.detect(image).tolist()
                    detect_msg = "success"
                end_time = time.time()

                results = json.dumps({
                    "rects": rects,
                    "cost_time": f"{end_time - start_time:.03f} s",
                    "detect_msg": detect_msg
                })
                conn.sendall(results.encode('utf-8'))
    finally:
        # Previously unreachable after the infinite loop; now runs whenever
        # the loop is left through an exception.
        fdSocket.close()
def object_detection_for_videos_folder_test(video_folder_path: str,
                                            checkpoint_file_path: str = None,
                                            mean: list = None,
                                            stddev: list = None):
    """
    Run object detection on every ``*.mp4`` video in a folder.

    Videos are processed in sorted path order. Processing stops at the first
    video that cannot be opened.

    Parameters
    ----------
    video_folder_path
        Path of the folder that contains the videos.
    checkpoint_file_path
        Path of the object-detection model. When ``None``, no detector is
        built here and ``None`` is passed to ``BoxLandmarkVideoDetector``.
    mean
        Image mean used when training the detection model. Not used by this
        function.
    stddev
        Image standard deviation used when training the detection model. Not
        used by this function.

    Returns
    -------
    None
    """
    face_detector = None
    if checkpoint_file_path is not None:
        face_detector = ObjectDetector(checkpoint_file_path)

    video = cv2.VideoCapture()
    video_object_detection = BoxLandmarkVideoDetector(
        face_detector=face_detector)

    try:
        video_paths = sorted(glob.glob("{}/*.mp4".format(video_folder_path)))
        for video_index, video_path in enumerate(video_paths):
            print(video_index, os.path.basename(video_path))
            if not video.open(video_path):
                print("can not open the video: ", video_path)
                return
            video_object_detection.detect(video)
    finally:
        # Release the capture on every exit path: normal completion, the
        # early return above, and exceptions raised while detecting.
        video.release()
def main():
    """Time each stage of the detection pipeline on one sample image.

    Loads ``./data/image_data/images/00000_1.jpg`` relative to the current
    working directory, runs the pipeline once to obtain the intermediate
    inputs for each stage, then prints the value returned by ``compute_time``
    for image loading (OpenCV and ``load_image``), pre-processing, inference,
    post-processing and the end-to-end ``detect`` call.

    Raises
    ------
    FileNotFoundError
        If the sample image cannot be read.
    """
    detector = ObjectDetector()
    image_path = os.path.join(os.getcwd(),
                              "./data/image_data/images/00000_1.jpg")

    image = cv2.imread(image_path, 1)
    if image is None:
        # Fail with a clear message instead of an obscure error further down
        # the pipeline when the sample image is missing.
        raise FileNotFoundError(
            "can not read the image: {}".format(image_path))

    # One pass through the pipeline to get the inputs of each timed stage.
    pipeline = detector.detector
    input_image = pipeline._pre_process(image)
    pred = pipeline.inference(input_image)
    # The original also ran these two calls once before timing and discarded
    # the results; they are kept so the measured calls are not first calls.
    pipeline._post_process(pred)
    load_image(image_path)

    stages = [
        ("opencv load image", cv2.imread, [image_path]),
        ("PIL load image", load_image, [image_path]),
        ("preprocess", pipeline._pre_process, [image]),
        ("inference", pipeline.inference, [input_image]),
        ("postprocess", pipeline._post_process, [pred]),
        ("total predict", detector.detect, [image]),
    ]
    for label, func, args in stages:
        elapsed = compute_time(func, args)
        # "{:.2f}" gives two decimals; the former "{:02f}" was a width spec
        # and printed the default six.
        print("avg {} time is {:.2f} ms".format(label, elapsed))
def __init__(self, face_detector=None, landmark_detector=None):
    """Store the two detectors, building a default for each one omitted.

    Parameters
    ----------
    face_detector
        Detector stored as ``self.face_detector``; a new ``ObjectDetector()``
        is used when ``None``.
    landmark_detector
        Detector stored as ``self.landmark_detector``; a new
        ``LandmarkDetector()`` is used when ``None``.
    """
    self.face_detector = (
        ObjectDetector() if face_detector is None else face_detector)
    self.landmark_detector = (
        LandmarkDetector() if landmark_detector is None else landmark_detector)
def __init__(self, face_detector=None, landmark_detector=None):
    """Set the playback attributes and store the two detectors.

    ``auto_play_flag`` is always set to ``True``, and ``decay_time`` is 1
    when the flag is set and 0 otherwise.

    Parameters
    ----------
    face_detector
        Detector stored as ``self.face_detector``; a new ``ObjectDetector()``
        is used when ``None``.
    landmark_detector
        Detector stored as ``self.landmark_detector``; a new
        ``LandmarkDetector()`` is used when ``None``.
    """
    self.auto_play_flag = True
    if self.auto_play_flag:
        self.decay_time = 1
    else:
        self.decay_time = 0

    self.face_detector = (
        ObjectDetector() if face_detector is None else face_detector)
    self.landmark_detector = (
        LandmarkDetector() if landmark_detector is None else landmark_detector)
def main():
    """Run person search on a sample video against a person library.

    Builds a ``VideoRecognition`` from a default ``ObjectDetector`` and
    ``FeatureExtract``, extracts the features of the person library, prints
    the shape of the resulting feature library, then runs ``detect`` on
    ``data/video_data/videos/1.mp4`` under the project root. Returns early
    if the video cannot be opened.
    """
    person_library = r"F:\tmp\person_search\librarys"
    # `working_root` comes from outside this function; the project root is
    # taken to be two directory levels above it.
    work_root = os.path.dirname(os.path.dirname(working_root))
    video_path = os.path.join(work_root, "data/video_data/videos/1.mp4")
    # onnx_file_path = os.path.join(work_root, r"checkpoints/face_reid/backbone_ir50_asia-sim.onnx")

    detector = ObjectDetector()
    extractor = FeatureExtract()
    person_search = VideoRecognition(person_library, detector, extractor)
    person_search.person_library_feature_extract()
    print(person_search.person_feature_library.shape)

    video = cv2.VideoCapture()
    try:
        if not video.open(video_path):
            print("can not open the video: ", video_path)
            return
        person_search.detect(video)
    finally:
        # The capture was never released before; release it on every exit
        # path, including exceptions raised by detect().
        video.release()
def init(self):
    """Create default components for any that have not been supplied.

    ``self.detector`` becomes a new ``ObjectDetector()`` when it is ``None``.
    ``self.extractor`` becomes a new ``FeatureExtract()`` when it is missing
    or ``None``. Each component is defaulted independently, so a
    caller-supplied detector or extractor is never replaced.
    """
    if self.detector is None:
        self.detector = ObjectDetector()
    # getattr: the extractor attribute may not have been assigned yet when
    # init() runs.
    if getattr(self, "extractor", None) is None:
        self.extractor = FeatureExtract()
def create_knowledge_graph(frame):
    """
    Build a knowledge dictionary for an image and draw the detections on it.

    The returned dictionary has this layout::

        {
            "scene": <result of SceneClassifier.predict(frame)>,
            "classes": {
                <class>: {
                    "count": <number of detections of that class>,
                    "objects": {
                        "<class>0": {
                            "color": <ColorDetector.find_color(crop)>,
                            "location": [left, top, right, bottom],
                            "text": <TextDetector.detect(crop)>
                        },
                        "<class>1": {...},
                        ...
                    }
                },
                ...
            }
        }

    All crops are analysed before anything is drawn, so the annotations
    never appear in the crops handed to the text and color detectors.

    :param frame: input image; it is drawn on in place
    :return: knowledge dict and the annotated image
    """
    scene = SceneClassifier.predict(frame)
    boxes, classes = ObjectDetector.predict(frame)

    # one entry per detected class, seeded with its detection count
    per_class = {name: {"count": total}
                 for name, total in Counter(classes).items()}
    knowledge = {"scene": scene, "classes": per_class}

    # attributes of every detected object
    for idx, box in enumerate(boxes):
        left, top, right, bottom = map(int, box)
        region = frame[top:bottom, left:right]
        text = TextDetector.detect(region)
        color = ColorDetector.find_color(region)

        name = classes[idx]
        members = knowledge["classes"][name].setdefault("objects", {})
        # objects are numbered per class in detection order
        members["%s%d" % (name, len(members))] = {
            "color": color,
            "location": [left, top, right, bottom],
            "text": text
        }

    # draw bounding boxes and labels
    for idx, box in enumerate(boxes):
        left, top, right, bottom = map(int, box)
        cv2.rectangle(frame, (left, top), (right, bottom), (0, 255, 0), 3)
        cv2.putText(frame, str(classes[idx]), (left + 10, bottom - 10),
                    cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2)

    return knowledge, frame