def main(yolo): # Definition of the parameters max_cosine_distance = 0.3 nn_budget = None nms_max_overlap = 1.0 # Deep SORT model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) tracking = True writeVideo_flag = False asyncVideo_flag = False webcamera_flag = True ipcamera_flag = False udp_flag = True file_path = '/workspace/data/C0133_v4.mp4' if asyncVideo_flag : video_capture = VideoCaptureAsync(file_path) elif ipcamera_flag : video_capture = cv2.VideoCapture('rtsp://*****:*****@192.168.2.201/ONVIF/MediaInput?profile=def_profile1') elif webcamera_flag : video_capture = cv2.VideoCapture(0) else: video_capture = cv2.VideoCapture(file_path) if asyncVideo_flag: video_capture.start() if writeVideo_flag: if asyncVideo_flag: w = int(video_capture.cap.get(3)) h = int(video_capture.cap.get(4)) else: w = int(video_capture.get(3)) h = int(video_capture.get(4)) fourcc = cv2.VideoWriter_fourcc(*'XVID') out = cv2.VideoWriter('output_yolov4.avi', fourcc, 30, (w, h)) frame_index = -1 if udp_flag: HOST = '' PORT = 5000 address = '192.168.2.255' sock =socket(AF_INET, SOCK_DGRAM) sock.setsockopt(SOL_SOCKET, SO_BROADCAST, 1) sock.bind((HOST, PORT)) fps = 0.0 fps_imutils = imutils.video.FPS().start() savetime = 0 while True: nowtime = datetime.datetime.now().isoformat() ret, frame = video_capture.read() # frame shape 640*480*3 t1 = time.time() if time.time() - savetime >= 30: print('save data') cv2.imwrite("/workspace/images/image.png", frame) savetime = time.time() image = Image.fromarray(frame[...,::-1]) # bgr to rgb boxes, confidence, classes = yolo.detect_image(image) if tracking: features = encoder(frame, boxes) detections = [Detection(bbox, confidence, cls, feature) for bbox, confidence, cls, feature in zip(boxes, confidence, classes, features)] else: detections = [Detection_YOLO(bbox, confidence, cls) for bbox, confidence, cls in zip(boxes, confidence, classes)] # Run non-maxima suppression. boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] if tracking: # Call the tracker tracker.predict() tracker.update(detections) for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2) cv2.putText(frame, "ID: " + str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 1.5e-3 * frame.shape[0], (0, 255, 0), 1) # socket message = str(nowtime + "," + str(track.track_id) + "," + str(int(bbox[0])) + "," + str(int(bbox[1])) + "," + str(int(bbox[2])) + "," + str(int(bbox[3]))) bmessage = message.encode('utf-8') print(type(bmessage)) if udp_flag: sock.sendto(message.encode('utf-8'), (address, PORT)) for det in detections: bbox = det.to_tlbr() score = "%.2f" % round(det.confidence * 100, 2) + "%" cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2) if len(classes) > 0: cls = det.cls cv2.putText(frame, str(cls) + " " + score, (int(bbox[0]), int(bbox[3])), 0, 1.5e-3 * frame.shape[0], (0, 255, 0), 1) #cv2.imshow('', frame) if writeVideo_flag: # and not asyncVideo_flag: # save a frame out.write(frame) frame_index = frame_index + 1 fps_imutils.update() if not asyncVideo_flag: fps = (fps + (1./(time.time()-t1))) / 2 print("FPS = %f"%(fps)) # Press Q to stop! if cv2.waitKey(1) & 0xFF == ord('q'): break fps_imutils.stop() print('imutils FPS: {}'.format(fps_imutils.fps())) if asyncVideo_flag: video_capture.stop() else: video_capture.release() if writeVideo_flag: out.release() cv2.destroyAllWindows()
def main(yolo): # Definition of the parameters max_cosine_distance = 0.3 nn_budget = None nms_max_overlap = 1.0 # Deep SORT model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) tracking = True writeVideo_flag = False asyncVideo_flag = False file_path = 'D:\\video/[Sala Outside][2020-05-28T16-01-39][2020-05-28T18-02-09].mp4' if asyncVideo_flag: video_capture = VideoCaptureAsync(file_path) else: video_capture = cv2.VideoCapture(file_path) if asyncVideo_flag: video_capture.start() if writeVideo_flag: if asyncVideo_flag: w = int(video_capture.cap.get(3)) h = int(video_capture.cap.get(4)) else: w = int(video_capture.get(3)) h = int(video_capture.get(4)) fourcc = cv2.VideoWriter_fourcc(*'XVID') out = cv2.VideoWriter('output_yolov4.avi', fourcc, 30, (w, h)) frame_index = -1 fps = 0.0 fps_imutils = imutils.video.FPS().start() while True: ret, frame = video_capture.read() # frame shape 640*480*3 if ret != True: break t1 = time.time() image = Image.fromarray(frame[..., ::-1]) # bgr to rgb boxes, confidence, classes = yolo.detect_image(image) if tracking: features = encoder(frame, boxes) detections = [ Detection(bbox, confidence, cls, feature) for bbox, confidence, cls, feature in zip( boxes, confidence, classes, features) ] else: detections = [ Detection_YOLO(bbox, confidence, cls) for bbox, confidence, cls in zip(boxes, confidence, classes) ] # Run non-maxima suppression. boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] if tracking: # Call the tracker tracker.predict() tracker.update(detections) for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2) cv2.putText(frame, "ID: " + str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 1.5e-3 * frame.shape[0], (0, 255, 0), 1) for det in detections: bbox = det.to_tlbr() score = "%.2f" % round(det.confidence * 100, 2) + "%" cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2) if len(classes) > 0: cls = det.cls cv2.putText(frame, str(cls) + " " + score, (int(bbox[0]), int(bbox[3])), 0, 1.5e-3 * frame.shape[0], (0, 255, 0), 1) cv2.imshow('', frame) if writeVideo_flag: # and not asyncVideo_flag: # save a frame out.write(frame) frame_index = frame_index + 1 fps_imutils.update() if not asyncVideo_flag: fps = (fps + (1. / (time.time() - t1))) / 2 print("FPS = %f" % (fps)) # Press Q to stop! if cv2.waitKey(1) & 0xFF == ord('q'): break fps_imutils.stop() print('imutils FPS: {}'.format(fps_imutils.fps())) if asyncVideo_flag: video_capture.stop() else: video_capture.release() if writeVideo_flag: out.release() cv2.destroyAllWindows()
def main(yolo): # Definition of the parameters max_cosine_distance = 0.3 nn_budget = None nms_max_overlap = 1.0 # Deep SORT model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) tracking = True writeVideo_flag = True asyncVideo_flag = False file_path = 'video.webm' if asyncVideo_flag: video_capture = VideoCaptureAsync(file_path) else: video_capture = cv2.VideoCapture(file_path) if asyncVideo_flag: video_capture.start() if writeVideo_flag: if asyncVideo_flag: w = int(video_capture.cap.get(3)) h = int(video_capture.cap.get(4)) else: w = int(video_capture.get(3)) h = int(video_capture.get(4)) fourcc = cv2.VideoWriter_fourcc(*'XVID') out = cv2.VideoWriter('output_yolov4.avi', fourcc, 30, (w, h)) frame_index = -1 fps = 0.0 fps_imutils = imutils.video.FPS().start() model_par, valid_transform = model_init_par() while True: ret, frame = video_capture.read() # frame shape 640*480*3 if ret != True: break t1 = time.time() image = Image.fromarray(frame[..., ::-1]) # bgr to rgb boxes, confidence, classes = yolo.detect_image(image) if tracking: features = encoder(frame, boxes) detections = [ Detection(bbox, confidence, cls, feature) for bbox, confidence, cls, feature in zip( boxes, confidence, classes, features) ] else: detections = [ Detection_YOLO(bbox, confidence, cls) for bbox, confidence, cls in zip(boxes, confidence, classes) ] # Run non-maxima suppression. boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] for det in detections: bbox = det.to_tlbr() score = "%.2f" % round(det.confidence * 100, 2) + "%" cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2) if len(classes) > 0: cls = det.cls cv2.putText(frame, str(cls) + " " + score, (int(bbox[0]), int(bbox[3])), 0, 1e-3 * frame.shape[0], (0, 255, 0), 1) if tracking: # Call the tracker tracker.predict() tracker.update(detections) for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() #crop_img = frame[int(bbox[1]):int(bbox[3]),int(bbox[0]):int(bbox[2])] crop_img = image.crop( [int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])]) #res_txt = demo_par(model_par, valid_transform, crop_img) #draw.rectangle(xy=person_bbox[:-1], outline='red', width=1) cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2) cv2.putText(frame, "ID: " + str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 1e-3 * frame.shape[0], (0, 255, 0), 1) font = ImageFont.truetype( '/home/sohaibrabbani/PycharmProjects/Strong_Baseline_of_Pedestrian_Attribute_Recognition/arial.ttf', size=10) # positive_cnt = 1 # for txt in res_txt: # if 'personal' in txt: # #draw.text((x1, y1 + 20 * positive_cnt), txt, (255, 0, 0), font=font) # cv2.putText(frame, txt, (int(bbox[0]), int(bbox[1]) + 20 * positive_cnt), 0, # 1e-3 * frame.shape[0], (0, 255, 0), 1) # positive_cnt += 1 cv2.imshow('', frame) if writeVideo_flag: # and not asyncVideo_flag: # save a frame out.write(frame) frame_index = frame_index + 1 fps_imutils.update() if not asyncVideo_flag: fps = (fps + (1. / (time.time() - t1))) / 2 print("FPS = %f" % (fps)) # Press Q to stop! if cv2.waitKey(1) & 0xFF == ord('q'): break fps_imutils.stop() print('imutils FPS: {}'.format(fps_imutils.fps())) if asyncVideo_flag: video_capture.stop() else: video_capture.release() if writeVideo_flag: out.release() cv2.destroyAllWindows()
def main(yolo): # Definition of the parameters max_cosine_distance = 0.3 nn_budget = None nms_max_overlap = 1.0 width = 1280 height = 720 rfps = 10 # Deep SORT model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) tracking = args.tracking writeVideo_flag = args.writeVideo_flag asyncVideo_flag = args.asyncVideo_flag webcamera_flag = args.webcamera_flag ipcamera_flag = args.ipcamera_flag udp_flag = args.udp_flag full_cam_addr, key = sd.set_address(args.ipaddress, args.cam_ip, args.cam_cmd, args.key) cam_ip = full_cam_addr.replace(args.cam_cmd, "").replace("rtsp://*****:*****@", "") if args.jpegmode: full_cam_addr = full_cam_addr.replace("rtsp", "http") print(full_cam_addr) print(key) if asyncVideo_flag: print("load videofile") video_capture = VideoCaptureAsync(args.videofile) elif ipcamera_flag or args.jpegmode: print("load ipcamera") video_capture = cv2.VideoCapture(full_cam_addr) # width = video_capture.get(cv2.CAP_PROP_FRAME_WIDTH) # height = video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT) # rfps = video_capture.get(cv2.CAP_PROP_FPS) print("fps:{}width:{}height:{}".format(rfps, width, height)) elif webcamera_flag: print("load webcamera") video_capture = cv2.VideoCapture(0) else: print("load videofile") video_capture = cv2.VideoCapture(args.videofile) # video_capture.start() if writeVideo_flag: if asyncVideo_flag: w = int(video_capture.cap.get(3)) h = int(video_capture.cap.get(4)) else: w = int(video_capture.get(3)) h = int(video_capture.get(4)) fourcc = cv2.VideoWriter_fourcc(*'XVID') out = cv2.VideoWriter('output_yolov4.avi', fourcc, 30, (w, h)) frame_index = -1 if udp_flag: HOST = '' PORT = 5000 address = '192.168.2.255' sock = socket(AF_INET, SOCK_DGRAM) sock.setsockopt(SOL_SOCKET, SO_BROADCAST, 1) sock.bind((HOST, PORT)) fps = 0.0 i = 0 if not args.maskoff: maskbgi = Image.new('RGB', (int(width), int(height)), (0, 0, 0)) mask = Image.open(args.maskdir + 'mask' + args.ipaddress[-1] + '.png').convert("L").resize(size=(int(width), int(height)), resample=Image.NEAREST) while True: nowtime = datetime.datetime.now( timezone('Asia/Tokyo')).strftime('%Y-%m-%d %H:%M:%S.%f%z') ret, frame = video_capture.read() # frame shape 640*480*3 if not ret: print('cant read') video_capture = cv2.VideoCapture(full_cam_addr) continue t1 = time.time() try: image = Image.fromarray(frame[..., ::-1]) # bgr to rgb except TypeError: video_capture = cv2.VideoCapture(full_cam_addr) continue image = Image.composite(maskbgi, image, mask) boxes, confidence, classes = yolo.detect_image(image) if tracking: features = encoder(frame, boxes) detections = [ Detection(bbox, confidence, cls, feature) for bbox, confidence, cls, feature in zip( boxes, confidence, classes, features) ] else: detections = [ Detection_YOLO(bbox, confidence, cls) for bbox, confidence, cls in zip(boxes, confidence, classes) ] # Run non-maxima suppression. boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] car_data = {} if tracking: # Call the tracker tracker.predict() tracker.update(detections) for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() car_data[str(track.track_id)] = [ int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]) ] sd.send_amqp( sd.create_jsondata(cam_ip, nowtime, time.time() - t1, car_data, args.jsonfile, args.json_path, i), key, args.AMQPHost) i += 1 if not asyncVideo_flag: fps = (fps + (1. / (time.time() - t1))) / 2 print("FPS = %f" % (fps)) ### 読み飛ばし処理を追加 ### if not args.jsonfile and args.skip: if fps <= 10: for _i in range(int(math.ceil(rfps / fps)) - 1): ret, frame = video_capture.read() if asyncVideo_flag: video_capture.stop() else: video_capture.release() if writeVideo_flag: out.release() cv2.destroyAllWindows()
def yolo_frames(unique_name): device = unique_name[1] tracking = True if tracking: gdet = import_module('tools.generate_detections') nn_matching = import_module('deep_sort.nn_matching') Tracker = import_module('deep_sort.tracker').Tracker # Definition of the parameters max_cosine_distance = 0.3 nn_budget = None # deep_sort model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) yolo = YOLO() nms_max_overlap = 1.0 num_frames = 0 get_feed_from = ('camera', device) current_date = datetime.datetime.now().date() count_dict = {} # initiate dict for storing counts while True: cam_id, frame = BaseCamera.get_frame(get_feed_from) # image_height, image_width = frame.shape[:2] if frame is None: break num_frames += 1 if num_frames % 2 != 0: # only process frames at set number of frame intervals continue #image = Image.fromarray(frame) image = Image.fromarray(frame[..., ::-1]) # convert bgr to rgb boxes, confidence, classes = yolo.detect_image(image) if tracking: features = encoder(frame, boxes) detections = [Detection(bbox, confidence, cls, feature) for bbox, confidence, cls, feature in zip(boxes, confidence, classes, features)] else: detections = [Detection_YOLO(bbox, confidence, cls) for bbox, confidence, cls in zip(boxes, confidence, classes)] # Run non-maxima suppression. boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] class_counter = Counter() # store counts of each detected class if tracking: # Call the tracker tracker.predict() tracker.update(detections) track_count = int(0) # reset counter to 0 for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 1) # WHITE BOX cv2.putText(frame, "ID: " + str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 1.5e-3 * frame.shape[0], (0, 255, 0), 1) track_count += 1 # add 1 for each tracked object cv2.putText(frame, "Current total count: " + str(track_count), (int(20), int(60 * 5e-3 * frame.shape[0])), 0, 2e-3 * frame.shape[0], (255, 255, 255), 2) det_count = int(0) for det in detections: bbox = det.to_tlbr() score = "%.2f" % (det.confidence * 100) + "%" cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 1) # BLUE BOX if len(classes) > 0: cls = det.cls cv2.putText(frame, str(cls) + " " + score, (int(bbox[0]), int(bbox[3])), 0, 1.5e-3 * frame.shape[0], (0, 255, 0), 1) class_counter[cls] += 1 det_count += 1 # display counts for each class as they appear y = 80 * 5e-3 * frame.shape[0] for cls in class_counter: class_count = class_counter[cls] cv2.putText(frame, str(cls) + " " + str(class_count), (int(20), int(y)), 0, 2e-3 * frame.shape[0], (255, 255, 255), 2) y += 20 * 5e-3 * frame.shape[0] #TODO apply this to other text # use YOLO counts if tracking is turned off if tracking: count = track_count else: count = det_count # calculate current minute now = datetime.datetime.now() rounded_now = now - datetime.timedelta(microseconds=now.microsecond) # round to nearest second current_minute = now.time().minute if current_minute == 0 and len(count_dict) > 1: count_dict = {} # reset counts every hour else: # write counts to file for every set interval of the hour write_interval = 5 if current_minute % write_interval == 0: # write to file once only every write_interval minutes if current_minute not in count_dict: count_dict[current_minute] = True total_filename = 'Total counts for {}, camera {}.txt'.format(current_date, device) total_count_file = open(total_filename, 'a') print('Writing current total count ({}) to file.'.format(count)) total_count_file.write(str(rounded_now) + ", " + device + ', ' + str(count) + "\n") total_count_file.close() # if class exists in class counter, create file and write counts for cls in class_counter: class_count = class_counter[cls] class_filename = '{} counts for {}, camera {}.txt'.format(cls, current_date, device) class_count_file = open(class_filename, 'a') print('Writing current {} count ({}) to file.'.format(cls, class_count)) class_count_file.write(str(rounded_now) + ", " + device + ', ' + str(class_count) + "\n") class_count_file.close() yield cam_id, frame
def main(yolo): # Definition of the parameters max_cosine_distance = 0.3 nn_budget = None nms_max_overlap = 1.0 # Deep SORT model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) tracking = False writeVideo_flag = True asyncVideo_flag = False file_path = ['out.mp4'] #file_path = ['veed.mp4'] cols = math.ceil(math.sqrt(len(file_path))) rows = math.ceil(len(file_path) / cols) singleHeight = int(screenHeight / rows) singleWidth = int(screenWidth / cols) out_image = np.zeros((screenHeight, screenWidth, 3), np.uint8) #if asyncVideo_flag : # video_capture = VideoCaptureAsync(file_path) #else: # video_capture = cv2.VideoCapture(file_path) video_captures = [] cameras = [] prvTimes = [] localgloballink = [] imgsSaved = 2 for i in range(len(file_path)): video_captures.append(cv2.VideoCapture(file_path[i])) cameras.append(Camera()) prvTimes.append(time.time()) #if asyncVideo_flag: # video_capture.start() if writeVideo_flag: if asyncVideo_flag: w = int(video_capture.cap.get(3)) h = int(video_capture.cap.get(4)) h = int(video_capture.cap.get(4)) else: w = screenWidth h = screenHeight fourcc = cv2.VideoWriter_fourcc(*'XVID') out = cv2.VideoWriter('output_yolov4.avi', fourcc, 30, (w, h)) frame_index = -1 fps = 0.0 fps_imutils = imutils.video.FPS().start() frame = [] globalPersonCount = 1 for file in file_path: frame.append(None) curFrame = 1 gtIndex = 0 while True: allimages = [] for index in range(len(file_path)): cur = time.time() ret, frame[index] = video_captures[index].read( ) # frame shape 640*480*3 if ret != True: break t1 = time.time() image = Image.fromarray(frame[index][..., ::-1]) # bgr to rgb boxes, confidence, classes = yolo.detect_image(image) if tracking: features = encoder(frame[index], boxes) detections = [ Detection(bbox, confidence, cls, feature) for bbox, confidence, cls, feature in zip( boxes, confidence, classes, features) ] else: detections = [ Detection_YOLO(bbox, confidence, cls) for bbox, confidence, cls in zip( boxes, confidence, classes) ] # Run non-maxima suppression. boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) indices = preprocessing.non_max_suppression( boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] for det in detections: bbox = det.to_tlbr() score = "%.2f" % round(det.confidence * 100, 2) + "%" cv2.rectangle(frame[index], (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2) if len(classes) > 0: cls = det.cls cv2.putText(frame[index], str(cls) + " " + score, (int(bbox[0]), int(bbox[3])), 0, 1e-3 * frame[index].shape[0], (0, 255, 0), 1) #nabin's code hsvImage = cv2.cvtColor(frame[index], cv2.COLOR_BGR2HSV) hungarianmatrix = [] indexx = 0 if (len(cameras[index].PersonData) > 0): diff = cur - prvTimes[index] times = int(diff / 0.05) prvTimes[index] = cur for data in cameras[index].PersonData: if (data.kf != None): for i in range(times): data.kf.predict() nodata = len(cameras[index].PersonData) for z in range(len(cameras[index].PersonData)): cameras[index].PersonData[z].updated = False for det in detections: bbox = det.to_tlbr() if (nodata == 0): persondata = PersonData() persondata.color = [ int(random.randint(0, 255)), int(random.randint(0, 255)), int(random.randint(0, 255)) ] persondata.positions.append([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2]) persondata.positions.append([(bbox[0] + bbox[2]) / 2 + 0.1, (bbox[1] + bbox[3]) / 2 + 0.1 ]) persondata.top = bbox[0] persondata.left = bbox[1] persondata.lastPosition = bbox persondata.localPersonIndex = cameras[ index].localPersonCount persondata.kf = KF(persondata.positions[0][0], persondata.positions[0][1], 0, 0) persondata.globalPersonIndex = globalPersonCount localgloballink.append([ globalPersonCount, index, persondata.localPersonIndex ]) globalPersonCount = globalPersonCount + 1 cameras[index].localPersonCount = cameras[ index].localPersonCount + 1 hsvCroppedImage = hsvImage[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])] persondata.histogram_h = cv2.calcHist([hsvCroppedImage], [0], None, [180], [0, 180]) persondata.histogram_h = np.divide(persondata.histogram_h, ((bbox[3] - bbox[1]) * (bbox[2] - bbox[0]))) cameras[index].PersonData.append(persondata) else: hungarianmatrix.append([]) hsvCroppedImage = hsvImage[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])] histogram_h = cv2.calcHist([hsvCroppedImage], [0], None, [180], [0, 180]) histogram_h = np.divide(histogram_h, ((bbox[3] - bbox[1]) * (bbox[2] - bbox[0]))) for z in range(len(cameras[index].PersonData)): postions = len( cameras[index].PersonData[z].positions) - 1 cov = np.cov( np.asarray( cameras[index].PersonData[z].positions).T) #mahal=(distance.mahalanobis([cameras[index].PersonData[z].kf.calulatedmean[0],cameras[index].PersonData[z].kf.calulatedmean[2]],[(bbox[0]+bbox[2])/2,(bbox[1]+bbox[3])/2],cov))/ frame[index].shape[0] mahal = math.sqrt((cameras[index].PersonData[z]. positions[postions][0] - (bbox[0] + bbox[2]) / 2)**2 + (cameras[index].PersonData[z]. positions[postions][1] - (bbox[1] + bbox[3]) / 2)**2 ) / frame[index].shape[0] #mahal=math.sqrt((cameras[index].PersonData[z].kf.calulatedmean[0]-(bbox[0]+bbox[2])/2)**2+(cameras[index].PersonData[z].kf.calulatedmean[1]-(bbox[1]+bbox[3])/2)**2)/ frame[index].shape[0] #mahal=getMahalanbolisDist(cameras[index].PersonData[z].positions,[(bbox[0]+bbox[2])/2,(bbox[1]+bbox[3])/2]) mahal += (np.sum( np.absolute( np.subtract( histogram_h, cameras[index].PersonData[z].histogram_h))) ) hungarianmatrix[indexx].append(mahal) indexx = indexx + 1 print(hungarianmatrix) if (nodata != 0): row_ind = [] col_ind = [] if (hungarianmatrix != []): row_ind, col_ind = linear_sum_assignment(hungarianmatrix) indexx = 0 for pos in range(len(col_ind)): if (hungarianmatrix[row_ind[pos]][col_ind[pos]] < 2 - detections[row_ind[pos]].confidence): bbox = detections[row_ind[pos]].to_tlbr() detections[row_ind[pos]].localProcessed = True cameras[index].PersonData[col_ind[pos]].updated = True cameras[index].PersonData[col_ind[pos]].top = bbox[0] cameras[index].PersonData[col_ind[pos]].left = bbox[1] cameras[index].PersonData[col_ind[pos]].kf.update([ (bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2 ]) cameras[index].PersonData[ col_ind[pos]].lastPosition = bbox cameras[index].PersonData[ col_ind[pos]].positions.append([ (bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2 ]) hsvCroppedImage = hsvImage[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])] toadd = detections[row_ind[pos]].confidence - 0.7 cameras[index].PersonData[ col_ind[pos]].histogram_h = np.add( np.multiply( cv2.calcHist([hsvCroppedImage], [0], None, [180], [0, 180]), toadd * 1 / (((bbox[3] - bbox[1]) * (bbox[2] - bbox[0])))), np.multiply( cameras[index].PersonData[ col_ind[pos]].histogram_h, 1 - toadd)) if (len(cameras[index].PersonData[ col_ind[pos]].positions) > 6): cameras[index].PersonData[ col_ind[pos]].positions.pop(0) for pos in range(len(detections)): if (hasattr(detections[pos], 'localProcessed') == False): bbox = detections[pos].to_tlbr() #if(bbox[1]>hsvImage.shape[0]): # continue ndata = PersonData() ndata.top = bbox[0] ndata.left = bbox[1] ndata.positions.append([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2]) ndata.positions.append([(bbox[0] + bbox[2]) / 2 + 0.1, (bbox[1] + bbox[3]) / 2 + 0.1]) ndata.kf = KF((bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2, 0, 0) ndata.color = [ int(random.randint(0, 255)), int(random.randint(0, 255)), int(random.randint(0, 255)) ] ndata.localPersonIndex = cameras[ index].localPersonCount ndata.lastPosition = bbox ndata.kf = KF(ndata.positions[0][0], ndata.positions[0][1], 0, 0) cameras[index].localPersonCount = cameras[ index].localPersonCount + 1 localgloballink.append( [globalPersonCount, index, ndata.localPersonIndex]) ndata.globalPersonIndex = globalPersonCount globalPersonCount = globalPersonCount + 1 hsvCroppedImage = hsvImage[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])] ndata.histogram_h = cv2.calcHist([hsvCroppedImage], [0], None, [180], [0, 180]) ndata.histogram_h = np.divide(ndata.histogram_h, ((bbox[3] - bbox[1]) * (bbox[2] - bbox[0]))) cameras[index].PersonData.append(ndata) #allimages.append([]) if (len(file_path)) != 1: for pdata in cameras[index].PersonData: if (pdata.updated): nimg = cv2.resize( frame[index][int(pdata.lastPosition[1] ):int(pdata.lastPosition[3]), int(pdata.lastPosition[0] ):int(pdata.lastPosition[2])], (64, 128), interpolation=cv2.INTER_AREA) #allimages[len(allimages)-1].append(np.array(nimg)) pdata.imgs.append(nimg) if (len(pdata.imgs) == imgsSaved + 1): pdata.imgs.pop(0) #nabin's code ends if tracking: # Call the tracker tracker.predict() tracker.update(detections) for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() cv2.rectangle(frame[index], (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2) cv2.putText(frame[index], "ID: " + str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 1e-3 * frame[index].shape[0], (0, 255, 0), 1) #if(len(cameras)==2): #globalHungarian=[] # for fdata in range(len(cameras[0].PersonData)): # globalHungarian.append([]) # for pdata in cameras[1].PersonData: # globalHungarian[fdata].append(np.sum(np.absolute(np.subtract(pdata.histogram_h,cameras[0].PersonData[fdata].histogram_h)))) # # row_ind, col_ind = linear_sum_assignment(globalHungarian) # for row in range(len(row_ind)): # cv2.putText(frame[0], chr(ord('a')+row),(int(cameras[0].PersonData[row_ind[row]].positions[len(cameras[0].PersonData[row_ind[row]].positions)-1][0]), int(cameras[0].PersonData[row_ind[row]].positions[len(cameras[0].PersonData[row_ind[row]].positions)-1][1])),0, 5e-3 * 200, (0,255,0),2) # cv2.putText(frame[1], chr(ord('a')+row),(int(cameras[1].PersonData[col_ind[row]].positions[len(cameras[1].PersonData[col_ind[row]].positions)-1][0]), int(cameras[1].PersonData[col_ind[row]].positions[len(cameras[1].PersonData[col_ind[row]].positions)-1][1])),0, 5e-3 * 200, (0,255,0),2) if (len(cameras) == 1): hypos = [] hyposPos = [] for person in cameras[0].PersonData: if (person.updated == True): cv2.putText( frame[0], str(person.localPersonIndex), (int(person.positions[len(person.positions) - 1][0]), int(person.positions[len(person.positions) - 1][1])), 0, 1e-3 * frame[index].shape[0], (0, 255, 0), 1) if (person.updated == True): hypos.append(person.localPersonIndex + 1) hyposPos.append([person.top, person.left]) gts = [] gtsPos = [] while gt[gtIndex][0] == curFrame and gtIndex < len(gt): gts.append(gt[gtIndex][1]) gtsPos.append([gt[gtIndex][2], gt[gtIndex][3]]) gtIndex = gtIndex + 1 curFrame = curFrame + 1 dis = mm.distances.norm2squared_matrix(np.array(gtsPos), np.array(hyposPos)) acc.update(gts, hypos, dis) else: edges = [] globalHungarian = [] for i in range(len(cameras)): for j in range(i + 1, len(cameras)): globalHungarian = [] x = 0 xindexes = [] yindexes = [] stackedimgages = [] for pos in range(imgsSaved): stackedimgages.append([]) for person in cameras[j].PersonData: if (person.updated == True and len(person.imgs) == imgsSaved): stackedimgages[pos].append(person.imgs[pos]) for fdata in range(len(cameras[i].PersonData)): if (cameras[i].PersonData[fdata].updated == False or len(cameras[i].PersonData[fdata].imgs) != imgsSaved): continue xindexes.append(fdata) y = 0 triplet = test(cameras[i].PersonData[fdata].imgs[0], stackedimgages[0]) for pos in range(1, imgsSaved): triplet = np.add( triplet, test(cameras[i].PersonData[fdata].imgs[pos], stackedimgages[pos])) globalHungarian.append([]) for pdata in range(len(cameras[j].PersonData)): if (cameras[j].PersonData[pdata].updated == False or len(cameras[j].PersonData[pdata].imgs) != imgsSaved): continue #globalHungarian[x].append(triplet[y]) globalHungarian[x].append( np.sum( np.absolute( np.subtract( cameras[j].PersonData[pdata]. histogram_h, cameras[i]. PersonData[fdata].histogram_h))) * 2 + triplet[y]) if (x == 0): yindexes.append(pdata) #globalHungarian[fdata].append(np.sum(np.absolute(np.subtract(cameras[j].PersonData[pdata].histogram_h,cameras[i].PersonData[fdata].histogram_h)))) #globalHungarian[fdata].append(triplet[pdata]) y = y + 1 x = x + 1 if (len(globalHungarian) != 0): row_ind, col_ind = linear_sum_assignment( globalHungarian) print(globalHungarian) for pos in range(len(row_ind)): if (globalHungarian[row_ind[pos]][col_ind[pos]] < 3.2): edges.append( (cameras[i].PersonData[xindexes[ row_ind[pos]]].globalPersonIndex, cameras[j].PersonData[yindexes[ col_ind[pos]]].globalPersonIndex)) Allcliques = cliques(edges, len(cameras), globalPersonCount).getCliques() for cam in cameras: for person in cam.PersonData: isinclique = True for clique in Allcliques: if person.globalPersonIndex in clique: isinclique = False break if isinclique: Allcliques.append([person.globalPersonIndex]) for sclique in Allcliques: indexes = [] cur = min(sclique) for i in range(len(sclique)): isInclique = False prvIndex = cameras[localgloballink[ sclique[i] - 1][1]].PersonData[localgloballink[ sclique[i] - 1][2]].prvglobalFoundOutPersonIndex if prvIndex == -1: isInclique = True else: for snclique in Allcliques: if prvIndex in snclique: isInclique = True break if isInclique == True: cameras[localgloballink[sclique[i] - 1][1]].PersonData[ localgloballink[sclique[i] - 1] [2]].globalFoundOutPersonIndex = cur else: cameras[localgloballink[sclique[i] - 1][1]].PersonData[ localgloballink[sclique[i] - 1] [2]].globalFoundOutPersonIndex = prvIndex for cam in range(len(cameras)): for person in cameras[cam].PersonData: if person.updated == True: cv2.putText( frame[cam], str(person.globalFoundOutPersonIndex), (int(person.positions[len(person.positions) - 1][0]), int(person.positions[len(person.positions) - 1][1])), 0, 1e-3 * frame[index].shape[0], (0, 255, 0), 2) for sclique in Allcliques: for i in range(len(sclique)): cameras[localgloballink[sclique[i] - 1][1]].PersonData[ localgloballink[sclique[i] - 1] [2]].prvglobalFoundOutPersonIndex = cameras[ localgloballink[sclique[i] - 1][1]].PersonData[ localgloballink[sclique[i] - 1] [2]].globalFoundOutPersonIndex out_image.fill(0) vindex = 0 for row in range(rows): for col in range(cols): if (vindex == len(file_path)): break vidshape = frame[vindex].shape curvidheightratio = vidshape[0] / singleHeight curvidwidthratio = vidshape[1] / singleWidth if (curvidwidthratio < curvidheightratio): #height is small resizedwidth = int(vidshape[1] / vidshape[0] * singleHeight) nimg = cv2.resize(frame[vindex], (resizedwidth, singleHeight), interpolation=cv2.INTER_AREA) widthpos = int( (singleWidth - resizedwidth) / 2) + col * singleWidth out_image[row * singleHeight:(row + 1) * singleHeight, widthpos:widthpos + resizedwidth] = nimg else: #width is small resizedheight = int(vidshape[0] / vidshape[1] * singleWidth) nimg = cv2.resize(frame[vindex], (singleWidth, resizedheight), interpolation=cv2.INTER_AREA) heightpos = int(((singleHeight - resizedheight) / 2) + row * singleHeight) out_image[heightpos:heightpos + resizedheight, col * singleWidth:(col + 1) * singleWidth] = nimg vindex = vindex + 1 if (len(cameras) == 1): mh = mm.metrics.create() summary = mh.compute(acc, metrics=['num_frames', 'mota', 'motp'], name='acc') print(summary) cv2.imshow('', out_image) if writeVideo_flag: # and not asyncVideo_flag: # save a frame out.write(out_image) frame_index = frame_index + 1 fps_imutils.update() if not asyncVideo_flag: fps = (fps + (1. / (time.time() - t1))) / 2 print("FPS = %f" % (fps)) # Press Q to stop! if cv2.waitKey(1) & 0xFF == ord('q'): break fps_imutils.stop() print('imutils FPS: {}'.format(fps_imutils.fps())) if asyncVideo_flag: video_capture.stop() else: video_capture.release() if writeVideo_flag: out.release() cv2.destroyAllWindows()
def main(yolo): # Definition of the parameters max_cosine_distance = 0.3 nn_budget = None nms_max_overlap = 1.0 # Deep SORT model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) tracking = True writeVideo_flag = True asyncVideo_flag = False file_path = 'IMG_3326.MOV' dfObj = pd.DataFrame( columns=['frame_num', 'track', 'cx', 'cy', 'w', 'h', 'track_temp']) dfObjDTP = pd.DataFrame(columns=[ 'filename', 'frame_num', 'bb1', 'bb2', 'bb3', 'bb4', 'track', 'track_temp', 'Height' ]) if asyncVideo_flag: video_capture = VideoCaptureAsync(file_path) else: video_capture = cv2.VideoCapture(file_path) if asyncVideo_flag: video_capture.start() if writeVideo_flag: if asyncVideo_flag: w = int(video_capture.cap.get(3)) h = int(video_capture.cap.get(4)) else: w = int(video_capture.get(3)) h = int(video_capture.get(4)) fourcc = cv2.VideoWriter_fourcc(*'XVID') out = cv2.VideoWriter('output_yolov4.avi', fourcc, 30, (w, h)) frame_index = -1 fps = 0.0 fps_imutils = imutils.video.FPS().start() while True: ret, frame = video_capture.read() # frame shape 640*480*3 if ret != True: break t1 = time.time() image = Image.fromarray(frame[..., ::-1]) # bgr to rgb boxes, confidence, classes = yolo.detect_image(image) if tracking: features = encoder(frame, boxes) detections = [ Detection(bbox, confidence, cls, feature) for bbox, confidence, cls, feature in zip( boxes, confidence, classes, features) ] else: detections = [ Detection_YOLO(bbox, confidence, cls) for bbox, confidence, cls in zip(boxes, confidence, classes) ] # Run non-maxima suppression. boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] if tracking: # Call the tracker tracker.predict() tracker.update(detections) for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() #Ini buat cropping gambar per frame #cropped_image = frame[int(bbox[1]):int(bbox[1])+(int(bbox[3])-int(bbox[1])),int(bbox[0]):int(bbox[0])+(int(bbox[2])-int(bbox[0]))] cropped_image = frame[int(bbox[1]):int(bbox[1]) + 256, int(bbox[0]):int(bbox[0]) + 128] # cropped_image = frame[2:5,6:10] # Matiin atau comment biar ga ada box putih # cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2) # # cv2.putText(frame, "ID: " + str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, # 1.5e-3 * frame.shape[0], (0, 255, 0), 1) # print(cropped_image) dirname = "output_crop/{}".format(track.track_id) if not os.path.exists(dirname): os.makedirs(dirname) if (cropped_image.size == 0): continue else: writeStatus = cv2.imwrite( "output_crop/{}/frame_{}.png".format( track.track_id, frame_index), cropped_image) print("output_crop/{}/frame_{}.png".format( track.track_id, frame_index)) # Write CSV dfObj = dfObj.append(pd.Series([ frame_index, track.track_id, int(bbox[0]), int(bbox[1]), int(bbox[2]) - int(bbox[0]), int(bbox[3]) - int(bbox[1]), track.time_since_update ], index=dfObj.columns), ignore_index=True) dfObjDTP = dfObjDTP.append(pd.Series([ file_path, frame_index, int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]), track.track_id, track.time_since_update, int(bbox[3]) - int(bbox[1]) ], index=dfObjDTP.columns), ignore_index=True) for det in detections: bbox = det.to_tlbr() score = "%.2f" % round(det.confidence * 100, 2) + "%" #Matiin atau comment biar ga ada box putih di crop image # cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2) # if len(classes) > 0: # cls = det.cls # cv2.putText(frame, str(cls) + " " + score, (int(bbox[0]), int(bbox[3])), 0, # 1.5e-3 * frame.shape[0], (0, 255, 0), 1) cv2.imshow('', frame) if writeVideo_flag: # and not asyncVideo_flag: # save a frame out.write(frame) frame_index = frame_index + 1 fps_imutils.update() if not asyncVideo_flag: fps = (fps + (1. / (time.time() - t1))) / 2 print("FPS = %f" % (fps)) # Press Q to stop! if cv2.waitKey(1) & 0xFF == ord('q'): break fps_imutils.stop() print('imutils FPS: {}'.format(fps_imutils.fps())) if asyncVideo_flag: video_capture.stop() else: video_capture.release() if writeVideo_flag: out.release() dfObj = dfObj.sort_values(["track", "frame_num"], ascending=(True, True)) dfObj.to_csv(r'result_temp.csv', index=False) dfObjDTP = dfObjDTP.sort_values(["track", "frame_num"], ascending=(True, True)) dfObjDTP.to_csv(r'result_temp_dtp.csv', index=False) convert_to_final() cv2.destroyAllWindows()
def main(yolo): # Definition of the parameters max_cosine_distance = 0.3 nn_budget = None nms_max_overlap = 1.0 # Deep SORT model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) # Calculate cosine Distance Metric metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) # Flags for process tracking = True # Set False if you only want to do detection writeVideo_flag = True # Set False if you don't want to write frames locally asyncVideo_flag = False # It uses asynchronous processing for better FPS :Warning: Shuttering Problem # Video File Path file_path = '/mydrive/test.mp4' # Check if asyncVideo flag set to True if asyncVideo_flag : video_capture = VideoCaptureAsync(file_path) else: video_capture = cv2.VideoCapture(file_path) if asyncVideo_flag: video_capture.start() if writeVideo_flag: if asyncVideo_flag: w = int(video_capture.cap.get(3)) h = int(video_capture.cap.get(4)) else: w = int(video_capture.get(3)) h = int(video_capture.get(4)) fourcc = cv2.VideoWriter_fourcc(*'XVID') output_file = os.path.basename(file_path)[:-4] out = cv2.VideoWriter('./Output/' + output_file + "-prueba-test.avi", fourcc, 30, (w, h)) frame_index = -1 fps = 0.0 fps_imutils = imutils.video.FPS().start() while True: ret, frame = video_capture.read() # Capture frames if ret != True: break t1 = time.time() # bgr to rgb frame conversion image = Image.fromarray(frame[...,::-1]) # YOLOv4 Detection boxes, confidence, classes = yolo.detect_image(image) if tracking: # Encodes the frame and boxes for DeepSORT features = encoder(frame, boxes) # DeepSORT Detection detections = [Detection(bbox, confidence, cls, feature) for bbox, confidence, cls, feature in zip(boxes, confidence, classes, features)] else: # Only YOLOv4 Detection detections = [Detection_YOLO(bbox, confidence, cls) for bbox, confidence, cls in zip(boxes, confidence, classes)] # Run non-maxima suppression. boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] if tracking: # Call the tracker tracker.predict() tracker.update(detections) for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() # Draw white bbox for DeepSORT cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2) cv2.putText(frame, "ID: " + str(track.track_id), (int(bbox[0]), int(bbox[1])), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5,(0, 255, 0), 1) for det in detections: bbox = det.to_tlbr() score = "%.2f" % round(det.confidence * 100, 2) # Check the class for colored bbox if len(classes) > 0: cls = det.cls center_bbox = (int(bbox[2]), int(bbox[2])) if str(cls) == 'person': # Draw Blue bbox for YOLOv4 person detection cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (219, 152, 52), 2) elif str(cls) == 'backpack' or 'handbag' or 'suitcase': # Draw Orange bbox for YOLOv4 handbag, backpack and suitcase detection cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (65, 176, 245), 2) if not asyncVideo_flag: fps = (fps + (1./(time.time()-t1))) / 2 print("FPS = %f"%(fps)) cv2.putText(frame, "GPU: NVIDIA Tesla P100", (5, 70), cv2.FONT_HERSHEY_COMPLEX_SMALL, 0.8, (0, 255, 0), 1) cv2.putText(frame, "FPS: %.2f" % fps, (5, 50), cv2.FONT_HERSHEY_COMPLEX_SMALL, 0.8, (0, 255, 0), 1) # draw the timestamp on the frame timestamp = datetime.datetime.now() ts = timestamp.strftime("%d/%m/%Y, %H:%M:%S") cv2.putText(frame, ts, (5, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 0.8, (0, 255, 0), 1) #cv2.imshow('', frame) if writeVideo_flag: # and not asyncVideo_flag: # save a frame out.write(frame) frame_index = frame_index + 1 fps_imutils.update() # Press Q to stop! if cv2.waitKey(1) & 0xFF == ord('q'): break fps_imutils.stop() print('imutils FPS: {}'.format(fps_imutils.fps())) if asyncVideo_flag: video_capture.stop() else: video_capture.release() if writeVideo_flag: out.release()
def main(yolo): max_cosine_distance = 0.3 nn_budget = None nms_max_overlap = 1.0 model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) tracking = True while True: print("Waiting...") (rpiName, frame) = imageHub.recv_image() print("Image Received...") t1 = time.time() ids = {} image = Image.fromarray(frame[..., ::-1]) # bgr to rgb boxes, confidence, classes = yolo.detect_image(image) if tracking: features = encoder(frame, boxes) detections = [ Detection(bbox, confidence, cls, feature) for bbox, confidence, cls, feature in zip( boxes, confidence, classes, features) ] else: detections = [ Detection_YOLO(bbox, confidence, cls) for bbox, confidence, cls in zip(boxes, confidence, classes) ] # Run non-maxima suppression. boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] if tracking: # Call the tracker tracker.predict() tracker.update(detections) for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() ids[track.track_id] = bbox.tolist() for det in detections: bbox = det.to_tlbr() score = "%.2f" % round(det.confidence * 100, 2) + "%" print(time.time() - t1) if cv2.waitKey(1) & 0xFF == ord('q'): break imageHub.send_reply(json.dumps(ids).encode('ascii')) if asyncVideo_flag: video_capture.stop() else: video_capture.release() if writeVideo_flag: out.release() cv2.destroyAllWindows()
def main(yolo): # Definition of the parameters max_cosine_distance = 0.3 nn_budget = None nms_max_overlap = 1.0 # Deep SORT model_filename = 'model_data/mars-small128.pb' encoder = gdet.create_box_encoder(model_filename, batch_size=1) metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) tracker = Tracker(metric) tracking = False writeVideo_flag = True asyncVideo_flag = False #file path for videos input file_path = ['out.mp4'] #calulating number of row and columns based on number of videos input cols=math.ceil(math.sqrt(len(file_path))) rows=math.ceil(len(file_path)/cols) #calulating single video hwight and width based on number of rows/cols and screen width/height singleHeight=int(screenHeight/rows) singleWidth=int(screenWidth/cols) #out_image sent to the screen and file written out_image=np.zeros((screenHeight,screenWidth,3), np.uint8) #if asyncVideo_flag : # video_capture = VideoCaptureAsync(file_path) #else: # video_capture = cv2.VideoCapture(file_path) #videos reference to get index later video_captures = [] #number of videos/cameras fed. Used to save person data cameras=[] #previous time kalman filter was processed #prvTimes=[] #array to link local index to global person index localgloballink=[] #number of images saved after a person has been tracked in a single camera imgsSaved=6 #initializing cameras and video_capture variables for i in range(len(file_path)): video_captures.append(cv2.VideoCapture(file_path[i])) cameras.append(Camera()) #prvTimes.append(time.time()) #if asyncVideo_flag: # video_capture.start() if writeVideo_flag: #if asyncVideo_flag: # w = int(video_capture.cap.get(3)) # h = int(video_capture.cap.get(4)) #else: #setting width and height of video file written w = screenWidth h = screenHeight fourcc = cv2.VideoWriter_fourcc(*'XVID') out = cv2.VideoWriter('output_yolov4.avi', fourcc, 30, (w, h)) #number of frames processed till now frame_index = -1 #fps fps = 0.0 fps_imutils = imutils.video.FPS().start() #frames found in the current run frame=[] #initializing the frame variables for file in file_path: frame.append(None) #the global person count globalPersonCount=1 #frame count for testing with motmetrics curFrame=1; #global person index gtIndex=0; #variable to count the current images saved cur_save_count=0 prvGlobalIndexData=[] while True: cur_save_count=cur_save_count+1 #image saved in current run #allimages=[] for index in range(len(file_path)): #getting current time for kalman filter #cur=time.time() #reading a frame from video ret, frame[index] = video_captures[index].read() # frame shape 640*480*3 if ret != True: break #getting current time for file output t1 = time.time() #changing image from bgr to rgb image = Image.fromarray(frame[index][...,::-1]) # bgr to rgb #running yolo boxes, confidence, classes = yolo.detect_image(image) #Getting bounding boxes from image data if tracking: features = encoder(frame[index], boxes) detections = [Detection(bbox, confidence, cls, feature) for bbox, confidence, cls, feature in zip(boxes, confidence, classes, features)] else: detections = [Detection_YOLO(bbox, confidence, cls) for bbox, confidence, cls in zip(boxes, confidence, classes)] # Run non-maxima suppression. boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] #Got bounding boxes from image data #writing person detection accuracy and putting a bounding boxes around people for det in detections: bbox = det.to_tlbr() score = "%.2f" % round(det.confidence * 100, 2) + "%" cv2.rectangle(frame[index], (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2) if len(classes) > 0: cls = det.cls cv2.putText(frame[index], str(cls) + " " + score, (int(bbox[0]), int(bbox[3])), 0, 1e-3 * frame[index].shape[0], (0, 255, 0), 1) #changing rgb image data to hsv for hsv histogram hsvImage = cv2.cvtColor(frame[index], cv2.COLOR_BGR2HSV) #the local hungarian matrix hungarianmatrix=[] #index for hungerian matrix indexx=0 #if(len(cameras[index].PersonData)>0): # diff=cur-prvTimes[index] # times=int(diff/0.05) # prvTimes[index]=cur # for data in cameras[index].PersonData: # if(data.kf!=None): # for i in range(times): # data.kf.predict() #checking the number of previous person data stored nodata=len(cameras[index].PersonData); #Setting all person updated variable to false for z in range(len(cameras[index].PersonData)): cameras[index].PersonData[z].updated=False; #iterating through current detections for det in detections: #getting top, left, bottom and right co-ordinated from bounding boxes bbox = det.to_tlbr() #checking if there was no previous person data, initializing all found out persons directly to camera's person data variable if(nodata==0): persondata=PersonData() #persondata.color=[int(random.randint(0,255)),int(random.randint(0,255)),int(random.randint(0,255))] persondata.positions.append([(bbox[0]+bbox[2])/2,(bbox[1]+bbox[3])/2]) persondata.positions.append([(bbox[0]+bbox[2])/2+0.1,(bbox[1]+bbox[3])/2+0.1]) #persondata.top=bbox[0] #persondata.left=bbox[1] persondata.lastPosition=bbox persondata.localPersonIndex=cameras[index].localPersonCount; #persondata.kf=KF(persondata.positions[0][0],persondata.positions[0][1],0,0) persondata.globalPersonIndex=globalPersonCount; localgloballink.append([globalPersonCount,index,persondata.localPersonIndex]) globalPersonCount=globalPersonCount+1 cameras[index].localPersonCount=cameras[index].localPersonCount+1; hsvCroppedImage=hsvImage[int(bbox[1]):int(bbox[3]),int(bbox[0]):int(bbox[2])] persondata.histogram_h = cv2.calcHist([hsvCroppedImage],[0],None,[180],[0,180]) #dividing the hisogram by area so that person bounding box area wont alter histogram values persondata.histogram_h = np.divide(persondata.histogram_h,((bbox[3]-bbox[1])*(bbox[2]-bbox[0]))) #adding newly created person object to camera cameras[index].PersonData.append(persondata) else: hungarianmatrix.append([]) #getting current hsv value from current frame hsvCroppedImage=hsvImage[int(bbox[1]):int(bbox[3]),int(bbox[0]):int(bbox[2])] histogram_h = cv2.calcHist([hsvCroppedImage],[0],None,[180],[0,180]) histogram_h = np.divide(histogram_h,((bbox[3]-bbox[1])*(bbox[2]-bbox[0]))) for z in range(len(cameras[index].PersonData)): postions=len(cameras[index].PersonData[z].positions)-1 cov = np.cov(np.asarray(cameras[index].PersonData[z].positions).T) #mahal=(distance.mahalanobis([cameras[index].PersonData[z].kf.calulatedmean[0],cameras[index].PersonData[z].kf.calulatedmean[2]],[(bbox[0]+bbox[2])/2,(bbox[1]+bbox[3])/2],cov))/ frame[index].shape[0] mahal=math.sqrt((cameras[index].PersonData[z].positions[postions][0]-(bbox[0]+bbox[2])/2)**2+(cameras[index].PersonData[z].positions[postions][1]-(bbox[1]+bbox[3])/2)**2)/ frame[index].shape[0] #mahal=math.sqrt((cameras[index].PersonData[z].kf.calulatedmean[0]-(bbox[0]+bbox[2])/2)**2+(cameras[index].PersonData[z].kf.calulatedmean[1]-(bbox[1]+bbox[3])/2)**2)/ frame[index].shape[0] #mahal=getMahalanbolisDist(cameras[index].PersonData[z].positions,[(bbox[0]+bbox[2])/2,(bbox[1]+bbox[3])/2]) mahal+=(np.sum(np.absolute(np.subtract(histogram_h,cameras[index].PersonData[z].histogram_h)))) hungarianmatrix[indexx].append(mahal) indexx=indexx+1 #print(hungarianmatrix) if(nodata!=0): row_ind=[] col_ind=[] if(hungarianmatrix!=[]): row_ind, col_ind = linear_sum_assignment(hungarianmatrix) indexx=0; for pos in range(len(col_ind)): if(hungarianmatrix[row_ind[pos]][col_ind[pos]]<2-detections[row_ind[pos]].confidence): bbox=detections[row_ind[pos]].to_tlbr() detections[row_ind[pos]].localProcessed=True cameras[index].PersonData[col_ind[pos]].updated=True cameras[index].PersonData[col_ind[pos]].top=bbox[0] cameras[index].PersonData[col_ind[pos]].left=bbox[1] #cameras[index].PersonData[col_ind[pos]].kf.update([(bbox[0]+bbox[2])/2,(bbox[1]+bbox[3])/2]) cameras[index].PersonData[col_ind[pos]].lastPosition=bbox cameras[index].PersonData[col_ind[pos]].positions.append([(bbox[0]+bbox[2])/2,(bbox[1]+bbox[3])/2]) hsvCroppedImage=hsvImage[int(bbox[1]):int(bbox[3]),int(bbox[0]):int(bbox[2])] toadd=detections[row_ind[pos]].confidence-0.7 cameras[index].PersonData[col_ind[pos]].histogram_h = np.add(np.multiply(cv2.calcHist([hsvCroppedImage],[0],None,[180],[0,180]),toadd*1/(((bbox[3]-bbox[1])*(bbox[2]-bbox[0])))),np.multiply(cameras[index].PersonData[col_ind[pos]].histogram_h,1-toadd)) if(len(cameras[index].PersonData[col_ind[pos]].positions)>6): cameras[index].PersonData[col_ind[pos]].positions.pop(0); for pos in range(len(detections)): if(hasattr(detections[pos], 'localProcessed')==False): bbox = detections[pos].to_tlbr() #if(bbox[1]>hsvImage.shape[0]): # continue ndata=PersonData() ndata.top=bbox[0] ndata.left=bbox[1] ndata.positions.append([(bbox[0]+bbox[2])/2,(bbox[1]+bbox[3])/2]) ndata.positions.append([(bbox[0]+bbox[2])/2+0.1,(bbox[1]+bbox[3])/2+0.1]) #ndata.kf=KF((bbox[0]+bbox[2])/2,(bbox[1]+bbox[3])/2,0,0) ndata.color=[int(random.randint(0,255)),int(random.randint(0,255)),int(random.randint(0,255))] ndata.localPersonIndex=cameras[index].localPersonCount ndata.lastPosition=bbox ndata.kf=KF(ndata.positions[0][0],ndata.positions[0][1],0,0) cameras[index].localPersonCount=cameras[index].localPersonCount+1 localgloballink.append([globalPersonCount,index,ndata.localPersonIndex]) ndata.globalPersonIndex=globalPersonCount; globalPersonCount=globalPersonCount+1 hsvCroppedImage=hsvImage[int(bbox[1]):int(bbox[3]),int(bbox[0]):int(bbox[2])] ndata.histogram_h = cv2.calcHist([hsvCroppedImage],[0],None,[180],[0,180]) ndata.histogram_h = np.divide(ndata.histogram_h,((bbox[3]-bbox[1])*(bbox[2]-bbox[0]))) cameras[index].PersonData.append(ndata) #allimages.append([]) #if(len(file_path))!=1: # for pdata in cameras[index].PersonData: # if(pdata.updated): # nimg=cv2.resize(frame[index][int(pdata.lastPosition[1]):int(pdata.lastPosition[3]),int(pdata.lastPosition[0]):int(pdata.lastPosition[2])], (64,128 # ), interpolation = cv2.INTER_AREA) # #allimages[len(allimages)-1].append(np.array(nimg)) # pdata.imgs.append(nimg); # if(len(pdata.imgs)==imgsSaved+1): # pdata.imgs.pop(0) #nabin's code ends if tracking: # Call the tracker tracker.predict() tracker.update(detections) for track in tracker.tracks: if not track.is_confirmed() or track.time_since_update > 1: continue bbox = track.to_tlbr() cv2.rectangle(frame[index], (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2) cv2.putText(frame[index], "ID: " + str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 1e-3 * frame[index].shape[0], (0, 255, 0), 1) #if(len(cameras)==2): #globalHungarian=[] # for fdata in range(len(cameras[0].PersonData)): # globalHungarian.append([]) # for pdata in cameras[1].PersonData: # globalHungarian[fdata].append(np.sum(np.absolute(np.subtract(pdata.histogram_h,cameras[0].PersonData[fdata].histogram_h)))) # # row_ind, col_ind = linear_sum_assignment(globalHungarian) # for row in range(len(row_ind)): # cv2.putText(frame[0], chr(ord('a')+row),(int(cameras[0].PersonData[row_ind[row]].positions[len(cameras[0].PersonData[row_ind[row]].positions)-1][0]), int(cameras[0].PersonData[row_ind[row]].positions[len(cameras[0].PersonData[row_ind[row]].positions)-1][1])),0, 5e-3 * 200, (0,255,0),2) # cv2.putText(frame[1], chr(ord('a')+row),(int(cameras[1].PersonData[col_ind[row]].positions[len(cameras[1].PersonData[col_ind[row]].positions)-1][0]), int(cameras[1].PersonData[col_ind[row]].positions[len(cameras[1].PersonData[col_ind[row]].positions)-1][1])),0, 5e-3 * 200, (0,255,0),2) if(len(cameras)==1): hypos=[]; hyposPos=[]; for person in cameras[0].PersonData: if(person.updated==True): cv2.putText(frame[0],str(person.localPersonIndex) ,(int(person.positions[len(person.positions)-1][0]), int(person.positions[len(person.positions)-1][1])),0, 1e-3 * frame[index].shape[0], (0,255,0),1) if(person.updated==True): hypos.append(person.localPersonIndex+1) hyposPos.append([person.top,person.left]) gts=[] gtsPos=[] while gt[gtIndex][0]==curFrame and gtIndex<len(gt): gts.append(gt[gtIndex][1]) gtsPos.append([gt[gtIndex][2],gt[gtIndex][3]]) gtIndex=gtIndex+1 curFrame=curFrame+1 dis=mm.distances.norm2squared_matrix(np.array(gtsPos), np.array(hyposPos)) acc.update(gts,hypos,dis) elif imgsSaved==cur_save_count: cur_save_count=0 edges=[] globalHungarian=[] for i in range(len(cameras)): for j in range(i+1,len(cameras)): globalHungarian=[] x=0 xindexes=[] yindexes=[] stackedimgages=[] for pos in range(imgsSaved): stackedimgages.append([]) for person in cameras[j].PersonData: if(person.updated==True and len(person.imgs)==imgsSaved): stackedimgages[pos].append(person.imgs[pos]) for fdata in range(len(cameras[i].PersonData)): #if(cameras[i].PersonData[fdata].updated==False or len(cameras[i].PersonData[fdata].imgs)!=imgsSaved): if(cameras[i].PersonData[fdata].updated==False or len(cameras[i].PersonData[fdata].imgs)!=imgsSaved): continue xindexes.append(fdata) curclique=[] prvfoundout=-1 if len(prvGlobalIndexData)!=0: for single in prvGlobalIndexData: if cameras[i].PersonData[fdata].globalPersonIndex in single: curclique=single prvfoundout=single[0] break; y=0 #triplet=test(cameras[i].PersonData[fdata].imgs[0],stackedimgages[0]) #for pos in range(1,imgsSaved): # triplet=np.add(triplet,test(cameras[i].PersonData[fdata].imgs[pos],stackedimgages[pos])) globalHungarian.append([]) for pdata in range(len(cameras[j].PersonData)): if(cameras[j].PersonData[pdata].updated==False or len(cameras[j].PersonData[pdata].imgs)!=imgsSaved): continue #globalHungarian[x].append(triplet[y]) #val=(np.sum(np.absolute(np.subtract(cameras[j].PersonData[pdata].histogram_h,cameras[i].PersonData[fdata].histogram_h)))+triplet[y])/(0.9+1.4*imgsSaved)#hsv seems to be max 0.9, triplet max seems to be 1.2 #if cameras[j].PersonData[pdata].globalPersonIndex in curclique or cameras[j].PersonData[pdata].prvglobalFoundOutPersonIndex==prvfoundout: # val-=0.2 #globalHungarian[x].append(val) globalHungarian[x].append(np.sum(np.absolute(np.subtract(cameras[j].PersonData[pdata].histogram_h,cameras[i].PersonData[fdata].histogram_h)))) if(x==0): yindexes.append(pdata) #globalHungarian[fdata].append(np.sum(np.absolute(np.subtract(cameras[j].PersonData[pdata].histogram_h,cameras[i].PersonData[fdata].histogram_h)))) #globalHungarian[fdata].append(triplet[pdata]) y=y+1 x=x+1 if(len(globalHungarian)!=0): row_ind, col_ind = linear_sum_assignment(globalHungarian) print(globalHungarian) for pos in range(len(row_ind)): if(globalHungarian[row_ind[pos]][col_ind[pos]]<0.85): edges.append((cameras[i].PersonData[xindexes[row_ind[pos]]].globalPersonIndex,cameras[j].PersonData[yindexes[col_ind[pos]]].globalPersonIndex)) Allcliques=cliques(edges,len(cameras),globalPersonCount).getCliques() for cam in cameras: for person in cam.PersonData: if len(person.imgs)!=imgsSaved or person.updated==False: continue isinclique=True for clique in Allcliques: if person.globalPersonIndex in clique: isinclique=False break if isinclique: Allcliques.append([person.globalPersonIndex]) prvGlobalIndexData=[] for sclique in Allcliques: indexes=[] cur=min(sclique) prvGlobalIndexData.append([]) for i in range(len(sclique)): isInclique=False prvIndex=cameras[localgloballink[sclique[i]-1][1]].PersonData[localgloballink[sclique[i]-1][2]].prvglobalFoundOutPersonIndex if prvIndex==-1: isInclique=True else: for snclique in Allcliques: if prvIndex in snclique: isInclique=True break; if isInclique==True: cameras[localgloballink[sclique[i]-1][1]].PersonData[localgloballink[sclique[i]-1][2]].globalFoundOutPersonIndex=cur else: cameras[localgloballink[sclique[i]-1][1]].PersonData[localgloballink[sclique[i]-1][2]].globalFoundOutPersonIndex=prvIndex prvGlobalIndexData[len(prvGlobalIndexData)-1].append(prvIndex) prvGlobalIndexData[len(prvGlobalIndexData)-1].append(sclique[i]) for cam in range(len(cameras)): for person in cameras[cam].PersonData: if person.updated==True: cv2.putText(frame[cam],str(person.globalFoundOutPersonIndex) ,(int(person.positions[len(person.positions)-1][0]), int(person.positions[len(person.positions)-1][1])),0, 1e-3 * frame[index].shape[0], (0,255,0),2) for sclique in Allcliques: for i in range(len(sclique)): cameras[localgloballink[sclique[i]-1][1]].PersonData[localgloballink[sclique[i]-1][2]].prvglobalFoundOutPersonIndex=cameras[localgloballink[sclique[i]-1][1]].PersonData[localgloballink[sclique[i]-1][2]].globalFoundOutPersonIndex else: for cam in range(len(cameras)): for person in cameras[cam].PersonData: if person.updated==True: cv2.putText(frame[cam],str(person.globalFoundOutPersonIndex) ,(int(person.positions[len(person.positions)-1][0]), int(person.positions[len(person.positions)-1][1])),0, 1e-3 * frame[index].shape[0], (0,255,0),2) out_image.fill(0) vindex=0; for row in range(rows): for col in range(cols): if(vindex==len(file_path)): break vidshape=frame[vindex].shape curvidheightratio=vidshape[0]/singleHeight curvidwidthratio=vidshape[1]/singleWidth if(curvidwidthratio<curvidheightratio): #height is small resizedwidth=int(vidshape[1]/vidshape[0]*singleHeight) nimg=cv2.resize(frame[vindex], (resizedwidth,singleHeight), interpolation = cv2.INTER_AREA) widthpos=int((singleWidth-resizedwidth)/2)+col*singleWidth out_image[row*singleHeight:(row+1)*singleHeight,widthpos:widthpos+resizedwidth]=nimg else: #width is small resizedheight=int(vidshape[0]/vidshape[1]*singleWidth) nimg=cv2.resize(frame[vindex], (singleWidth,resizedheight), interpolation = cv2.INTER_AREA) heightpos=int(((singleHeight-resizedheight)/2)+row*singleHeight) out_image[heightpos:heightpos+resizedheight,col*singleWidth:(col+1)*singleWidth]=nimg vindex=vindex+1 if(len(cameras)==1): mh = mm.metrics.create() summary = mh.compute(acc, metrics=['num_frames', 'mota', 'motp'], name='acc') print(summary) cv2.imshow('', out_image) if writeVideo_flag: # and not asyncVideo_flag: # save a frame out.write(out_image) frame_index = frame_index + 1 fps_imutils.update() if not asyncVideo_flag: fps = (fps + (1./(time.time()-t1))) / 2 print("FPS = %f"%(fps)) # Press Q to stop! if cv2.waitKey(1) & 0xFF == ord('q'): break fps_imutils.stop() print('imutils FPS: {}'.format(fps_imutils.fps())) if asyncVideo_flag: video_capture.stop() else: video_capture.release() if writeVideo_flag: out.release() cv2.destroyAllWindows()
def main(yolo): # Definition of the parameters max_cosine_distance = 0.3 nn_budget = None nms_max_overlap = 1.0 folder = "/home/sohaibrabbani/PycharmProjects/Strong_Baseline_of_Pedestrian_Attribute_Recognition/scraped_images/google" out_folder = "/home/sohaibrabbani/PycharmProjects/Strong_Baseline_of_Pedestrian_Attribute_Recognition/processed_images" filenames = os.listdir(folder) images = load_images_from_folder(folder) j = -1 num = 0 # writeVideo_flag = True # asyncVideo_flag = False # # file_path = 'video.webm' # if asyncVideo_flag: # video_capture = VideoCaptureAsync(file_path) # else: # video_capture = cv2.VideoCapture(file_path) # # if asyncVideo_flag: # video_capture.start() # # if writeVideo_flag: # if asyncVideo_flag: # w = int(video_capture.cap.get(3)) # h = int(video_capture.cap.get(4)) # else: # w = int(video_capture.get(3)) # h = int(video_capture.get(4)) # fourcc = cv2.VideoWriter_fourcc(*'XVID') # out = cv2.VideoWriter('output_yolov4.avi', fourcc, 30, (w, h)) # frame_index = -1 # fps = 0.0 # fps_imutils = imutils.video.FPS().start() for frame in images: j = j + 1 # ret, frame = video_capture.read() # frame shape 640*480*3 # if ret != True: # break t1 = time.time() image = Image.fromarray(frame[..., ::-1]) # bgr to rgb boxes, confidence, classes = yolo.detect_image(image) detections = [ Detection_YOLO(bbox, confidence, cls) for bbox, confidence, cls in zip(boxes, confidence, classes) ] # Run non-maxima suppression. boxes = np.array([d.tlwh for d in detections]) scores = np.array([d.confidence for d in detections]) indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores) detections = [detections[i] for i in indices] out_filename = 1 for det in detections: bbox = det.to_tlbr() score = "%.2f" % round(det.confidence * 100, 2) + "%" # cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2) # crop_img = frame.crop([int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])]) w = int(bbox[2]) h = int(bbox[3]) y = int(int(bbox[1])) x = int(int(bbox[0])) crop_img = frame[y:h, x:w] print(str(out_filename) + filenames[j]) cv2.imwrite(out_folder + "/" + str(out_filename) + filenames[j], crop_img) num = num + 1 out_filename = out_filename + 1 # if len(classes) > 0: # cls = det.cls # cv2.putText(frame, str(cls) + " " + score, (int(bbox[0]), int(bbox[3])), 0, # 1e-3 * frame.shape[0], (0, 255, 0), 1) #crop_img = frame[int(bbox[1]):int(bbox[3]),int(bbox[0]):int(bbox[2])] #res_txt = demo_par(model_par, valid_transform, crop_img) #draw.rectangle(xy=person_bbox[:-1], outline='red', width=1) # cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2) # cv2.putText(frame, "ID: " + str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, # 1e-3 * frame.shape[0], (0, 255, 0), 1) # font = ImageFont.truetype( # '/home/sohaibrabbani/PycharmProjects/Strong_Baseline_of_Pedestrian_Attribute_Recognition/arial.ttf', # size=10) # positive_cnt = 1 # for txt in res_txt: # if 'personal' in txt: # #draw.text((x1, y1 + 20 * positive_cnt), txt, (255, 0, 0), font=font) # cv2.putText(frame, txt, (int(bbox[0]), int(bbox[1]) + 20 * positive_cnt), 0, # 1e-3 * frame.shape[0], (0, 255, 0), 1) # positive_cnt += 1 # cv2.imshow('', frame) cv2.destroyAllWindows()