def drawing(frame_queue, detections_queue, fps_queue):
    random.seed(3)  # deterministic bbox colors
    video = set_saved_video(cap, args.out_filename, (width, height))
    while cap.isOpened():
        frame_resized = frame_queue.get()
        detections = detections_queue.get()
        fps = fps_queue.get()
        if frame_resized is not None:
            image = darknet.draw_boxes(detections, frame_resized, class_colors)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            # default: crop the center of the frame as the face region
            face = image[160:390, 200:430]
            # if a face was detected, replace the default crop with its bbox
            if detections:
                for label, confidence, bbox in detections:
                    left, top, right, bottom = darknet.bbox2points(bbox)
                    face = image[top:bottom, left:right]
            # if args.out_filename is not None:
            #     video.write(image)
            if not args.dont_show:
                cv2.imshow('Inference', image)
                cv2.imshow('face', face)
            if cv2.waitKey(fps) == 27:  # ESC; fps doubles as the waitKey delay (ms)
                break
    cap.release()
    video.release()
    cv2.destroyAllWindows()
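# set_saved_video() is referenced above but not defined in this snippet; in the
# upstream darknet repo (darknet_video.py) it is essentially the helper below,
# reproduced here as a sketch for completeness.
def set_saved_video(input_video, output_video, size):
    fourcc = cv2.VideoWriter_fourcc(*"MJPG")
    fps = int(input_video.get(cv2.CAP_PROP_FPS))
    return cv2.VideoWriter(output_video, fourcc, fps, size)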
def detect(self, frame):
    darknet_image = dn.make_image(self.width, self.height, 3)
    img_resized = cv2.resize(frame, (self.width, self.height),
                             interpolation=cv2.INTER_LINEAR)
    # get image ratios to convert bounding boxes to proper size
    img_height, img_width, _ = frame.shape
    width_ratio = img_width / self.width
    height_ratio = img_height / self.height
    # run model on darknet-style image to get detections
    dn.copy_image_from_bytes(darknet_image, img_resized.tobytes())
    detections = dn.detect_image(self.network, self.class_names, darknet_image)
    dn.free_image(darknet_image)
    results = []
    for label, confidence, bbox in detections:
        if float(confidence) <= 98.0:  # keep only very confident detections (percent scale)
            continue
        left, top, right, bottom = dn.bbox2points(bbox)
        left, top, right, bottom = (int(left * width_ratio), int(top * height_ratio),
                                    int(right * width_ratio), int(bottom * height_ratio))
        # cast confidence so max() below compares numerically, not lexically
        results.append((float(confidence), (left, top, right, bottom)))
    if len(results) > 0:
        return max(results)[1]  # box with the highest confidence
    else:
        return None
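# The class around detect() is not shown; a plausible __init__ (an assumption,
# not the original code) that provides the attributes the method uses:
import darknet as dn

class YoloDetector:
    def __init__(self, cfg_path, data_path, weights_path):
        # load_network returns (network, class_names, class_colors)
        self.network, self.class_names, _ = dn.load_network(
            cfg_path, data_path, weights_path)
        self.width = dn.network_width(self.network)
        self.height = dn.network_height(self.network)

# Usage sketch: detect(frame) returns the most confident box as
# (left, top, right, bottom) in original-frame pixels, or None.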
def detect_variables(cfg_file='cfg/var_det.cfg',
                     data='data/var_det.data',
                     weights='backup/variable_detection.weights',
                     path_to_imgs='./cropped_imgs',
                     dest_path='./final_imgs'):
    """
    input: path to cfg-file, path to data-file, path to weights, path to image folder
    applies yolov4 model to each image to detect variables and path coefficients
    in a (cropped) SEM figure
    """
    os.system(f'mkdir {dest_path}')
    colors = {'c': (249, 69, 252), 'i': (241, 200, 98), 'p': (88, 255, 145)}
    for id in os.listdir(path_to_imgs):
        if id[-3:] == 'jpg':
            print(f"Processing image {id[:-4]}")
            path = path_to_imgs + '/' + id
            path_to_txt = path_to_imgs + '/' + id[:-3] + 'txt'
            os.system(
                f'./darknet detector test {data} {cfg_file} {weights} {path} '
                '-save_labels -dont_show')
            image = cv2.imread(path)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            width, height = Image.fromarray(image, 'RGB').convert('L').size
            with open(path_to_txt, 'r') as f:
                contents = f.read()
            contents = [line.split(' ')
                        for line in contents.split('\n') if len(line) > 0]
            # labels are saved in normalized YOLO format: class cx cy w h
            detections = [[width * float(line[1]), height * float(line[2]),
                           width * float(line[3]), height * float(line[4])]
                          for line in contents]
            detections = [[contents[i][0], darknet.bbox2points(detections[i])]
                          for i in range(len(detections))]
            detections = correct_confusions(detections)
            for i in range(len(detections)):
                x1, y1, x2, y2 = detections[i][1]
                if detections[i][0] == '0':
                    cv2.rectangle(image, (x1, y1), (x2, y2),
                                  color=colors['c'], thickness=2)
                elif detections[i][0] == '1':
                    cv2.rectangle(image, (x1, y1), (x2, y2),
                                  color=colors['i'], thickness=2)
                else:
                    cv2.rectangle(image, (x1, y1), (x2, y2),
                                  color=colors['p'], thickness=2)
            cv2.imwrite(dest_path + '/' + id, image)
            cv2_imshow(image)
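# The -save_labels flag makes darknet write one line per detection next to the
# image, in normalized YOLO format: "class_id cx cy w h". A toy example of the
# conversion the loop above performs (the values are illustrative):
line = '0 0.50 0.40 0.20 0.10'.split(' ')
width, height = 800, 600
cx, cy, w, h = (width * float(line[1]), height * float(line[2]),
                width * float(line[3]), height * float(line[4]))
# darknet.bbox2points then turns (cx, cy, w, h) = (400, 240, 160, 60)
# into corner points (xmin, ymin, xmax, ymax) = (320, 210, 480, 270)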
def save_one_patch_detection_json(patch, detections, class_names, save_res_json,
                                  b_percent=False):
    # patch: (xoff, yoff, xsize, ysize)
    objects = []
    bbox_list = []
    for label, confidence, bbox in detections:
        # print('yolo relative', bbox, confidence, label)
        bbox = darknet.bbox2points(bbox)  # to [xmin, ymin, xmax, ymax]
        # print('yolo xmin, ymin, xmax, ymax', bbox, confidence, label)
        # clamp so that xmin >= 0, ymin >= 0, xmax <= xsize, ymax <= ysize
        bbox = [max(bbox[0], 0), max(bbox[1], 0),
                min(bbox[2], patch[2]), min(bbox[3], patch[3])]
        # sometimes darknet outputs many duplicate boxes; remove them
        if bbox in bbox_list:
            # print('remove duplicated')
            continue
        else:
            bbox_list.append(bbox)
        # sometimes darknet outputs a degenerate (zero-width or zero-height) box
        if bbox[0] == bbox[2] or bbox[1] == bbox[3]:
            # print('xmin == xmax or ymin == ymax')
            continue
        # shift to whole-image coordinates
        bbox = [bbox[0] + patch[0], bbox[1] + patch[1],
                bbox[2] + patch[0], bbox[3] + patch[1]]
        if b_percent:
            confidence = round(confidence * 100, 2)
        object = {'class_id': class_names.index(label),
                  'name': label,
                  'bbox': bbox,
                  'confidence': confidence}
        objects.append(object)
    json_data = json.dumps(objects, indent=2)
    with open(save_res_json, "w") as f_obj:
        f_obj.write(json_data)
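# Reading back the per-patch JSON this function writes; the file name below is
# illustrative, the field names come straight from the dict above.
import json

with open('patch_0_0.json') as f:
    objects = json.load(f)
for obj in objects:
    print(obj['name'], obj['confidence'], obj['bbox'])  # bbox is [xmin, ymin, xmax, ymax]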
def rescale(detections, image, source_shape):
    s_w, s_h = source_shape  # source, e.g. 416x416
    t_h, t_w, _ = image.shape  # target
    w_scale = float(t_w) / s_w
    h_scale = float(t_h) / s_h
    res = []
    for label, confidence, bbox in detections:
        x, y, w, h = bbox
        x = x * w_scale
        y = y * h_scale
        w = w * w_scale
        h = h * h_scale
        left, top, right, bottom = darknet.bbox2points((x, y, w, h))
        res.append((left, top, right - left, bottom - top, confidence))
    return res
def get_detections(detections, image, source_shape):
    s_w, s_h = source_shape  # source, e.g. 416x416
    t_h, t_w, _ = image.shape  # target
    w_scale = float(t_w) / s_w
    h_scale = float(t_h) / s_h
    dets = []
    for label, confidence, bbox in detections:
        x, y, w, h = bbox
        x = x * w_scale
        y = y * h_scale
        w = w * w_scale
        h = h * h_scale
        left, top, right, bottom = darknet.bbox2points((x, y, w, h))
        dets.append([left, top, right, bottom, confidence])
    return np.asarray(dets, dtype=np.float32)
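# Hypothetical glue showing how rescale() / get_detections() above are meant
# to be fed: run darknet on a network-sized copy of the frame, then map the
# boxes back to the original resolution. Paths are illustrative; the darknet
# calls are the standard Python API.
import cv2
import darknet

network, class_names, class_colors = darknet.load_network(
    'cfg/yolov4.cfg', 'data/coco.data', 'yolov4.weights')
net_w = darknet.network_width(network)
net_h = darknet.network_height(network)

frame = cv2.imread('frame.jpg')  # original-resolution BGR frame
resized = cv2.resize(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), (net_w, net_h),
                     interpolation=cv2.INTER_LINEAR)
dn_img = darknet.make_image(net_w, net_h, 3)
darknet.copy_image_from_bytes(dn_img, resized.tobytes())
detections = darknet.detect_image(network, class_names, dn_img)
darknet.free_image(dn_img)

dets = get_detections(detections, frame, (net_w, net_h))  # Nx5 float32: x1,y1,x2,y2,conf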
def detect_figures(cfg_file='cfg/fig_det.cfg',
                   data='data/fig_det.data',
                   weights='backup/fig_det.weights',
                   path_to_imgs='./temp_imgs',
                   dest_path='./cropped_imgs'):
    """
    input: path to cfg-file, path to data-file, path to weights, path to image folder
    applies yolov4 model to each image to detect SEM figures in a given pdf page
    """
    os.system(f'mkdir {dest_path}')
    for id in os.listdir(path_to_imgs):
        if id[-3:] == 'jpg':
            print(f"Processing image {id[:-4]}")
            path = path_to_imgs + '/' + id
            path_to_txt = path_to_imgs + '/' + id[:-3] + 'txt'
            os.system(
                f'./darknet detector test {data} {cfg_file} {weights} {path} '
                '-save_labels -dont_show')
            image = cv2.imread(path)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            width, height = Image.fromarray(image, 'RGB').convert('L').size
            with open(path_to_txt, 'r') as f:
                contents = f.read()
            contents = [line.split(' ')
                        for line in contents.split('\n') if len(line) > 0]
            detections = [[width * float(line[1]), height * float(line[2]),
                           width * float(line[3]), height * float(line[4])]
                          for line in contents]
            # keep only class '0' (figure) detections
            detections = [[contents[i][0], darknet.bbox2points(detections[i])]
                          for i in range(len(detections)) if contents[i][0] == '0']
            for i in range(len(detections)):
                x1, y1, x2, y2 = detections[i][1]
                roi = image[y1:y2, x1:x2]
                cv2.imwrite(dest_path + f'/{i}_' + id, roi)
                cv2_imshow(roi)
def draw_boxes(detections, image, source_shape):
    s_w, s_h = source_shape  # source, e.g. 416x416
    t_h, t_w, _ = image.shape  # target
    w_scale = float(t_w) / s_w
    h_scale = float(t_h) / s_h
    for label, confidence, bbox in detections:
        x, y, w, h = bbox
        x = x * w_scale
        y = y * h_scale
        w = w * w_scale
        h = h * h_scale
        left, top, right, bottom = darknet.bbox2points((x, y, w, h))
        cv2.rectangle(image, (left, top), (right, bottom), (255, 0, 0), 1)
        cv2.putText(image, "{} [{:.2f}]".format(label, float(confidence)),
                    (left, top - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
    return image
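# For reference, the helpers above all lean on darknet.bbox2points to turn
# YOLO's center-format (x, y, w, h) box into corner points; the upstream
# darknet.py implementation is essentially this:
def bbox2points(bbox):
    x, y, w, h = bbox
    xmin = int(round(x - (w / 2)))
    xmax = int(round(x + (w / 2)))
    ymin = int(round(y - (h / 2)))
    ymax = int(round(y + (h / 2)))
    return xmin, ymin, xmax, ymax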
def save_annotations(name, network, image, detections, class_names):
    """
    Files are saved as image_name.txt; boxes are rescaled from network
    to original-image pixel coordinates
    """
    image = cv2.imread(name)
    width = darknet.network_width(network)
    height = darknet.network_height(network)
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_resized = cv2.resize(image_rgb, (width, height),
                               interpolation=cv2.INTER_LINEAR)
    print('big_image ', image.shape)
    print('small_image ', image_resized.shape)
    # image = orig_image
    scale_x = image.shape[1] / image_resized.shape[1]
    scale_y = image.shape[0] / image_resized.shape[0]
    # os.path.splitext is robust to paths that contain extra dots
    file_name = os.path.splitext(name)[0] + ".txt"
    print(file_name)
    with open(file_name, "w") as f:
        print(file_name, len(detections))
        if len(detections) == 0:
            f.write("0,0,0,0,0,0,-1,-1\n")
        else:
            for label, confidence, bbox in detections:
                # bbox2points here is a custom variant that also applies the
                # network-to-image scale factors
                left, top, right, bottom = darknet.bbox2points(
                    bbox, scale_x, scale_y)
                x = left
                y = top
                w = right - left
                h = bottom - top
                # x, y, w, h = convert2relative(image, bbox)
                print(x, y, w, h, float(0.99), label, -1, -1)
                label = class_names.index(label)
                f.write(
                    "{:.4f},{:.4f},{:.4f},{:.4f},{:.4f},{},{:n},{:n}\n".format(
                        x, y, w, h, float(0.99), label, -1, -1))
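# A quick sanity check for the files written above; each line is
# "x,y,w,h,confidence,class_index,-1,-1" (the trailing -1s give it a
# MOT-style layout). The file name here is illustrative.
with open('image_name.txt') as f:
    for line in f:
        x, y, w, h, conf, cls, _, _ = (float(v) for v in line.strip().split(','))
        print(cls, (x, y, w, h), conf)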
def inference(flow_id: str, frame: object):
    # allocate a fresh darknet image per call; freeing a shared global here
    # would invalidate it for the next frame
    darknet_image = darknet.make_image(width, height, 3)
    image = frame_data_2_bytes(frame, width, height)
    darknet.copy_image_from_bytes(darknet_image, image)
    detections = darknet.detect_image(network, class_names, darknet_image,
                                      thresh=thresh)
    darknet.free_image(darknet_image)
    result = PyDetectionBox(frame_id=frame.frame_id, engine_id='darknet')
    for label, confidence, bbox in detections:
        left, top, right, bottom = darknet.bbox2points(bbox)
        result.add_box(category_id=labels_rev.get(label, ''),
                       category_label=label,
                       x1=left, y1=top, x2=right, y2=bottom,  # y2 was mistakenly 'left'
                       probability=float(confidence))
    return flow_id, result
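# One plausible wiring of the module-level globals inference() relies on; the
# config paths, threshold value, and labels_rev mapping are assumptions.
import darknet

network, class_names, _ = darknet.load_network(
    'cfg/yolov4.cfg', 'data/coco.data', 'yolov4.weights')
width = darknet.network_width(network)
height = darknet.network_height(network)
labels_rev = {name: idx for idx, name in enumerate(class_names)}  # label -> id
thresh = 0.25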
def drawing(self):
    name_color = (138, 43, 226)
    random.seed(3)  # deterministic bbox colors
    # video = set_saved_video(cap, args.out_filename, (width, height))
    while self.cap.isOpened():
        frame_resized = self.frame_queue.get()
        detections = self.detections_queue.get()
        fps = self.fps_queue.get()
        if frame_resized is not None:
            # draw boxes and labels
            image = darknet.draw_boxes(detections, frame_resized,
                                       self.class_colors)
            if self.Recognition_check:
                if detections:
                    for label, confidence, bbox in detections:
                        left, top, right, bottom = darknet.bbox2points(bbox)
                        face = image[top:bottom, left:right]
                        self.face = face
                        # run face recognition
                        t1 = cv2.getTickCount()
                        try:
                            scaled_arr = cv2_face(face)
                        except Exception:
                            scaled_arr = None
                        if scaled_arr is not None:
                            feed_dict = {
                                images_placeholder: scaled_arr,
                                phase_train_placeholder: False,
                                keep_probability_placeholder: 1.0
                            }
                            embs = sess.run(embeddings, feed_dict=feed_dict)
                            face_class = ['Others']
                            diff = []
                            # find the closest stored face embedding
                            for emb in emb_arr:
                                diff.append(np.mean(np.square(embs[0] - emb)))
                            min_diff = min(diff)
                            index = np.argmin(diff)
                            if min_diff < THRED:
                                face_class[0] = class_arr[index]
                            t2 = cv2.getTickCount()
                            t = (t2 - t1) / cv2.getTickFrequency()
                            # print the person's name on the image
                            cv2.putText(image, '{}'.format(face_class[0]),
                                        (left, top - 35),
                                        cv2.FONT_HERSHEY_SIMPLEX, 1,
                                        name_color, 2)
                            ntime = time.strftime("%Y/%m/%d %H:%M:%S",
                                                  time.localtime())
                            if self.clock % 100 == 0:
                                self.recorded_people.clear()
                                print("recorded_people = ", self.recorded_people)
                            if (face_class[0] != 'Others'
                                    and face_class[0] not in self.recorded_people):
                                if self.clock % 20 == 0:
                                    # remember the name
                                    self.recorded_people.append(face_class[0])
                                    # upload to the database 'search' table
                                    record_data = {
                                        "user_name": face_class[0],
                                        "time": ntime,
                                        "mask": label
                                    }
                                    conn = requests.post(
                                        "http://140.136.150.100/record.php",
                                        data=record_data)
                                    # print(face_class[0], ntime)
                                    # print(conn.text)
                            # print recognition time & embedding distance
                            cv2.putText(image, '{:.4f}'.format(t), (10, 30),
                                        cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                        (255, 0, 255), 2)
                            cv2.putText(image, '{:.4f}'.format(min_diff),
                                        (100, 30), cv2.FONT_HERSHEY_SIMPLEX,
                                        0.5, (255, 0, 255), 2)
            self.YOLO_image_queue.put(image)  # store the RGB frame
            self.clock += 1
            # print("clock =", self.clock)
            if cv2.waitKey(fps) == 27:  # ESC
                break
    self.cap.release()
    print("Thread 3 stop")
    cv2.destroyAllWindows()
def drawing(self):
    name_color = (138, 43, 226)
    random.seed(3)  # deterministic bbox colors
    # video = set_saved_video(cap, args.out_filename, (width, height))
    while self.cap.isOpened():
        frame_resized = self.frame_queue.get()
        detections = self.detections_queue.get()
        fps = self.fps_queue.get()
        if frame_resized is not None:
            # draw boxes and labels
            image = darknet.draw_boxes(detections, frame_resized,
                                       self.class_colors)
            # run face recognition
            t1 = time.time()
            for label, confidence, bbox in detections:
                left, top, right, bottom = darknet.bbox2points(bbox)
                face = image[top:bottom, left:right]
                self.face = face
                self.label_flag = label
            if self.Recognition_check:
                if detections:
                    face = self.face
                    try:
                        scaled_arr = cv2_face(face)
                    except Exception:
                        scaled_arr = None
                    if scaled_arr is not None:
                        feed_dict = {
                            images_placeholder: scaled_arr,
                            phase_train_placeholder: False,
                            keep_probability_placeholder: 1.0
                        }
                        embs = sess.run(embeddings, feed_dict=feed_dict)
                        face_class = ['Others']
                        diff = []
                        # find the closest stored face embedding
                        for emb in emb_arr:
                            diff.append(np.mean(np.square(embs[0] - emb)))
                        min_diff = min(diff)
                        index = np.argmin(diff)
                        if min_diff < Threshold:
                            face_class[0] = class_arr[index]
                        # print the person's name on the image
                        cv2.putText(image, '{}'.format(face_class[0]),
                                    (left, top - 50),
                                    cv2.FONT_HERSHEY_SIMPLEX, 1, name_color, 2)
                        # print the loss next to the face
                        cv2.putText(image, 'loss:{:.4f}'.format(min_diff),
                                    (left, top - 25),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                    (0, 204, 0), 2)
                        # ntime = time.strftime("%Y/%m/%d %H:%M:%S", time.localtime())
                        if self.clock % 120 == 0:
                            self.recorded_people.clear()
                            # print("recorded_people = ", self.recorded_people)
                        if self.ui.tabWidget.currentIndex() == 0:
                            if self.clock % 120 == 0:
                                if (face_class[0] != 'Others'
                                        and face_class[0] not in self.recorded_people):
                                    # remember the name
                                    self.recorded_people.append(face_class[0])
                                    # upload to the database 'search' table
                                    T = time.localtime()
                                    record_data = {
                                        "user_name": face_class[0],
                                        "year": T.tm_year,
                                        "month": T.tm_mon,
                                        "day": T.tm_mday,
                                        "hour": T.tm_hour,
                                        "min": T.tm_min,
                                        "sec": T.tm_sec,
                                        "mask": label,
                                        "table": 'search'
                                    }
                                    conn = requests.post(
                                        "http://140.136.150.100/record.php",
                                        data=record_data)
                                    # print(face_class[0], ntime)
                                    # print(conn.text)
                                    self.show_dialog2 = 1
                                if face_class[0] == 'Others':
                                    self.show_dialog3 = 1
                                    self.dialog3_counter += 1
            if not self.Recognition_check:
                if self.ui.tabWidget.currentIndex() == 0:
                    if self.clock % 120 == 0:
                        # upload to the database 'search2' table
                        T = time.localtime()
                        record_data = {
                            "user_name": 'Unknow',
                            "year": T.tm_year,
                            "month": T.tm_mon,
                            "day": T.tm_mday,
                            "hour": T.tm_hour,
                            "min": T.tm_min,
                            "sec": T.tm_sec,
                            "mask": self.label_flag,
                            "table": 'search2'
                        }
                        conn = requests.post(
                            "http://140.136.150.100/record.php",
                            data=record_data)
                        print(conn.text)
                        self.show_dialog2 = 1
            # timestamp once all processing is done
            t2 = time.time()
            t = int(1 / (t2 - t1 + self.yolo_t))
            # print the FPS on the image
            cv2.putText(image, 'FPS:{}'.format(t), (10, 20),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 204, 0), 1)
            self.YOLO_image_queue.put(image)  # store the RGB frame
            self.clock += 1
            # print("clock =", self.clock)
            # if cv2.waitKey(fps) == 27:
            #     break
    print("Thread 3 stop")
    self.cap.release()
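# cv2_face() is called by both drawing() variants above but not defined in this
# snippet. A plausible FaceNet-style preprocessing (160x160 resize plus
# prewhitening) is sketched below; the body is an assumption, not the original
# implementation.
import cv2
import numpy as np

def cv2_face(face_bgr, size=160):
    face = cv2.resize(face_bgr, (size, size), interpolation=cv2.INTER_LINEAR)
    face = face.astype(np.float32)
    # FaceNet-style prewhitening: zero mean, unit (clamped) std
    mean, std = face.mean(), face.std()
    std_adj = np.maximum(std, 1.0 / np.sqrt(face.size))
    face = (face - mean) / std_adj
    return face[np.newaxis, ...]  # shape (1, size, size, 3) for the placeholder feed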
def process_frame(self, frame):
    if self.initFlag is False:
        self.height, self.width, self.channels = frame.shape
        print('INITIALISED')
        print('VIDEO PROPERTIES:')
        print('    WIDTH:   ', self.width)
        print('    HEIGHT:  ', self.height)
        print('    CHANNELS:', self.channels)
        if self.videoFlag:
            fourcc = cv2.VideoWriter_fourcc(*'XVID')
            self.video = cv2.VideoWriter(self.output_file, fourcc, 30,
                                         (self.width, self.height))
        self.initFlag = True
    # print("STAMP: " + str(image.header.stamp.secs) + "." + str(image.header.stamp.nsecs))
    # print("SEQ:", image.header.seq)
    print("FRAME:", self.frame_id)
    self.frame_id += 1
    # print("TIME:", rospy.Time.now())

    # Process image
    prev_time = time.time()
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    detections = self.image_detection(frame)

    # Iterate through each detection and create a BoundingBox message
    for class_name, confidence, bbox in detections:
        # bbox2points returns (xmin, ymin, xmax, ymax); the original unpacked
        # it as xmin, ymax, xmax, ymin, which swapped the y coordinates
        xmin, ymin, xmax, ymax = darknet.bbox2points(bbox)
        id = -1
        if self.displayFlag:
            colour = self.class_colours[class_name]
            left, top, right, bottom = xmin, ymin, xmax, ymax
            cv2.rectangle(frame, (left, top), (right, bottom), colour, 2)
            cv2.putText(
                frame,
                str(class_name) + " [" + str(round(float(confidence), 1)) + '%]',
                (left, top - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.75, colour, 2)
    print("DETECTION COUNT:", len(detections))
    frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
    fps = round((1 / (time.time() - prev_time)), 1)
    # fps = int(1 / (time.time() - self.prevTime))
    # self.prevTime = time.time()
    print("FPS:", fps)
    if self.displayFlag:
        window_name = "Darknet"
        cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
        # cv2.moveWindow(window_name, 0, 0)
        cv2.resizeWindow(window_name, 640, 480)
        # cv2.resizeWindow(window_name, 1280, 1024)
        # cv2.resizeWindow(window_name, 1280, 800)
        cv2.imshow(window_name, frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            exit()
    if self.videoFlag:
        self.video.write(frame)
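# self.image_detection() is referenced above but not shown; a minimal sketch
# based on the standard darknet Python API. It assumes self.network and
# self.class_names exist, and that the capture resolution matches the network
# input size, since process_frame() draws the returned boxes on the full frame
# without rescaling.
def image_detection(self, frame_rgb, thresh=0.25):
    net_w = darknet.network_width(self.network)
    net_h = darknet.network_height(self.network)
    resized = cv2.resize(frame_rgb, (net_w, net_h),
                         interpolation=cv2.INTER_LINEAR)
    img = darknet.make_image(net_w, net_h, 3)
    darknet.copy_image_from_bytes(img, resized.tobytes())
    detections = darknet.detect_image(self.network, self.class_names, img,
                                      thresh=thresh)
    darknet.free_image(img)
    return detections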