def clasifyImage(image_path):
    """Classify one image file and describe its top prediction.

    Args:
        image_path: Path of the image file to classify.

    Returns:
        A string ``"<class_name> - <probability>"`` built from the first
        entry returned by ``darknet.classify``; ``"File <path> not found"``
        when ``image_path`` is not an existing file; or
        ``"File <path> could not be read as an image"`` when OpenCV cannot
        decode a frame from it.
    """
    if not os.path.isfile(image_path):
        return ("File " + image_path + " not found")

    # The image is read through VideoCapture, as the YOLO Python API does.
    cap = cv2.VideoCapture(image_path)
    try:
        ret, frame_read = cap.read()
    finally:
        # Always give the capture handle back, even if the read raises.
        cap.release()

    if not ret or frame_read is None:
        # Without this guard cvtColor would be handed None and raise.
        return ("File " + image_path + " could not be read as an image")

    net_width = darknet.network_width(netMain)
    net_height = darknet.network_height(netMain)

    # Buffer that receives the network-sized pixels.
    darknet_image = darknet.make_image(net_width, net_height, 3)

    # Convert BGR -> RGB, resize to the network input size, and copy the
    # raw bytes into darknet_image.
    frame_rgb = cv2.cvtColor(frame_read, cv2.COLOR_BGR2RGB)
    frame_resized = cv2.resize(
        frame_rgb,
        (net_width, net_height),
        interpolation=cv2.INTER_LINEAR)
    darknet.copy_image_from_bytes(darknet_image, frame_resized.tobytes())

    # Classify the image.
    detections = darknet.classify(netMain, metaMain, darknet_image)

    # Only return the top detection in the format 'class_name - probability'.
    class_name, probability = detections[0]
    return (class_name.decode("utf-8") + " - " + str(probability))
def slave_labor(frame):
    """Classify a frame with every loaded network and annotate it in place.

    When ``args.yolo`` is set, a detector first proposes regions of
    interest (via ``cvDrawBoxes``); otherwise, or when no region is
    returned, the whole frame is used as the single region. Each network
    in ``nets`` classifies each region, the pooled results are sorted by
    descending score, and the first ``args.top_k`` of them are drawn onto
    ``frame``.

    Args:
        frame: Image array; ``frame.shape`` must unpack into three values
            (height, width, channels).

    Returns:
        A list of ``(label[4:], score)`` tuples for the drawn results whose
        score is at least ``args.threshold``.
    """
    h, w, _ = frame.shape
    roi_array = []
    full_im, _ = darknet.array_to_image(frame)
    darknet.rgbgr_image(full_im)

    results_hier = []  # one list of results per network
    results_flat = []  # every result from every network and region

    gpu_lock.acquire()
    try:
        # Everything touching the networks runs under the lock; the
        # try/finally guarantees the lock is released even if detection or
        # classification raises.
        if args.yolo:
            r = lightnet.detect_from_memory(yolo_net, yolo_meta, full_im,
                                            thresh=0.75, debug=False)
            if args.debug:
                print(r)
            roi_array = cvDrawBoxes(r, frame)
        if args.debug:
            print_timestamp("yolo")

        if not roi_array:
            # Fall back to the whole frame as the only region.
            roi_array = [(0, 0, w, h)]

        for i, _ in enumerate(nets):
            results = []
            for roi in roi_array:
                if args.yolo:
                    # roi is indexed as (x0, y0, x1, y1).
                    frame_roi = frame[roi[1]:roi[3], roi[0]:roi[2]]
                    if args.socket or not args.interactive:
                        cv.imshow("frame_roi", frame_roi)
                else:
                    frame_roi = frame
                im, _ = darknet.array_to_image(frame_roi)
                darknet.rgbgr_image(im)
                r = darknet.classify(nets[i], metas[i], im)
                print_timestamp("classify")
                results.extend(r)
                results_flat.extend(r)
            results_hier.append(results)
    finally:
        gpu_lock.release()

    # Highest score first.
    results_flat = sorted(results_flat, key=lambda x: -x[1])
    top_k = min(args.top_k, len(results_flat))

    preds = []
    left = 10
    for rank in range(top_k):
        top = 20 + rank * 20
        label, score = results_flat[rank]
        if score >= args.threshold:
            preds.append((label[4:], score))

        text = '%s %.2f%%' % (label, score * 100)
        labelSize, baseLine = cv.getTextSize(
            text, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1)
        # Scores above the gold threshold get a different background colour.
        back_clr = (222, 222, 222)
        if score > args.gold_confidence:
            back_clr = (122, 122, 255)
        cv.rectangle(frame, (left, top - labelSize[1]),
                     (left + labelSize[0], top + baseLine),
                     back_clr, cv.FILLED)
        cv.putText(frame, text, (left, top),
                   cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))

    if args.socket:
        if args.debug:
            # Save the annotated frame and append one CSV row naming it,
            # followed by up to three results per network.
            now = datetime.datetime.now()
            now_string = now.strftime("%Y-%h-%d-%H-%M-%S-%f")
            image_name = 'socket_debug' + '/' + now_string + '.jpg'
            cv.imwrite(image_name, frame)
            csv_file.write(image_name)
            for results in results_hier:
                # Slicing avoids an IndexError when a network produced
                # fewer than three results.
                for label, score in results[:3]:
                    csv_file.write(',%s,%.3f' % (label[4:], score))
            csv_file.write('\n')
            csv_file.flush()
            print_timestamp("csv_file")
    elif args.interactive:
        pass
    else:
        cv.imshow("output", frame)

    return preds
# Load the classifier network together with its metadata.
# Alternative model files noted by the author:
#   "../../bin/cfg/darknet19_448.cfg", "../../bin/darknet19_448.weights",
#   "../../bin/cfg/imagenet1k.data"
net, meta = lightnet.load_network_meta(
    "obj.cfg", "weights/obj_200.weights", "obj.data")

if IMAGE_MODE:
    # Single-image mode: read test.jpg with OpenCV, convert it to a darknet
    # image, swap the colour channels, classify and print the result.
    frame = cv.imread(lightnet.to_str('test.jpg'))
    im, arr = darknet.array_to_image(frame)
    darknet.rgbgr_image(im)
    r = darknet.classify(net, meta, im)
    print(r)
else:
    # Camera mode: pull frames from capture device 0 until one is missing.
    cap = cv.VideoCapture(0)
    if not cap.isOpened():
        raise Exception('Fail to open %s' % (0))
    while True:
        hasFrame, frame = cap.read()
        if not hasFrame:
            cv.waitKey()
            break
        # A frame dump to 'test.jpg' was disabled here by the author.
        cols, rows = frame.shape[1], frame.shape[0]
        im, arr = darknet.array_to_image(frame)
return net, meta if __name__ == '__main__': import os classify_modu = load_classify_module( b"./config/new_chinese_classify.cfg", b"./classify_checkpoints/new_chinese_classify_72.weights", b"./config/chinese_classify.data") # classify_modu = load_classify_module(b"./config/alexnet.cfg", # b"./classify_checkpoints/alexnet_1129.weights", # b"./config/chinese_classify.data") VALID_PATH = './classify_data/valid' images = os.listdir(VALID_PATH) images.remove('.gitignore') truth_count = 0 for image_name in images: path = os.path.join(VALID_PATH, image_name) _, unicode_name = image_name[:-4].split('_') img = load_image(path.encode(), 0, 0) print(path) res = classify(classify_modu[0], classify_modu[1], img) flag = False if unicode_name in [pred_item[0].decode() for pred_item in res[0:5]]: flag = True truth_count += 1 print('原图为', unicode_name, '识别结果为:', res[0:5], flag) print('Truth count:', truth_count)
def _label_to_hanzi(raw_label):
    """Decode a classifier label into the character it names.

    The label bytes are decoded as UTF-8, prefixed with a backslash and run
    through ``unicode_escape`` (presumably labels look like ``u4e2d`` --
    verify against the label file).
    """
    escaped = '\\' + raw_label.decode('utf-8')
    return escaped.encode('utf-8').decode('unicode_escape')


def crack(img_path, dtc_modu, classify_modu, k):
    """Locate, cut out, recognise and order the characters in an image.

    Args:
        img_path: Path of the image to process.
        dtc_modu: Pair whose two items are passed to ``detect``.
        classify_modu: Pair whose two items are passed to ``classify``.
        k: Maximum number of detected boxes accepted.

    Returns:
        ``0`` when more than ``k`` boxes are detected; otherwise the
        recognised word produced by ``recog_order_jieba`` or, failing that,
        by ``search_engine_recog``.
    """
    # Locate the characters; yields several rectangles.
    print('\n' * 2 + '定位汉字' + '\n' + '*' * 80)
    started = time.time()
    boxes = detect(dtc_modu[0], dtc_modu[1], img_path.encode())
    print('定位汉字耗时{}'.format(time.time() - started))

    # Threshold on the number of boxes.
    if len(boxes) > k:
        return 0

    # Cut the image into per-character pictures.
    print('\n' * 2 + '切割图片' + '\n' + '*' * 80)
    cut_started = time.time()
    hanzi_list = seg_one_img(img_path, boxes)
    print('切割图片耗时{}'.format(time.time() - cut_started))

    # Recognise each cut-out character.
    print('\n' * 2 + '汉字识别' + '\n' + '*' * 80)
    recog_started = time.time()

    # Gather the paths stored as keys of every entry in hanzi_list.
    piece_paths = [p for entry in hanzi_list for p in entry.keys()]

    all_hanzi_lists = []  # candidate characters for each cut-out picture
    for piece_path in piece_paths:
        piece_img = load_image(piece_path.encode(), 0, 0)
        ranked = classify(classify_modu[0], classify_modu[1], piece_img)
        print(ranked[0:5])
        # Below 0.95 on the first result keep the first five candidates,
        # otherwise only the first one.
        kept = ranked[0:5] if ranked[0][1] < 0.95 else ranked[0:1]
        all_hanzi_lists.append([_label_to_hanzi(item[0]) for item in kept])

    # Join every combination of candidates into a single string.
    hanzi_combination = combination(*all_hanzi_lists)
    candidate_words = [''.join(chars) for chars in hanzi_combination]
    print('汉字识别耗时{}'.format(time.time() - recog_started))

    # Work out the word order.
    order_started = time.time()
    print('\n' * 2 + '语序识别' + '\n' + '*' * 80)
    for candidate in candidate_words:
        # Remember the coordinates that go with this candidate.
        hanzi_center = recordCoordinate(candidate, hanzi_list)
        order_started = time.time()
        rec_word = recog_order_jieba(candidate)
        if rec_word:
            # The segmenter recognised a word; stop searching.
            break
    else:
        # No candidate was recognised: fall back to the search engine using
        # the first combination.
        hanzi_center = recordCoordinate(candidate_words[0], hanzi_list)
        rec_word = search_engine_recog(candidate_words[0])
    print('语序识别结果:{}'.format(rec_word))
    print('语序识别耗时{}'.format(time.time() - order_started))

    # Report the coordinates in the recognised order.
    print('\n' * 2 + '最终结果' + '\n' + '*' * 80)
    centers = [hanzi_center[ch] for ch in rec_word]
    print('正确语序的坐标:{}'.format(centers))
    print('总耗时{}'.format(time.time() - started))
    # Callers that need the coordinates would have to get them from here.
    return (rec_word)
parser.add_argument('--gold_confidence', type=float, default=0.95)
parser.add_argument('--display_confidence', type=float, default=0.5)
args = parser.parse_args()

# Load the classifier network and its metadata from the command-line paths.
net, meta = lightnet.load_network_meta(args.config, args.weights, args.data)

if args.image is not None:
    # Single-image mode: classify the file named by --image. Previously a
    # hard-coded 'test.jpg' was read and the argument was ignored.
    # NOTE(review): assumes args.image is a path string -- confirm against
    # the parser definition.
    frame = cv.imread(lightnet.to_str(args.image))
    if frame is None:
        # cv.imread signals failure by returning None rather than raising.
        raise Exception('Fail to open %s' % (args.image,))
    im, arr = darknet.array_to_image(frame)
    darknet.rgbgr_image(im)
    r = darknet.classify(net, meta, im)
    print(r)
else:
    # Camera mode: classify frames from the selected capture device until
    # one is missing.
    cap = cv.VideoCapture(args.camera)
    if not cap.isOpened():
        # Report the device that was actually requested.
        raise Exception('Fail to open %s' % (args.camera,))
    while True:
        hasFrame, frame = cap.read()
        if not hasFrame:
            cv.waitKey()
            break
        im, arr = darknet.array_to_image(frame)
        darknet.rgbgr_image(im)
        results = darknet.classify(net, meta, im)
        # A debug print of the first result was disabled here by the author.