def predict(filepath):
    """Run the detector on an image file and return its boxes as JSON.

    Every detection returned by ``dn.detect`` is read as
    ``(label, prob, (cx, cy, w, h))`` and converted from centre/size form to
    corner form.

    Args:
        filepath: Path of the image to analyse, as ``str``.

    Returns:
        The result of ``jsonify`` applied to a list of dicts with the keys
        ``label``, ``prob``, ``x1``, ``y1``, ``x2`` and ``y2``.
    """
    detections = dn.detect(net, meta, str.encode(filepath))

    boxes = []
    for detection in detections:
        box = detection[2]
        cx, cy, w, h = box[0], box[1], box[2], box[3]
        half_w = w / 2
        half_h = h / 2
        boxes.append({
            'label': detection[0],
            'prob': detection[1],
            'x1': cx - half_w,
            'y1': cy - half_h,
            'x2': cx + half_w,
            'y2': cy + half_h,
        })
    return jsonify(boxes)
def process():
    """Handle an image upload: run YOLO on it and render the result page.

    Reads the upload from ``request.files['file']``. When a file is present
    and ``allowed_file`` accepts its name, the file is stored in
    ``app.config['UPLOAD_FOLDER']``, resized with ``adaptive_resize``, passed
    to darknet with a threshold of 0.5, and every detection is drawn onto the
    stored image with ``draw_object``.

    Returns:
        The rendered ``result.html`` template on success, otherwise the
        string ``'invalid data'``.
    """
    file = request.files['file']
    if not (file and allowed_file(file.filename)):
        return 'invalid data'

    filename = secure_filename(file.filename)
    # Build the path once so that saving, detection and drawing always refer
    # to the same file, whether or not UPLOAD_FOLDER ends with a separator.
    image_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    file.save(image_path)
    adaptive_resize(image_path)

    # Make the bundled darknet bindings importable; add the entry only once
    # so sys.path does not grow with every request.
    darknet_dir = os.path.join(os.path.dirname(__file__), 'darknet')
    if darknet_dir not in sys.path:
        sys.path.append(darknet_dir)
    import darknet.darknet as dn

    # Full YOLO model (tiny-yolo.cfg / tiny-yolo.weights is the lighter
    # alternative). The network is loaded on every call.
    net = dn.load_net(bytes("darknet/cfg/yolo.cfg", 'ascii'),
                      bytes("darknet/yolo.weights", 'ascii'), 0)
    meta = dn.load_meta(bytes("darknet/cfg/coco.data", 'ascii'))
    detections = dn.detect(net, meta, bytes(image_path, 'ascii'), thresh=.5)

    lines = []
    for detection in detections:
        name = detection[0].decode('utf-8')
        prob = float(detection[1])
        rect = detection[2]
        lines.append('Detected <b>' + name + '</b> with probability of '
                     + str(round(prob, 2)) + '<br />')
        draw_object(image_path, rect)

    # The random query string changes the image URL on every response.
    return render_template('result.html',
                           filename='uploads/' + filename + '?'
                           + str(randint(0, 999)),
                           result_text=''.join(lines))
def detect_lp(output_dir, loaded_models, Iorig_name):
    """Detect license plates in the car crops that belong to one source image.

    Scans ``<output_dir>/<Iorig_name>_*car.png``, runs the plate detector held
    in ``loaded_models[1]`` on each crop and, for every crop with at least one
    detection, writes ``<crop>_lp.png`` (plate image) and ``<crop>_lp.txt``
    (all plate labels, via ``lwrite``).

    Args:
        output_dir: Directory holding the car crops; outputs go here too.
        loaded_models: Sequence whose item 1 is ``(net, meta, threshold)``.
        Iorig_name: Base name of the original image (no extension).
    """
    imgs_paths = glob('%s/%s_*car.png' % (output_dir, Iorig_name))
    print('Searching for license plates...')

    # The model triple does not change between crops.
    lp_net, lp_meta, lp_threshold = loaded_models[1]

    for img_path in imgs_paths:
        bname = splitext(basename(img_path))[0]
        R, _ = detect(lp_net, lp_meta, img_path.encode('utf-8'),
                      thresh=lp_threshold)
        if not len(R):
            print('No license plate found')
            continue

        Iorig = cv2.imread(img_path)
        WH = np.array(Iorig.shape[1::-1], dtype=float)
        # (W, H, W, H): divides pixel (cx, cy, w, h) into relative units.
        scale = np.concatenate((WH, WH))

        Llp = []
        for r in R:
            cx, cy, w, h = (np.array(r[2]) / scale).tolist()
            tl = np.array([cx - w / 2., cy - h / 2.])
            br = np.array([cx + w / 2., cy + h / 2.])
            label = Label(0, tl, br)
            Ilp = crop_region(Iorig, label)
            Llp.append(label)

        # Every plate of a crop maps to the same output path, so only the
        # last crop can survive on disk; write it once.
        cv2.imwrite('%s/%s_lp.png' % (output_dir, bname), Ilp)
        lwrite('%s/%s_lp.txt' % (output_dir, bname), Llp)
def ocr_lp(output_dir, loaded_models, Iorig_path):
    """Read the characters of every plate crop that belongs to one image.

    Scans ``<output_dir>/<name>_*_lp.png`` (``name`` is the base name of
    ``Iorig_path`` without extension) in sorted order, runs the OCR detector
    held in ``loaded_models[2]`` on each crop, orders the detected characters
    left to right and writes the string to ``<crop>_str.txt``.

    Args:
        output_dir: Directory holding the plate crops; outputs go here too.
        loaded_models: Sequence whose item 2 is ``(net, meta, threshold)``.
        Iorig_path: Path of the original image; only its name is used.
    """
    Iorig_name = basename(splitext(Iorig_path)[0])
    imgs_paths = sorted(glob('%s/%s_*_lp.png' % (output_dir, Iorig_name)))

    # The model triple does not change between crops.
    ocr_net, ocr_meta, ocr_threshold = loaded_models[2]

    for img_path in imgs_paths:
        bname = basename(splitext(img_path)[0])
        # nms=None here; suppression is applied below on the converted labels.
        R, (width, height) = detect(ocr_net, ocr_meta,
                                    img_path.encode('utf-8'),
                                    thresh=ocr_threshold, nms=None)
        if not len(R):
            print('No characters found')
            continue

        L = dknet_label_conversion(R, width, height)
        L = nms(L, .45)
        # Sort by the x coordinate of the top-left corner: reading order.
        L.sort(key=lambda x: x.tl()[0])
        lp_str = ''.join([chr(char.cl()) for char in L])

        with open('%s/%s_str.txt' % (output_dir, bname), 'w') as f:
            f.write(lp_str + '\n')
        print('\t\tLP: %s' % lp_str)
def detect_vehicle(img_path, output_dir, loaded_models, bname):
    """Detect cars in an image and save one crop per car.

    Runs the detector held in ``loaded_models[0]`` on ``img_path`` and keeps
    only detections labelled ``car``. Each car is cropped and written to
    ``<output_dir>/<bname>_<index>car.png``; the labels of all cars are
    written with ``lwrite`` to ``<output_dir>/<bname>_cars.txt``. Nothing is
    written when no car is found.

    Args:
        img_path: Path of the image to analyse.
        output_dir: Directory receiving the crops and the label file.
        loaded_models: Sequence whose item 0 is ``(net, meta, threshold)``.
        bname: Base name used for the output files.
    """
    net, meta, threshold = loaded_models[0]
    detections, _ = detect(net, meta, img_path.encode('utf-8'),
                           thresh=threshold)
    cars = [d for d in detections if d[0].decode(encoding='utf-8') == 'car']
    print('%d cars found' % len(cars))
    if not cars:
        return

    image = cv2.imread(img_path)
    # shape is (H, W, ...); reversed slice yields (W, H).
    size = np.array(image.shape[1::-1], dtype=float)
    # (W, H, W, H): divides pixel (cx, cy, w, h) into relative units.
    scale = np.concatenate((size, size))

    labels = []
    for index, car in enumerate(cars):
        cx, cy, w, h = (np.array(car[2]) / scale).tolist()
        top_left = np.array([cx - w / 2., cy - h / 2.])
        bottom_right = np.array([cx + w / 2., cy + h / 2.])
        car_label = Label(0, top_left, bottom_right)
        crop = crop_region(image, car_label)
        labels.append(car_label)
        cv2.imwrite('%s/%s_%dcar.png' % (output_dir, bname, index), crop)

    lwrite('%s/%s_cars.txt' % (output_dir, bname), labels)
def detect_result(self, img):
    """Run darknet detection on ``img`` with this object's network.

    Args:
        img: Input for ``darknet.detect``. A ``str`` is encoded to ``bytes``
            first; any other object is passed through unchanged.

    Returns:
        The two values returned by ``darknet.detect``, as a tuple
        ``(result, widhei)``.
    """
    # Only text needs converting; checking the type explicitly avoids a bare
    # ``except`` that would also hide unrelated errors.
    if isinstance(img, str):
        img = img.encode()
    result, widhei = darknet.detect(self.net, self.meta, img)
    return result, widhei
def analyze_pic(basic_path, pic_path):
    """Detect objects in a picture and classify every detected region.

    Loads the models with ``init_models(basic_path)``, runs
    ``darknet.detect`` on ``basic_path + pic_path``, crops each detected box
    out of the picture, classifies the crop with ``predict`` and collects the
    top class together with the box clamped to ``[0, 1]``.

    Args:
        basic_path: Prefix passed to ``init_models`` and prepended to
            ``pic_path``.
        pic_path: Picture path relative to ``basic_path``.

    Returns:
        A dict ``{'status': 'ok', 'predictions': [...]}`` where each
        prediction has ``label_id``, ``label`` and ``detection_box``
        (``[y_min, x_min, y_max, x_max]`` relative to the picture size).
    """
    det_net, det_meta, clf_model, class_dict = init_models(basic_path)
    print("\n [*]Starting \n")
    data_path = str.encode(basic_path + pic_path)
    r = darknet.detect(det_net, det_meta, data_path)
    print(r)

    result = {'status': "ok", 'predictions': []}

    # The context manager closes the underlying file once all crops are done.
    with Image.open(data_path) as img:
        width, height = img.size
        print(img.size)

        for index, box in enumerate(r):
            print(str(index) + "\n")
            cx, cy, w, h = box[2][0], box[2][1], box[2][2], box[2][3]
            # Centre/size -> corner coordinates.
            left = cx - w / 2
            upper = cy - h / 2
            right = cx + w / 2
            down = cy + h / 2

            cropped = img.crop((left, upper, right, down))
            scores = predict(clf_model, cropped)
            # Index of the highest score in the first output row.
            class_id = np.argsort(scores[0])[::-1][0]
            str_class = class_dict[class_id]
            print(str_class, scores[0][class_id])

            print(left, right, upper, down)
            print(width, height)
            result['predictions'].append({
                'label_id': str(class_id),
                'label': str(str_class),
                # y_min, x_min, y_max, x_max
                'detection_box': [
                    max(0, upper / height),
                    max(0, left / width),
                    min(1, down / height),
                    min(1, right / width),
                ],
            })

    print(result)
    return result
def runInference(self, frame, frameW, frameH, confidenceLevel):
    """Detect objects in a frame, draw them and report per-class counts.

    Runs ``darknet.detect`` on ``frame``. Every detection whose confidence
    exceeds ``confidenceLevel`` is counted under its class name and drawn on
    the frame. When at least one object was counted and a second or more has
    passed since ``self.lastMessageSentTime``, the counts are sent as JSON to
    the ``output1`` hub output.

    Any exception is caught and printed; the method returns ``None``.

    Args:
        frame: Image handed to darknet and drawn on.
        frameW: Not used by this method.
        frameH: Not used by this method.
        confidenceLevel: Detection threshold; also the minimum confidence for
            a detection to be counted and drawn.
    """
    try:
        found = darknet.detect(darknet.netMain, darknet.metaMain, frame,
                               confidenceLevel)
        countsByClassId = {}

        for item in found:
            classID = str(item[0], encoding)
            confidence = item[1]
            if not confidence > confidenceLevel:
                continue

            countsByClassId[classID] = countsByClassId.get(classID, 0) + 1

            box = item[2]
            boxW = int(box[2])
            boxH = int(box[3])
            # Coordinates are around the center
            left = int(box[0] - box[2] / 2)
            top = int(box[1] - box[3] / 2)
            self.__draw_rect(frame, classID, confidence, left, top,
                             left + boxW, top + boxH)

        if countsByClassId:
            elapsed = datetime.now() - self.lastMessageSentTime
            if elapsed.total_seconds() >= 1:
                strMessage = json.dumps(countsByClassId)
                message = IoTHubMessage(strMessage)
                print(strMessage)
                AppState.HubManager.send_event_to_output(
                    "output1", message, 0)
                self.lastMessageSentTime = datetime.now()
    except Exception as e:
        print("Exception during AI Inference")
        print(e)
def loopfunction(self):
    """Process one video frame and reschedule itself on the GUI event loop.

    On each call the method:

    1. Asks ``self.vlc`` for the file currently playing; when it changed,
       reopens the capture and resets the frame counters.
    2. Reads one frame, runs ``detect`` and ``self.tracker.track`` on it and
       records the current frame index in ``self.locs`` for every tracked
       item whose first element equals ``self.SEARCHTERM``.
    3. Adds one button per newly recorded index; clicking it seeks the player
       to ``index / FPS``.

    Every exit path schedules the next call with ``self.GUIapp.after(1, ...)``.
    """
    currPlaying = self.vlc.getCurrPlaying().decode('utf-8')[2:].strip()

    if self.FILENAME != currPlaying:
        self.FILENAME = currPlaying
        print(currPlaying)
        if not os.path.isfile(currPlaying):
            self.GUIapp.after(1, self.loopfunction)
            return
        self.currPlayingStr.set(currPlaying)
        self.cap = cv2.VideoCapture(str(currPlaying))
        self.FPS = self.cap.get(cv2.CAP_PROP_FPS)
        self.idx = 0
        self.oldDrawnIdx = 0

    # Also reached when the name is unchanged but the file is still missing.
    if not os.path.isfile(currPlaying):
        self.GUIapp.after(1, self.loopfunction)
        return

    ret, frame = self.cap.read()
    if not ret:
        self.GUIapp.after(1, self.loopfunction)
        return

    dknetF = detect(np.array(frame))
    tracked = self.tracker.track(frame, dknetF)
    for item in tracked:
        if item[0] == self.SEARCHTERM:
            self.locs.append(self.idx)
            print(self.idx)
    self.idx += 1

    # Add buttons for the locations found since the last call. The location
    # is bound through the lambda's default argument so each button keeps its
    # own value instead of the loop variable's final one.
    for loc in self.locs[self.oldDrawnIdx:]:
        Button(self.GUIapp, text=str(loc),
               command=lambda j=loc: self.vlc.seek(int(j / self.FPS))).pack()
    self.oldDrawnIdx = len(self.locs)

    self.GUIapp.after(1, self.loopfunction)
ocr_weights.encode('utf-8'), 0) ocr_meta = dn.load_meta(ocr_dataset.encode('utf-8')) imgs_paths = sorted(glob('%s/*lp.png' % output_dir)) print('Performing Character Recognition...') for i, img_path in enumerate(imgs_paths): print('\tScanning %s' % img_path) bname = basename(splitext(img_path)[0]) R, (width, height) = detect(ocr_net, ocr_meta, img_path.encode('utf-8'), thresh=ocr_threshold, nms=None) if len(R): L = dknet_label_conversion(R, width, height) L = nms(L, .45) L.sort(key=lambda x: x.tl()[0]) lp_str = ''.join([chr(l.cl()) for l in L]) with open('%s/%s_str.txt' % (output_dir, bname), 'w') as f: f.write(lp_str + '\n') print('\t\tLP: %s' % lp_str)
lp_net = dn.load_net(lp_netcfg.encode('utf-8'), lp_weights.encode('utf-8'), 0) lp_meta = dn.load_meta(lp_dataset.encode('utf-8')) imgs_paths = glob('%s/*car.png' % input_dir) print('Searching for license plates...') for i, img_path in enumerate(imgs_paths): print('\t Processing %s' % img_path) bname = splitext(basename(img_path))[0] R, _ = detect(lp_net, lp_meta, img_path.encode('utf-8'), thresh=lp_threshold) #R = [r for r in R if r[0] in ['lp']] if len(R): Iorig = cv2.imread(img_path) WH = np.array(Iorig.shape[1::-1], dtype=float) Llp = [] for i, r in enumerate(R): cx, cy, w, h = (np.array(r[2]) / np.concatenate( (WH, WH))).tolist() tl = np.array([cx - w / 2., cy - h / 2.]) br = np.array([cx + w / 2., cy + h / 2.]) label = Label(0, tl, br) Ilp = crop_region(Iorig, label)
def runInference(self, frame, frameW, frameH, confidenceLevel):
    """Detect objects in a frame and return them scaled to the frame size.

    The frame is resized to 416x416, passed to ``darknet.detect``, reduced
    with non-maximum suppression (overlap threshold 0.3), and every remaining
    detection above ``confidenceLevel`` is scaled back to
    ``frameW`` x ``frameH`` pixels.

    Args:
        frame: Image to analyse.
        frameW: Width the boxes are scaled to.
        frameH: Height the boxes are scaled to.
        confidenceLevel: Detection threshold; also the minimum confidence for
            a detection to be returned.

    Returns:
        A list of ``YoloDetection(box, classID, confidence)`` with
        ``box = [x1, y1, x2, y2]`` in integer pixels. If an exception occurs
        it is printed and the detections collected so far are returned.
    """
    yoloDetections = []
    try:
        netSize = 416
        frame_small = cv2.resize(frame, (netSize, netSize))
        detections = darknet.detect(darknet.netMain, darknet.metaMain,
                                    frame_small, confidenceLevel)

        # Detections are centre based (cx, cy, w, h); build real corner
        # boxes (x1, y1, x2, y2) for the suppression step.
        boundingBoxes = np.array([
            (cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2)
            for cx, cy, w, h in (item[2] for item in detections)
        ])
        idxs = self.__non_max_suppression_fast(boundingBoxes, 0.3)

        # Factors mapping network-input coordinates back to the frame.
        scale = np.array([frameW / netSize, frameH / netSize,
                          frameW / netSize, frameH / netSize])

        for detection in (detections[idx] for idx in idxs):
            classID = str(detection[0], encoding)
            confidence = detection[1]
            if not confidence > confidenceLevel:
                continue

            bounds = detection[2] * scale
            width = int(bounds[2])
            height = int(bounds[3])
            # Coordinates are around the center
            xCoord = int(bounds[0] - bounds[2] / 2)
            yCoord = int(bounds[1] - bounds[3] / 2)

            box = [xCoord, yCoord, xCoord + width, yCoord + height]
            yoloDetections.append(YoloDetection(box, classID, confidence))
    except Exception as e:
        print("Exception during AI Inference")
        print(e)
    return yoloDetections
imgs_paths = image_files_from_folder(input_dir) imgs_paths.sort() if not isdir(output_dir): makedirs(output_dir) print('Searching for vehicles...') for i, img_path in enumerate(imgs_paths): print('\tScanning %s' % img_path) bname = basename(splitext(img_path)[0]) R, _ = detect(vehicle_net, vehicle_meta, img_path.encode('utf-8'), thresh=vehicle_threshold) R = [r for r in R if r[0].decode(encoding='utf-8') in ['car']] print('\t\t%d cars found' % len(R)) if len(R): Iorig = cv2.imread(img_path) WH = np.array(Iorig.shape[1::-1], dtype=float) Lcars = [] for i, r in enumerate(R): cx, cy, w, h = (np.array(r[2]) / np.concatenate( (WH, WH))).tolist() tl = np.array([cx - w / 2., cy - h / 2.])
def inference(self, input):
    """Run the detector on an in-memory image and return its raw result.

    The image is first written to ``frame.png`` in the current working
    directory because ``detect`` is called with a file name.

    Args:
        input: Image accepted by ``cv2.imwrite``.

    Returns:
        Whatever ``detect`` returns for the written file.
    """
    # TODO: having to write out image is inefficient.
    # See if YOLO can be converted to work with numpy images
    frame_file = "frame.png"
    cv2.imwrite(frame_file, input)
    return detect(self.net, self.meta, frame_file)
def _draw_center_box(frame, box, color):
    """Draw ``box`` given as ``(x_center, y_center, w, h)`` and return the frame."""
    x_center, y_center, w, h = box
    p1 = (int(x_center - w / 2), int(y_center - h / 2))
    p2 = (int(x_center + w / 2), int(y_center + h / 2))
    return cv2.rectangle(frame, p1, p2, color, 2)


def detect(path, detect_vid, net, meta):
    """Run the darknet detector on a video or on a single image.

    Video mode (``detect_vid == True``): reads ``path`` frame by frame. While
    nothing is being tracked, each frame is written to ``temp.jpg`` and passed
    to darknet; detections are drawn in green and handed to a KCF tracker.
    While tracking, the tracker's box is drawn in blue. All frames are written
    to ``output.avi`` (MJPG). Returns ``None``.

    Image mode: when ``path`` exists, runs darknet on it and returns ``True``
    only if both a ``logo`` and a ``caption`` label were detected, otherwise
    ``False``. When ``path`` does not exist, returns ``'Error in ext'`` if its
    last three characters are not one of ``jpg``/``jpeg``/``JPG``, else
    ``'Unknown Error'``.

    Args:
        path: Video or image path (``str``).
        detect_vid: Selects video mode when equal to ``True``.
        net: Loaded darknet network.
        meta: Loaded darknet metadata.
    """
    # Equality (not truthiness) is kept so existing callers see no change.
    if detect_vid == True:  # noqa: E712
        cap = cv2.VideoCapture(path)
        size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        fps = int(cap.get(cv2.CAP_PROP_FPS))
        fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
        vout_1 = cv2.VideoWriter('output.avi', fourcc, fps, size)
        try:
            tracker = cv2.TrackerKCF_create()
            cnt = 0
            tracking = False
            while True:
                print('\r%d' % cnt, end='')
                ret, frame = cap.read()
                if not ret:
                    break

                if not tracking:
                    cv2.imwrite('temp.jpg', frame)
                    # Call the model
                    detect_result = darknet.detect(
                        net, meta, 'temp.jpg'.encode('utf-8'))
                    for bbox in detect_result:
                        frame = _draw_center_box(frame, bbox[2], (0, 255, 0))
                        # NOTE(review): the centre-based box is passed to the
                        # tracker unchanged; OpenCV trackers appear to expect
                        # a top-left based (x, y, w, h) - confirm.
                        tracker.init(frame, bbox[2])
                        tracking = True

                if tracking:
                    tracking, tracking_box = tracker.update(frame)
                    frame = _draw_center_box(frame, tracking_box, (255, 0, 0))

                vout_1.write(frame)
                print(tracking)
                cnt += 1
        finally:
            # Release the reader and finalize the output file even when a
            # frame fails to process.
            cap.release()
            vout_1.release()
        return None

    if os.path.exists(path):
        img = cv2.imread(path)
        detect_result = darknet.detect(net, meta, path.encode('utf-8'))
        # Kept from the original flow: raises AttributeError when the image
        # could not be decoded (imread returned None).
        img.shape
        labels = {
            bbox[0].decode('utf-8').strip('\r') for bbox in detect_result
        }
        return 'logo' in labels and 'caption' in labels

    if path[-3:] not in {'jpg', 'jpeg', 'JPG'}:
        return 'Error in ext'
    return 'Unknown Error'