def analyze(image):
    image = imutils.resize(image, width=min(400, image.shape[1]))
    #orig = image.copy()

    # detect people in the image
    (rects, weights) = hog.detectMultiScale(image, winStride=(4, 4),
        padding=(8, 8), scale=1.05)

    # draw the original bounding boxes
    #for (x, y, w, h) in rects:
    #    cv2.rectangle(orig, (x, y), (x + w, y + h), (0, 0, 255), 2)

    # apply non-maxima suppression to the bounding boxes using a
    # fairly large overlap threshold to try to maintain overlapping
    # boxes that are still people
    rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
    pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)

    # draw the final bounding boxes
    #for (xA, yA, xB, yB) in pick:
    #    cv2.rectangle(image, (xA, yA), (xB, yB), (0, 255, 0), 2)

    # show some information on the number of bounding boxes
    #filename = imagePath[imagePath.rfind("/") + 1:]
    #print("[INFO] {}: {} original boxes, {} after suppression".format(
    #    filename, len(rects), len(pick)))

    # show the output images
    #cv2.imshow("Before NMS", orig)
    #cv2.imshow("After NMS", image)
    #cv2.waitKey(0)
    return pick
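# The snippets in this file assume module-level globals such as `hog`, `np`,
# `imutils`, and `non_max_suppression`. A minimal setup sketch that would make
# analyze() above runnable -- an assumption, not part of the original code:
import cv2
import imutils
import numpy as np
from imutils.object_detection import non_max_suppression

# HOG descriptor with OpenCV's bundled default people detector
hog = cv2.HOGDescriptor()
hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())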
def processImage(frame):
    #frame = imutils.resize(frame, width = min(600, frame.shape[1]))

    # detect people in the image
    rects = classifier.detectMultiScale(frame, 1.1, 200)  # winStride=(4, 4), padding=(8, 8), scale=1.05

    # apply non-maxima suppression to the bounding boxes using a
    # fairly large overlap threshold to try to maintain overlapping
    # boxes that are still people
    rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
    pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)

    # draw the final bounding boxes
    for (xA, yA, xB, yB) in pick:
        cv2.rectangle(frame, (xA, yA), (xB, yB), (0, 255, 0), 2)

    # show some information on the number of bounding boxes
    print("[INFO]: {} are currently detected in stream".format(len(pick)))

    # show the output images; imshow can fail on headless systems, so
    # swallow only OpenCV errors rather than using a bare except
    try:
        cv2.imshow('Stream', frame)
    except cv2.error:
        pass
    cv2.waitKey(1)
def detector(image):
    '''
    @image is a numpy array
    '''
    clone = image.copy()
    (rects, weights) = HOGCV.detectMultiScale(image, winStride=(4, 4),
        padding=(8, 8), scale=1.05)
    print(rects)
    print(weights)

    # draw the original bounding boxes
    for (x, y, w, h) in rects:
        print("inside rects:", x, y, w, h)
        cv2.rectangle(clone, (x, y), (x + w, y + h), (0, 0, 255), 2)

    # apply non-max suppression from the imutils package to discard
    # overlapping boxes
    rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
    result = non_max_suppression(rects, probs=None, overlapThresh=0.65)

    #if result:
    print("result=")
    print(result)
    return result
def process(content, app):
    if not isinstance(content, unicode):
        return []
    image_data = re.sub('^data:image/.+;base64,', '', content).decode('base64')
    image = Image.open(cStringIO.StringIO(image_data))
    image = cv2.cvtColor(np.array(image), 2)  # NOTE: 2 is a raw cv2 color-conversion code; a named constant would be clearer
    # cv2.imwrite('image.jpg', image)
    # gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    faces = cascade.detectMultiScale(image, 1.03, 500, minSize=(10, 10))
    if (len(faces) <= 0):
        return []
    rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in faces])
    pick = non_max_suppression(rects, probs=None, overlapThresh=0.15)
    good = []
    for (x, y, x2, y2) in pick:
        good.append(
            {'x': x * 1, 'y': y * 1, 'width': (x2 - x) * 1, 'height': (y2 - y) * 1})
    # for f in good:
    #     cv2.rectangle(
    #         image,
    #         (f.get('x'), f.get('y')),
    #         (f.get('width'), f.get('height')),
    #         (0, 255, 0), 6)
    # cv2.imwrite('image.jpg', image)
    return good
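# process() above relies on a module-level Haar cascade named `cascade`; a
# minimal Python 2 setup sketch (the cascade file name is an assumption):
import re
import cStringIO
import cv2
import numpy as np
from PIL import Image
from imutils.object_detection import non_max_suppression

cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')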
def detect_object(self, frame, min_width=35, min_height=35):
    """
    Pick out the objects in a binarized image
    :param frame: binarized image
    :param min_width: minimum object width
    :param min_height: minimum object height
    :return: for each object, the top-left (x1, y1) and bottom-right (x2, y2)
             corners of its bounding box plus its centre (cx, cy):
             [(x1, y1, x2, y2), (cx, cy)]
    """
    matches = []

    # find the bounding rectangles of the objects
    image, contours, hierarchy = cv.findContours(frame, cv.RETR_EXTERNAL,
                                                 cv.CHAIN_APPROX_TC89_L1)

    # use non-maximum suppression to avoid multiple rectangles on a single
    # object (repeated false detections)
    rects = np.array([(x, y, x + w, y + h)
                      for x, y, w, h in map(cv.boundingRect, contours)])
    pick = non_max_suppression(rects, overlapThresh=0.65)

    # keep only the boxes (objects) that meet the required size
    for x1, y1, x2, y2 in pick:
        # check whether the object is larger than the configured minimum
        is_valid = (x2 - x1 > min_width) and (y2 - y1 > min_height)
        # if it qualifies, append the box coordinates and centroid to the list
        if is_valid:
            centroid = self._get_centroid(x1, y1, x2, y2)
            matches.append([(x1, y1, x2, y2), centroid])
    return matches
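# detect_object() above calls self._get_centroid, which is not shown; a
# plausible sketch of that helper (hypothetical, inferred from its usage):
def _get_centroid(self, x1, y1, x2, y2):
    # centre point of the bounding box
    cx = (x1 + x2) // 2
    cy = (y1 + y2) // 2
    return cx, cy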
def count_peds(self, image):
    if image is None:
        return (0, image)
    image = imutils.resize(image, width=min(400, image.shape[1]))
    orig = image.copy()

    # detect people in the image
    (rects, weights) = self.hog.detectMultiScale(image, winStride=(4, 4),
        padding=(8, 8), scale=1.05)

    # draw the original bounding boxes
    for (x, y, w, h) in rects:
        cv2.rectangle(orig, (x, y), (x + w, y + h), (0, 0, 255), 2)

    # apply non-maxima suppression to the bounding boxes using a
    # fairly large overlap threshold to try to maintain overlapping
    # boxes that are still people
    rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
    pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)

    # draw the final bounding boxes
    for (xA, yA, xB, yB) in pick:
        cv2.rectangle(image, (xA, yA), (xB, yB), (0, 255, 0), 2)
    cv2.putText(image, "{}:Peds".format(len(pick)), (image.shape[1] - 40, 10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

    # return the image with rectangles drawn over the pedestrians
    # as well as a count of pedestrians
    return (len(pick), image)
def beginVideoProcess(im):
    # load the image and resize it to (1) reduce detection time
    # and (2) improve detection accuracy
    image = im.copy()
    orig = image.copy()  # NOTE: copied before the resize, so the boxes below are drawn at the resized scale
    #image = cv2.imread(imagePath)
    image = imutils.resize(image, width=min(400, image.shape[1]))

    # detect people in the image
    (rects, weights) = hog.detectMultiScale(image, winStride=(4, 4),
        padding=(8, 8), scale=1.05)

    # draw the original bounding boxes
    for (x, y, w, h) in rects:
        cv2.rectangle(orig, (x, y), (x + w, y + h), (0, 0, 255), 2)

    # apply non-maxima suppression to the bounding boxes using a
    # fairly large overlap threshold to try to maintain overlapping
    # boxes that are still people
    rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
    pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)

    # draw the final bounding boxes
    for (xA, yA, xB, yB) in pick:
        cv2.rectangle(image, (xA, yA), (xB, yB), (0, 255, 0), 2)

    # show some information on the number of bounding boxes
    filename = "webcam"
    print("[INFO] {}: {}".format(filename, len(pick)))

    # show the output images
    cv2.imshow("After NMS", image)
    return len(pick)
def detectMultiscale(filename, winStride=(4, 4)):
    # read image
    filepath = "test/ship/multi_scale/" + filename
    image = cv2.imread(filepath, 0)

    # load classifier
    clf = joblib.load(HOG_CLF_FILE)
    height, width = image.shape

    # detect ships in image with a sliding 64x128 window
    positions = []
    for i in xrange(0, height - 128 - winStride[0], winStride[0]):
        print float(i) / height * 100
        for j in xrange(0, width - 64 - winStride[0], winStride[1]):
            hog_fd = hog(image[i:i + 128, j:j + 64], orientations=9,
                         pixels_per_cell=(8, 8), cells_per_block=(2, 2),
                         visualise=False)
            nbr = clf.predict(np.array([hog_fd], 'float64'))
            if nbr[0] == 1:
                positions.append((j, i))

    # apply non-maxima suppression to the bounding boxes using a
    # fairly large overlap threshold to try to maintain overlapping
    # boxes that are still ships
    rects = np.array([[j, i, j + 64, i + 128] for (j, i) in positions])
    pick = non_max_suppression(rects, probs=None, overlapThresh=0.5)

    # draw the final bounding boxes
    for (xA, yA, xB, yB) in pick:
        cv2.rectangle(image, (xA, yA), (xB, yB), (0, 255, 0), 1)
    cv2.imwrite("results/ship/multi_scale/" + filename, image)
def hello():
    # we are reading from webcam
    camera = cv2.VideoCapture(WEB_CAM_INDEX)
    time.sleep(0.25)

    # initialize the HOG descriptor/person detector
    hog = cv2.HOGDescriptor()
    hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())

    # grab the current frame
    (grabbed, frame) = camera.read()

    # resize the frame
    frame = imutils.resize(frame, width=min(FRAME_WIDTH, frame.shape[1]))

    # detect people in the image
    (rects, weights) = hog.detectMultiScale(frame,
        winStride=(WIN_STRIDE_VAL, WIN_STRIDE_VAL), padding=(8, 8), scale=SCALE_VAL)
    orig = frame.copy()
    occupied = False
    inColisionZone = False

    # draw the original bounding boxes
    for (x, y, w, h) in rects:
        if (y + h) < NEAREST_POINT:
            cv2.rectangle(orig, (x, y), (x + w, y + h), (255, 0, 0), 2)
        else:
            cv2.rectangle(orig, (x, y), (x + w, y + h), (0, 255, 0), 2)

    # apply non-maxima suppression to the bounding boxes using a
    # fairly large overlap threshold to try to maintain overlapping
    # boxes that are still people
    rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
    pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)

    # draw the final bounding boxes (pick holds corner coordinates, so use
    # them directly rather than the stale x, y, w, h from the loop above)
    for (xA, yA, xB, yB) in pick:
        occupied = True
        if yB < NEAREST_POINT:
            cv2.rectangle(frame, (xA, yA), (xB, yB), (255, 0, 0), 2)
        else:
            inColisionZone = True
            cv2.rectangle(frame, (xA, yA), (xB, yB), (0, 255, 0), 2)

    # cleanup the camera and close any open windows
    camera.release()
    # cv2.destroyAllWindows()
    data = {'isOccupied': occupied, 'inColisionZone': inColisionZone}
    print(inColisionZone)
    resp = jsonify(data)
    resp.status_code = 200
    return resp
def readImage(image):
    (rects, weights) = hog.detectMultiScale(image, winStride=(4, 4),
        padding=(8, 8), scale=1.05)
    rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
    pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)
    count = 0
    for (xA, yA, xB, yB) in pick:
        count = count + 1
    return count
def draw_detections(img, rects, thickness=1):
    global passTotal
    # NOTE: non_max_suppression expects boxes as (startX, startY, endX, endY)
    pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)
    for x, y, w, h in pick:
        # the HOG detector returns slightly larger rectangles than the real
        # objects, so we slightly shrink the rectangles to get a nicer output
        pad_w, pad_h = int(0.15 * w), int(0.05 * h)
        cv2.rectangle(img, (x + pad_w, y + pad_h),
                      (x + w - pad_w, y + h - pad_h), (0, 255, 0), thickness)
    passagem = len(pick)
    passTotal += passagem
    #passagem = str(len(pick))
    print("Total passages: " + str(passTotal))
def process(content, app):
    if not isinstance(content, unicode):
        return []
    image_data = re.sub('^data:image/.+;base64,', '', content).decode('base64')
    image = Image.open(cStringIO.StringIO(image_data))
    image = cv2.cvtColor(np.array(image), 2)
    # cv2.imwrite('image.jpg', image)
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    faces = cascade.detectMultiScale(image, 1.03, 500, minSize=(10, 10))
    if (len(faces) <= 0):
        return []
    rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in faces])
    pick = non_max_suppression(rects, probs=None, overlapThresh=0.15)
    good = []
    for (x, y, x2, y2) in pick:
        obj = gray[(y - IMAGE_PADDING):(y2 + IMAGE_PADDING),
                   (x - IMAGE_PADDING):(x2 + IMAGE_PADDING)]
        if obj.shape[0] == 0 or obj.shape[1] == 0:
            continue
        ratio = IMAGE_SIZE / obj.shape[1]
        obj = cv2.resize(obj, (int(IMAGE_SIZE), int(obj.shape[0] * ratio)))

        # find the keypoints and descriptors for object
        kp_o, des_o = orb.detectAndCompute(obj, None)
        if len(kp_o) == 0:
            continue

        # match descriptors
        matches = bf.match(des_r, des_o)
        if (len(matches) >= MATCH_THRESHOLD):
            good.append({
                'x': x * 1,
                'y': y * 1,
                'width': (x2 - x) * 1,
                'height': (y2 - y) * 1,
                'label': 'battlefront'
            })
    # for f in good:
    #     cv2.rectangle(
    #         image,
    #         (f.get('x'), f.get('y')),
    #         (f.get('width'), f.get('height')),
    #         (0, 255, 0), 6)
    # cv2.imwrite('image.jpg', image)
    return good
def detect_people(frame):
    """
    detect humans using HOG descriptor
    Args:
        frame:
    Returns:
        processed frame
    """
    (rects, weights) = hog.detectMultiScale(frame, winStride=(8, 8),
        padding=(16, 16), scale=1.06)
    # non_max_suppression expects (startX, startY, endX, endY) boxes, so
    # convert from the (x, y, w, h) format returned by detectMultiScale
    rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
    rects = non_max_suppression(rects, probs=None, overlapThresh=0.65)
    for (xA, yA, xB, yB) in rects:
        cv2.rectangle(frame, (xA, yA), (xB, yB), (0, 0, 255), 2)
    return frame
def get_people():
    image, _ = freenect.sync_get_video()
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    image = imutils.resize(image, width=min(400, image.shape[1]))

    # detect people in the image
    (rects, weights) = hog.detectMultiScale(image, winStride=(4, 4),
        padding=(32, 32), scale=1.05)
    rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
    pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)

    # draw the final bounding boxes
    for (xA, yA, xB, yB) in pick:
        cv2.rectangle(image, (xA, yA), (xB, yB), (0, 255, 0), 2)

    # NOTE: the block below uses xA, yA, xB, yB from the last box drawn above
    pointX, pointY = 200, 200
    rectX, rectY = xA, yA
    rectX2, rectY2 = xB, yA
    rectX3, rectY3 = xA, yB
    rectX4, rectY4 = xB, yB
    '''
    xA,yA -----2
    |          |
    |          |
    |          |
    3-------xB,yB'''
    rectXs, rectYs = [rectX, rectX2, rectX3, rectX4], [rectY, rectY2, rectY3, rectY4]
    largestX = max(rectXs)
    smallestX = min(rectXs)
    largestY = max(rectYs)
    smallestY = min(rectYs)

    # `hit` is presumably a module-level flag set elsewhere
    if (pointX > smallestX and pointX < largestX
            and pointY > smallestY and pointY < largestY) and hit:
        if pointY < (largestY - smallestY) / 3 + smallestY:
            print("point is in 3/3 (bottom)")
            r = requests.post("https://d6fb041f.ngrok.io/post", data={"part": "limb"})
        else:
            if pointY < (largestY - smallestY) / 3 * 2 + smallestY:
                print("point is in 2/3 (middle)")
                r = requests.post("https://d6fb041f.ngrok.io/post", data={"part": "body"})
            else:
                if (pointY < (largestY - smallestY) / 3 * 3 + smallestY):
                    print("point is in 1/3 (top)")
                    r = requests.post("https://d6fb041f.ngrok.io/post", data={"part": "head"})
    else:
        print("point not inside box")
    hit = False
    # check if middle is in x area
    # set hit to top, middle, or bottom
    cv2.imshow('People detected picture', image)
def trackPerson():
    global imageReceived
    global metaInfoReceived
    global imageReceivedProcessed

    # check for no frame
    if imageReceived is None:
        return None, None

    # get frame after locking
    lock.acquire()
    image = imageReceived
    metaInfo = metaInfoReceived
    imageReceivedProcessed = True
    lock.release()
    orig = image.copy()

    # detect people in the image
    (rects, weights) = hog.detectMultiScale(image, winStride=(4, 4),
        padding=(8, 8), scale=1.05)

    # draw the original bounding boxes
    for (x, y, w, h) in rects:
        cv2.rectangle(orig, (x, y), (x + w, y + h), (0, 0, 255), 2)

    # apply non-maxima suppression to the bounding boxes using a
    # fairly large overlap threshold to try to maintain overlapping
    # boxes that are still people
    rects = numpy.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
    pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)

    # draw the final bounding boxes
    for (xA, yA, xB, yB) in pick:
        cv2.rectangle(image, (xA, yA), (xB, yB), (0, 255, 0), 2)

    # show some information on the number of bounding boxes
    #filename = imagePath[imagePath.rfind("/") + 1:]
    #filename = "FILE"
    #print("[INFO] {}: {} original boxes, {} after suppression".format(
    #    filename, len(rects), len(pick)))

    # show the output images
    cv2.imshow("Before NMS", orig)
    cv2.imshow("After NMS", image)
    #print time.time() - start
    return pick, metaInfo
def rois_extraction(cv_image):
    pre_rois = []
    boundingBoxing = []
    # cv_image = cv2.cvtColor(cv_image, cv2.COLOR_BGR2GRAY)
    clone = copy.copy(cv_image)
    clone_2 = copy.copy(cv_image)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(3, 3))
    grayimg = clahe.apply(clone)
    grayimg = cv2.GaussianBlur(grayimg, (5, 5), 0)
    ret1, th1 = cv2.threshold(grayimg, min_th, 255, cv2.THRESH_BINARY)
    re2, th2 = cv2.threshold(grayimg, max_th, 255, cv2.THRESH_BINARY_INV)
    band_thresh = cv2.bitwise_and(th1, th2)
    # print "i make a threshold band"
    contours, hierarchy = cv2.findContours(band_thresh, cv2.RETR_TREE,
                                           cv2.CHAIN_APPROX_NONE)
    mask = np.zeros(clone.shape, dtype=np.uint8)
    cv2.drawContours(mask, contours, -1, 255, -1)
    # print "contours done"
    for c in contours:
        # if the contour is too small, ignore it
        # print "search contours"
        if cv2.contourArea(c) > min_area and cv2.contourArea(c) < max_area:
            boundingBoxing.append(cv2.boundingRect(c))
            #print "I found a ROI"
        else:
            #print "Is not ROI"
            continue
    for (x, y, w, h) in boundingBoxing:
        cv2.rectangle(cv_image, (x, y), (x + w, y + h), 0, 1)
    rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in boundingBoxing])
    pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)
    for (xA, yA, xB, yB) in pick:
        aux_roi = clone[yA:yB, xA:xB]
        pre_rois.append(aux_roi)
        cv2.rectangle(clone_2, (xA, yA), (xB, yB), 0, 1)
    # sorted(pre_rois, cmp=order)
    print(len(pre_rois))
    return pre_rois
def find_persons(cv_image):
    boundingBoxing = []
    clone = copy.copy(cv_image)
    clone_2 = copy.copy(cv_image)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(3, 3))
    gray = clahe.apply(cv_image)
    gray = cv2.GaussianBlur(gray, (21, 21), 0)
    min_thresh = cv2.threshold(gray, min_th, 255, cv2.THRESH_BINARY)[1]
    max_thresh = cv2.threshold(gray, max_th, 255, cv2.THRESH_BINARY_INV)[1]
    thresh = cv2.bitwise_and(min_thresh, max_thresh)
    band = cv2.bitwise_and(clone_2, thresh)
    #thresh = cv2.dilate(thresh, None, iterations = 2)
    (cnts, _) = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,
                                 cv2.CHAIN_APPROX_SIMPLE)
    for c in cnts:
        if cv2.contourArea(c) > min_area and cv2.contourArea(c) < max_area:
            boundingBoxing.append(cv2.boundingRect(c))
            cv2.drawContours(clone, [c], -1, 0, 2)
            # print "I found a ROI"
        # else:
        #     print "Is not ROI"
    rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in boundingBoxing])
    pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)
    # pick holds corner coordinates, so draw them directly
    for (xA, yA, xB, yB) in pick:
        cv2.rectangle(clone, (xA, yA), (xB, yB), 0, 1)
    #print pick
    cv2.imshow("result", clone)
    cv2.imshow("region_detector", cv_image)
    cv2.moveWindow("region_detector", 0, 0)
    cv2.imshow("band_threshold_image", thresh)
    cv2.moveWindow("band_threshold_image", 0, 400)
    cv2.moveWindow("result", 500, 0)
    cv2.waitKey(1)
def upload_file():
    if request.method == 'POST':
        f = request.files['file']
        newFileName = "test." + f.filename.rsplit(".")[-1]
        f.save(newFileName)  # we want to keep the file extension

        # image recognition stuff here
        image = cv2.imread(newFileName)
        image = imutils.resize(image, width=min(500, image.shape[1]))
        orig = image.copy()
        (rects, weights) = hog.detectMultiScale(image, winStride=(4, 4),
            padding=(8, 8), scale=1.05)
        for (x, y, w, h) in rects:
            cv2.rectangle(orig, (x, y), (x + w, y + h), (0, 0, 255), 2)
        rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
        pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)

        # guard against the IndexError pick[0] raises when nothing is detected
        if len(pick) == 0:
            return json.dumps({})
        myBoundingBox = {}
        (myBoundingBox['xA'], myBoundingBox['yA'],
         myBoundingBox['xB'], myBoundingBox['yB']) = pick[0]
        return json.dumps(myBoundingBox)
def classfier(testImage, threadNum, capTime, detectCounter):
    #print(threadNum,capTime)
    (rects, weights) = hog.detectMultiScale(testImage, winStride=(8, 8),
        padding=(8, 8), scale=1.1)
    rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
    pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)

    # draw the final bounding boxes
    # if(pick):
    for (xA, yA, xB, yB) in pick:
        print("Image detected")
        detectCounter[0] = 0
        cv2.rectangle(testImage, (xA, yA), (xB, yB), (0, 255, 0), 2)
        # print(pick,"\n")
    curTime = time.time()
    #print ("Total time from capture", curTime - capTime)
    out.write(testImage)
    cv2.imshow("After NMS", testImage)
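# classfier() above writes frames to a global VideoWriter named `out`; a
# minimal setup sketch (file name, codec, fps and frame size are assumptions):
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter('output.avi', fourcc, 20.0, (640, 480))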
def ped_detect(image):
    image = imutils.resize(image, width=min(800, image.shape[1]))
    orig = image.copy()

    # detect people in the image
    (rects, weights) = hog.detectMultiScale(image, winStride=(4, 4),
        padding=(8, 8), scale=1.05)

    # draw the original bounding boxes
    for (x, y, w, h) in rects:
        cv2.rectangle(orig, (x, y), (x + w, y + h), (0, 0, 255), 2)

    # apply non-maxima suppression to the bounding boxes using a
    # fairly large overlap threshold to try to maintain overlapping
    # boxes that are still people
    rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
    pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)

    # draw the final bounding boxes
    for (xA, yA, xB, yB) in pick:
        cv2.rectangle(image, (xA, yA), (xB, yB), (0, 255, 0), 2)

    # show some information on the number of bounding boxes
    print("[INFO]: {} original boxes, {} after suppression".format(
        len(rects), len(pick)))
    return orig, image
def Hog_Pedestrian(frame):
    # Copy image
    orig = frame.copy()

    # Use HOG to detect people in the image
    hog_start = time.time()
    (rects, weights) = hog.detectMultiScale(frame, winStride=(4, 4),
        padding=(8, 8), scale=1.2)
    hog_stop = time.time()

    # Draw the original bounding boxes
    for (x, y, w, h) in rects:
        cv2.rectangle(orig, (x, y), (x + w, y + h), (0, 0, 255), 2)

    # Apply non-maxima suppression to the bounding boxes using a
    # fairly large overlap threshold to try to maintain overlapping
    # boxes that are still people
    rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
    pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)

    # Draw the final bounding boxes
    for (xA, yA, xB, yB) in pick:
        cv2.rectangle(frame, (xA, yA), (xB, yB), (0, 255, 0), 2)

    # Add FPS
    cv2.putText(frame, "FPS: {}".format(fps), (frame.shape[1] - 100, 20),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)

    # show the frames and record if the user presses a key
    cv2.imshow("No Max Suppression", orig)
    cv2.moveWindow("No Max Suppression", 0, 100)
    cv2.imshow("With Max Suppression", frame)
    cv2.moveWindow("With Max Suppression", WINDOW_SIZE + 25, 100)

    # To track performance of HOG as we vary parameters
    print("HOG: " + str(hog_stop - hog_start))
            cos = np.cos(angle)
            sin = np.sin(angle)
            h = x0[x] + x2[x]
            w = x1[x] + x3[x]
            endX = int(newX + (cos * x1[x]) + (sin * x2[x]))  # from tensorflow
            endY = int(newY - (sin * x1[x]) + (cos * x2[x]))  # from tensorflow
            startX = int(endX - w)
            startY = int(endY - h)
            coords.append((startX, startY, endX, endY))
            alphas.append(scoresData[x])

    # apply non-maxima suppression to suppress weak, overlapping bounding
    # boxes
    boxes = non_max_suppression(np.array(coords), probs=alphas)

    # loop over the bounding boxes
    i = 0
    for (startX, startY, endX, endY) in boxes:
        # padding gives some leeway (the x-padding on the right edge was
        # originally padding_y, which looks like a typo)
        cv2.imwrite("Text_{}.jpg".format(i + 1),
                    orig[startY - padding_y:endY + padding_y,
                         startX - padding_x:endX + padding_x])
        cv2.rectangle(orig, (startX, startY), (endX, endY), (0, 255, 0), 2)
        i = i + 1
    end = time.time()
    #print('Time Taken for data number{} is ..'.format(pan))
    print(end - start)
    cv2_imshow(orig)
def textRecognition():
    # load the input image and grab the image dimensions
    image = cv2.imread(path)
    image = image[0:900, 0:1100]
    cv2.imwrite('text.jpg', image)
    orig = image.copy()
    (origH, origW) = image.shape[:2]

    # set the new width and height and then determine the ratio in change
    # for both the width and height
    (newW, newH) = (320, 320)
    rW = origW / float(newW)
    rH = origH / float(newH)

    # resize the image and grab the new image dimensions
    image = cv2.resize(image, (newW, newH))
    (H, W) = image.shape[:2]

    # define the two output layer names for the EAST detector model that
    # we are interested in -- the first is the output probabilities and the
    # second can be used to derive the bounding box coordinates of text
    layerNames = ["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"]

    # load the pre-trained EAST text detector
    net = cv2.dnn.readNet(east)

    # construct a blob from the image and then perform a forward pass of
    # the model to obtain the two output layer sets
    blob = cv2.dnn.blobFromImage(image, 1.0, (W, H),
                                 (123.68, 116.78, 103.94), swapRB=True, crop=False)
    net.setInput(blob)
    (scores, geometry) = net.forward(layerNames)

    # decode the predictions, then apply non-maxima suppression to
    # suppress weak, overlapping bounding boxes
    (rects, confidences) = decode_predictions(scores, geometry)
    boxes = non_max_suppression(np.array(rects), probs=confidences)

    # initialize the list of results
    results = []

    # loop over the bounding boxes
    for (startX, startY, endX, endY) in boxes:
        # scale the bounding box coordinates based on the respective ratios
        startX = int(startX * rW)
        startY = int(startY * rH)
        endX = int(endX * rW)
        endY = int(endY * rH)

        # in order to obtain a better OCR of the text we can potentially
        # apply a bit of padding surrounding the bounding box
        dX = int((endX - startX) * 0.03)
        dY = int((endY - startY) * 0.12)

        # apply padding to each side of the bounding box, respectively
        startX = max(0, startX - dX)
        startY = max(0, startY - dY)
        endX = min(origW, endX + (dX * 2))
        endY = min(origH, endY + (dY * 2))
        results.append((startX, startY, endX, endY))

    # sort the results bounding box coordinates from left to right
    results = sorted(results, key=lambda r: r[0])

    # get the predicted size of the entire text, based on the separate
    # box bounds of the individual words
    numObj = len(results) - 1
    startX = results[0][0]
    endX = results[numObj][2]
    startY = min(results[numObj][1], results[0][1])
    endY = max(results[numObj][3], results[0][3])

    # Sometimes it'll detect something super far away
    while (((endY - startY) > 100) and (numObj >= 0)):
        endY = max(results[numObj][3], results[0][3])
        numObj = numObj - 1
    roi = orig[startY:endY, startX:endX]
    roi = cv2.blur(roi, (5, 5))
    cv2.imwrite(test, roi)

    # in order to apply Tesseract v4 to OCR text we must supply
    # (1) a language, (2) an OEM flag of 1, indicating that we
    # wish to use the LSTM neural net model for OCR, and finally
    # (3) a PSM value, in this case 7, which implies that we are
    # treating the ROI as a single line of text
    config = ("-l eng --oem 1 --psm 7")
    text = pytesseract.image_to_string(roi, config=config)
    if (text):
        # Common fixes for misread characters: strip stray leading and
        # trailing non-letters and normalize punctuation
        if not (text[0].isalpha()):
            text = text[1:]
        if not (text[-1].isalpha()):
            text = text[:-1]
        if not (text[-1].isalpha()):
            text = text[:-1]
        text = text.replace("’", "'")
        text = text.replace(".", ",")
    #os.remove("text.jpg")
    return text
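# textRecognition() above (and several snippets below) call a
# decode_predictions helper that is not shown. A sketch of what it plausibly
# does, mirroring the inline EAST decoding loops further down (the confidence
# threshold is an assumption):
def decode_predictions(scores, geometry, min_confidence=0.5):
    # grab the number of rows and columns from the scores volume
    (numRows, numCols) = scores.shape[2:4]
    rects = []
    confidences = []
    for y in range(0, numRows):
        scoresData = scores[0, 0, y]
        xData0 = geometry[0, 0, y]
        xData1 = geometry[0, 1, y]
        xData2 = geometry[0, 2, y]
        xData3 = geometry[0, 3, y]
        anglesData = geometry[0, 4, y]
        for x in range(0, numCols):
            # skip low-confidence predictions
            if scoresData[x] < min_confidence:
                continue
            # the feature maps are 4x smaller than the input image
            (offsetX, offsetY) = (x * 4.0, y * 4.0)
            angle = anglesData[x]
            cos = np.cos(angle)
            sin = np.sin(angle)
            h = xData0[x] + xData2[x]
            w = xData1[x] + xData3[x]
            endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
            endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
            startX = int(endX - w)
            startY = int(endY - h)
            rects.append((startX, startY, endX, endY))
            confidences.append(scoresData[x])
    return (rects, confidences)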
def detector(video_capture, rot_angle, ROI_1, ROI_2, ppl_width):
    for i in range(frame_skip):
        video_capture.read()

    # read video
    ret, image = video_capture.read()
    [height, width, layer] = image.shape

    # rotate
    if (rot_angle != 0):
        (hh, ww) = image.shape[:2]
        center = (ww / 2, hh / 2)
        M = cv2.getRotationMatrix2D(center, rot_angle, 1.0)
        image = cv2.warpAffine(image, M, (ww, hh))

    # mask before resize
    #image = image[ROI_1[1]:ROI_2[1],ROI_1[0]:ROI_2[0]]
    image = imutils.resize(image, width=min(400, image.shape[1]))

    # mask after resize
    resize_ratio = image.shape[1] / float(width)
    max_ppl_size = np.ceil(ppl_width * resize_ratio * 1.4)
    min_ppl_size = np.ceil(ppl_width * resize_ratio * 0.8)
    #print max_ppl_size
    ROI_1 = np.int_(np.dot(ROI_1, resize_ratio))
    ROI_2 = np.int_(np.dot(ROI_2, resize_ratio))
    #print ROI_1
    image = image[ROI_1[1]:ROI_2[1], ROI_1[0]:ROI_2[0]]

    # Display - origin
    orig = image.copy()

    # detect
    (rects, weights) = hog.detectMultiScale(image, winStride=(8, 8),
        padding=(8, 8), scale=1.05)

    # delete rectangles outside the expected person size range
    i = 0
    while (i < len(rects)):
        print("ppp")
        print(float(rects[i][2]))
        if (rects[i][2] > max_ppl_size or rects[i][2] < min_ppl_size):
            #[x,y,w,h] = rects[i]
            #cv2.rectangle(orig, (x, y), (x + w, y + h), (255, 0, 0), 2)
            rects = np.delete(rects, i, 0)
        else:
            #[x,y,w,h] = rects[i]
            #cv2.rectangle(orig, (x, y), (x + w, y + h), (0, 0, 255), 2)
            i += 1

    # Display - origin
    # for (x, y, w, h) in rects:
    #     # box size validation
    #     print ('w = ' + str(w))
    #     if (w < 100):
    #         cv2.rectangle(orig, (x, y), (x + w, y + h), (0, 0, 255), 2)

    # Display - origin
    cv2.imshow('Original', orig)

    # combine rectangles
    rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
    pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)
    people = 0

    # draw the final bounding boxes
    for (xA, yA, xB, yB) in pick:
        # box size validation
        #if (xB - xA < 100):
        cv2.rectangle(image, (xA, yA), (xB, yB), (0, 255, 0), 2)
        people += 1

    # Display - result
    cv2.imshow("HOG + NMS", image)
    # if(random.random() > 0.8):
    #     print ("save")
    #     fname = "./save/" + time.strftime("%m_%d_%H_%M_%S")+ ".jpg"
    #     cv2.imwrite(fname,image)
    #
    if people > 0:
        return True
    else:
        return False
def pedestrians(path, w, h, n):
    from imutils.object_detection import non_max_suppression
    from imutils import paths
    import argparse
    import imutils
    from matplotlib import pyplot as plt
    import cv2
    import os
    import numpy as np
    import glob
    from tqdm import tqdm_notebook
    import pickle

    ### Create a list with the paths of all the images in the folder img1
    img_path = glob.glob(path + "/*.jpg")
    img_path.sort()
    img_path

    ### Background subtraction of all the images in the folder; create img1_bis
    print(" Background Subtraction of all the images in the folder ", "\n")
    img_path_ = list()
    fgbg = cv2.createBackgroundSubtractorMOG2()
    if not os.path.exists('img1_bis'):
        os.mkdir('img1_bis')

    # Computing background
    for id_im, im_path in enumerate(img_path):
        print("Frame #" + str(id_im) + '/' + str(len(img_path)), end="\r")
        im = cv2.imread(im_path)
        fgmask = fgbg.apply(im)
        img_path_.append('img1_bis' + '/' + '{:03d}'.format(id_im) + '.jpg')
        cv2.imwrite('img1_bis' + '/' + '{:03d}'.format(id_im) + '.jpg',
                    np.expand_dims((fgmask > 0), axis=-1) * im)
    dic_Paths = dict(
        (path_bis, path) for (path_bis, path) in zip(img_path_, img_path))

    ### Pedestrian detection using HOG and SVM
    # initialize the HOG descriptor/person detector
    print("pedestrian detection using HOG and SVM", "\n")
    hog = cv2.HOGDescriptor()
    hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
    dic_img_box = dict()
    for path in tqdm_notebook(img_path_):
        solution = []
        image = cv2.imread(path, cv2.IMREAD_UNCHANGED)
        orig = image.copy()
        # playing with winStride impacts the performance; setting it to 7,7
        # allows us to have a score of 24%
        (rects, weights) = hog.detectMultiScale(image, winStride=(7, 7),
            padding=(8, 8), scale=1.05)
        #new_rects , new_weights = rects.copy(), weights.copy()
        Del = list()
        threshold = 0
        for i in range(len(rects)):
            x, y, w, h = rects[i, :]
            if weights[i, 0] < threshold:
                #cv2.rectangle(orig, (x, y), (x+w, y+h), (0, 255, 0), 10)
                Del.append(i)
        rects, weights = np.delete(rects, Del, 0), np.delete(weights, Del, 0)
        #cv2_imshow('Pedestrians', frame)
        #plt.imshow(orig)
        #plt.show()
        rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
        pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)
        dic_img_box[path] = pick

    ## Exporting as pickle
    pickle.dump(dic_img_box,
                open('dic_img_box_' + '{}'.format(threshold) + '.p', "wb"))
    dic_img_box = pickle.load(
        open('dic_img_box_' + '{}'.format(threshold) + '.p', 'rb'))

    ### Load the image, get the contours of the shapes in it, check if they
    ### are human-shaped, and return boxes that could be human in shape
    print("Possible human regions using Contour detection ", "\n")
    dic_img_human = dict()
    for path in tqdm_notebook(img_path_):
        im = cv2.imread(path)
        orig = im.copy()
        height, width = im.shape[:2]
        new_width = 500
        new_height = new_width * height // width
        im = cv2.resize(im, (new_width, new_height),
                        interpolation=cv2.INTER_CUBIC)

        # Change to gray and apply both gaussian and threshold filter
        im_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
        blurred_im = cv2.GaussianBlur(im_gray, (1, 1), 0)
        ret, thresh = cv2.threshold(blurred_im, 220, 255, 0)

        # Compute contours
        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL,
                                       cv2.CHAIN_APPROX_SIMPLE)
        #print( contours)

        # Get dimensions of main contours
        human_boxes = []
        for cnt in contours:
            # Compute area size
            area = cv2.contourArea(cnt)
            if area > 3:  # chosen after studying the area of tarpaulin
                # remove overdimension of contours
                cnt_low = cnt[:, 0]
                # contour width
                x_max = np.max(cnt_low[:, 0]) * width // new_width
                x_min = np.min(cnt_low[:, 0]) * width // new_width
                # contour height
                y_max = np.max(cnt_low[:, 1]) * height // new_height
                y_min = np.min(cnt_low[:, 1]) * height // new_height
                #cv2.rectangle(orig, (x_min, y_min), (x_max, y_max), (0, 255, 0), 10)
                human_boxes.append([x_min, y_min, x_max, y_max])
        #plt.imshow(orig)
        #plt.show()
        dic_img_human[path] = human_boxes

    ## Exporting as pickle
    pickle.dump(dic_img_human, open("dic_img_human.p", "wb"))
    dic_img_human = pickle.load(open('dic_img_human.p', 'rb'))

    print("Keeping the overlapping of the 2 sets of regions as final boxes ", "\n")

    def doOverlap(box1, box2):
        # Returns true if two rectangles (l1, r1) and (l2, r2) overlap
        # If one rectangle is on the left side of the other
        if (box1[0] > box2[2] or box2[0] > box1[2]):
            return False
        # If one rectangle is above the other
        if (box1[1] > box2[3] or box2[1] > box1[3]):
            return False
        return True

    if not os.path.exists('img1_boxes'):
        os.mkdir('img1_boxes')
    dic_final_boxes = dict()
    for (id_im, path) in tqdm_notebook(enumerate(dic_img_box.keys())):
        pick = dic_img_box[path]
        pick_ = np.copy(pick)
        Del = list()
        for i, box1 in enumerate(pick):
            overlap = [doOverlap(box1, box2) for box2 in dic_img_human[path]]
            if sum(overlap) == 0:
                Del.append(i)
        pick_ = np.delete(pick_, Del, 0)
        dic_final_boxes[dic_Paths[path]] = pick_
        image = cv2.imread(dic_Paths[path], cv2.IMREAD_UNCHANGED)
        for (xA, yA, xB, yB) in pick_:
            cv2.rectangle(image, (xA, yA), (xB, yB), (0, 255, 0), 10)
        cv2.imwrite('img1_boxes' + '/' + '{:03d}'.format(id_im) + '.jpg', image)

    ## Exporting as pickle
    pickle.dump(dic_final_boxes,
                open('dic_final_boxes_' + '{}'.format(threshold) + '.p', "wb"))
    dic_final_boxes = pickle.load(
        open('dic_final_boxes_' + '{}'.format(threshold) + '.p', 'rb'))

    bounding_boxes = list()
    for frame_id, frame_path in enumerate(list(img_path)):
        for bb_id, box in enumerate(dic_final_boxes[frame_path]):
            bounding_boxes.append([
                frame_id, bb_id, box[0], box[1], box[2] - box[0],
                box[3] - box[1]
            ])
    return (bounding_boxes)
def getTextFromImage(image):
    orig = image.copy()
    (origH, origW) = image.shape[:2]

    # set the new width and height and then determine the ratio in change
    # for both the width and height
    (newW, newH) = 320, 320
    rW = origW / float(newW)
    rH = origH / float(newH)

    # resize the image and grab the new image dimensions
    image = cv2.resize(image, (newW, newH))
    (H, W) = image.shape[:2]

    # define the two output layer names for the EAST detector model that
    # we are interested in -- the first is the output probabilities and the
    # second can be used to derive the bounding box coordinates of text
    layerNames = ["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"]

    # load the pre-trained EAST text detector
    print("[INFO] loading EAST text detector...")
    net = cv2.dnn.readNet(east_text_detector)

    # construct a blob from the image and then perform a forward pass of
    # the model to obtain the two output layer sets
    blob = cv2.dnn.blobFromImage(image, 1.0, (W, H),
                                 (123.68, 116.78, 103.94), swapRB=True, crop=False)
    net.setInput(blob)
    (scores, geometry) = net.forward(layerNames)

    # decode the predictions, then apply non-maxima suppression to
    # suppress weak, overlapping bounding boxes
    (rects, confidences) = decode_predictions(scores, geometry)
    boxes = non_max_suppression(np.array(rects), probs=confidences)

    # initialize the list of results
    results = []

    # loop over the bounding boxes
    for (startX, startY, endX, endY) in boxes:
        # scale the bounding box coordinates based on the respective
        # ratios
        startX = int(startX * rW)
        startY = int(startY * rH)
        endX = int(endX * rW)
        endY = int(endY * rH)

        # in order to obtain a better OCR of the text we can potentially
        # apply a bit of padding surrounding the bounding box -- here we
        # are computing the deltas in both the x and y directions
        dX = int((endX - startX) * padding)
        dY = int((endY - startY) * padding)

        # apply padding to each side of the bounding box, respectively
        startX = max(0, startX - dX)
        startY = max(0, startY - dY)
        endX = min(origW, endX + (dX * 2))
        endY = min(origH, endY + (dY * 2))

        # extract the actual padded ROI
        roi = orig[startY:endY, startX:endX]

        # in order to apply Tesseract v4 to OCR text we must supply
        # (1) a language, (2) an OEM flag of 1, indicating that we
        # wish to use the LSTM neural net model for OCR, and finally
        # (3) a PSM value, in this case 7, which implies that we are
        # treating the ROI as a single line of text
        config = ("-l eng --oem 1 --psm 7")
        text = pytesseract.image_to_string(roi, config=config)

        # add the bounding box coordinates and OCR'd text to the list
        # of results
        results.append(((startX, startY, endX, endY), text))

    # sort the results bounding box coordinates from top to bottom
    results = sorted(results, key=lambda r: r[0][1])

    # loop over the results
    for ((startX, startY, endX, endY), text) in results:
        # display the text OCR'd by Tesseract
        send_message(CHAN, text)
def detect(image):
    """
    Function to perform detection on an image
    :param image: source cv image matrix to perform detection
    :type image: mat
    :return: detected boxes list by its coordinates in order of startX, startY, endX, endY
    :rtype: int[[]]
    """
    global rW
    global rH
    orig = image.copy()
    (H, W) = image.shape[:2]

    # resize
    (newW, newH) = (width, height)
    rW = W / float(newW)
    rH = H / float(newH)
    image = cv2.resize(image, (newW, newH))
    (H, W) = image.shape[:2]
    layerNames = ["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"]

    # load the pre-trained EAST text detector
    net = cv2.dnn.readNet(
        'C:/Users/turnt/OneDrive/Desktop/Rob0Workspace/opencv-text-detection/frozen_east_text_detection.pb'
    )

    # construct a blob from the image and then perform a forward pass of the
    # model to obtain the two output layer sets
    blob = cv2.dnn.blobFromImage(image, 1.0, (W, H),
                                 (123.68, 116.78, 103.94), swapRB=True, crop=False)
    start = time.time()
    net.setInput(blob)
    (scores, geometry) = net.forward(layerNames)
    end = time.time()
    print("text detection took {:.6f} seconds".format(end - start))

    # grab the number of rows and columns from the scores volume, then
    # initialize our set of bounding box rectangles and corresponding
    # confidence scores
    (numRows, numCols) = scores.shape[2:4]
    rects = []
    confidences = []

    # loop over the number of rows
    for y in range(0, numRows):
        # extract the scores (probabilities), followed by the geometrical
        # data used to derive potential bounding box coordinates that
        # surround text
        scoresData = scores[0, 0, y]
        xData0 = geometry[0, 0, y]
        xData1 = geometry[0, 1, y]
        xData2 = geometry[0, 2, y]
        xData3 = geometry[0, 3, y]
        anglesData = geometry[0, 4, y]

        # loop over the number of columns
        for x in range(0, numCols):
            # if our score does not have sufficient probability, ignore it
            if scoresData[x] < conf:
                continue
            # print("Found! Conf:{}".format(scoresData[x]))

            # compute the offset factor as our resulting feature maps will
            # be 4x smaller than the input image
            (offsetX, offsetY) = (x * 4.0, y * 4.0)

            # extract the rotation angle for the prediction and then
            # compute the sin and cosine
            angle = anglesData[x]
            cos = np.cos(angle)
            sin = np.sin(angle)

            # use the geometry volume to derive the width and height of
            # the bounding box
            h = xData0[x] + xData2[x]
            w = xData1[x] + xData3[x]

            # compute both the starting and ending (x, y)-coordinates for
            # the text prediction bounding box
            endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
            endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
            startX = int(endX - w)
            startY = int(endY - h)

            # add the bounding box coordinates and probability score to
            # our respective lists
            rects.append((startX, startY, endX, endY))
            confidences.append(scoresData[x])

    # apply non-maxima suppression to suppress weak, overlapping bounding
    # boxes
    boxes = non_max_suppression(np.array(rects), probs=confidences)
    # drawBoxes(boxes, orig)
    return boxes
def detector(filename):
    im = cv2.imread(filename)
    im = imutils.resize(im, width=min(400, im.shape[1]))
    min_wdw_sz = (64, 128)
    step_size = (10, 10)
    downscale = 1.6
    clf = joblib.load(os.path.join(model_path, 'svm.model'))

    # List to store the detections
    detections = []
    # The current scale of the image
    scale = 0
    for im_scaled in pyramid_gaussian(im, downscale=downscale):
        # The list contains detections at the current scale
        if im_scaled.shape[0] < min_wdw_sz[1] or im_scaled.shape[1] < min_wdw_sz[0]:
            break
        for (x, y, im_window) in sliding_window(im_scaled, min_wdw_sz, step_size):
            if im_window.shape[0] != min_wdw_sz[1] or im_window.shape[1] != min_wdw_sz[0]:
                continue
            im_window = color.rgb2gray(im_window)
            fd = hog(im_window, orientations=9, pixels_per_cell=(6, 6),
                     cells_per_block=(2, 2), block_norm='L1', visualise=False,
                     transform_sqrt=False, feature_vector=True, normalise=None)
            fd = fd.reshape(1, -1)
            pred = clf.predict(fd)
            if pred == 1:
                if clf.decision_function(fd) > 0.5:
                    detections.append(
                        (int(x * (downscale ** scale)),
                         int(y * (downscale ** scale)),
                         clf.decision_function(fd),
                         int(min_wdw_sz[0] * (downscale ** scale)),
                         int(min_wdw_sz[1] * (downscale ** scale))))
        scale += 1
    clone = im.copy()
    rects = np.array([[x, y, x + w, y + h] for (x, y, _, w, h) in detections])
    sc = [score[0] for (x, y, score, w, h) in detections]
    print("sc: ", sc)
    sc = np.array(sc)
    pick = non_max_suppression(rects, probs=sc, overlapThresh=0.3)
    #print ("shape, ", pick.shape)
    for (x_tl, y_tl, _, w, h) in detections:
        cv2.rectangle(im, (x_tl, y_tl), (x_tl + w, y_tl + h), (0, 255, 0),
                      thickness=2)
    for (xA, yA, xB, yB) in pick:
        cv2.rectangle(clone, (xA, yA), (xB, yB), (0, 255, 0), 2)
    plt.axis("off")
    plt.imshow(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))
    plt.title("Raw Detection before NMS")
    plt.show()
    plt.axis("off")
    plt.imshow(cv2.cvtColor(clone, cv2.COLOR_BGR2RGB))
    plt.title("Final Detections after applying NMS")
    plt.show()
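# detector() above iterates with a sliding_window helper that is not shown; a
# common sketch of it (an assumption based on how it is called):
def sliding_window(image, window_size, step_size):
    # yield (x, y, window) patches scanned left-to-right, top-to-bottom;
    # window_size is (width, height), step_size is (x_step, y_step)
    for y in range(0, image.shape[0], step_size[1]):
        for x in range(0, image.shape[1], step_size[0]):
            yield (x, y, image[y:y + window_size[1], x:x + window_size[0]])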
def detect_cmnd(image):
    orig = image.copy()
    (H, W) = image.shape[:2]
    (newW, newH) = (320, 320)
    rW = W / float(newW)
    rH = H / float(newH)

    # resize the image and grab the new image dimensions
    image = cv2.resize(image, (newW, newH))
    (H, W) = image.shape[:2]

    # define the two output layer names for the EAST detector model that
    # we are interested in -- the first is the output probabilities and the
    # second can be used to derive the bounding box coordinates of text
    layerNames = ["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"]
    net = cv2.dnn.readNet('frozen_east_text_detection.pb')

    # construct a blob from the image and then perform a forward pass of
    # the model to obtain the two output layer sets
    blob = cv2.dnn.blobFromImage(image, 1.0, (W, H),
                                 (123.68, 116.78, 103.94), swapRB=True, crop=False)
    net.setInput(blob)
    (scores, geometry) = net.forward(layerNames)
    (numRows, numCols) = scores.shape[2:4]
    rects = []
    confidences = []

    # loop over the number of rows
    for y in range(0, numRows):
        # extract the scores (probabilities), followed by the geometrical
        # data used to derive potential bounding box coordinates that
        # surround text
        scoresData = scores[0, 0, y]
        xData0 = geometry[0, 0, y]
        xData1 = geometry[0, 1, y]
        xData2 = geometry[0, 2, y]
        xData3 = geometry[0, 3, y]
        anglesData = geometry[0, 4, y]

        # loop over the number of columns
        for x in range(0, numCols):
            # if our score does not have sufficient probability, ignore it
            if scoresData[x] < 0.3:
                continue

            # compute the offset factor as our resulting feature maps will
            # be 4x smaller than the input image
            (offsetX, offsetY) = (x * 4.0, y * 4.0)

            # extract the rotation angle for the prediction and then
            # compute the sin and cosine
            angle = anglesData[x]
            cos = np.cos(angle)
            sin = np.sin(angle)

            # use the geometry volume to derive the width and height of
            # the bounding box
            h = xData0[x] + xData2[x]
            w = xData1[x] + xData3[x]

            # compute both the starting and ending (x, y)-coordinates for
            # the text prediction bounding box
            endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
            endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
            startX = int(endX - w)
            startY = int(endY - h)

            # add the bounding box coordinates and probability score to
            # our respective lists
            rects.append((startX, startY, endX, endY))
            confidences.append(scoresData[x])

    # apply non-maxima suppression to suppress weak, overlapping bounding
    # boxes
    boxes = non_max_suppression(np.array(rects), probs=confidences)

    # track the extreme coordinates across all detected boxes
    MAX_y = 0
    MIN_y = orig.shape[0]
    MAX_X = 0
    MIN_X = orig.shape[1]
    for (startX, startY, endX, endY) in boxes:
        # scale the bounding box coordinates based on the respective ratios
        startY = int(startY * rH)
        startX = int(startX * rW)
        endY = int(endY * rH)
        endX = int(endX * rW)
        if endY > MAX_y:
            MAX_y = endY
        if startY < MIN_y:
            MIN_y = startY
        if startX < MIN_X:
            MIN_X = startX
        if endX > MAX_X:
            MAX_X = endX
    new_img = orig[MIN_y:MAX_y + 30, MIN_X - 30:]
    return new_img
def _detect_text(self):
    """Detect text regions with EAST and OCR each region with Tesseract."""
    # load the input image and grab the image dimensions
    image = cv2.imread(self.image_path)
    orig = image.copy()
    (origH, origW) = image.shape[:2]

    # set the new width and height and then determine the ratio in change
    # for both the width and height
    (newW, newH) = (self.width, self.height)
    rW = origW / float(newW)
    rH = origH / float(newH)

    # resize the image and grab the new image dimensions
    image = cv2.resize(image, (newW, newH))
    (H, W) = image.shape[:2]

    # construct a blob from the image and then perform a forward pass of
    # the model to obtain the two output layer sets
    blob = cv2.dnn.blobFromImage(image, 1.0, (W, H),
                                 (123.68, 116.78, 103.94), swapRB=True, crop=False)
    self.net.setInput(blob)
    (scores, geometry) = self.net.forward(self.layerNames)

    # decode the predictions, then apply non-maxima suppression to
    # suppress weak, overlapping bounding boxes
    (rects, confidences) = self._decode_predictions(scores, geometry)
    boxes = non_max_suppression(np.array(rects), probs=confidences)

    # initialize the list of results
    results = []

    # loop over the bounding boxes
    for (startX, startY, endX, endY) in boxes:
        # scale the bounding box coordinates based on the respective
        # ratios
        startX = int(startX * rW)
        startY = int(startY * rH)
        endX = int(endX * rW)
        endY = int(endY * rH)

        # in order to obtain a better OCR of the text we can potentially
        # apply a bit of padding surrounding the bounding box -- here we
        # are computing the deltas in both the x and y directions
        dX = int((endX - startX) * self.padding)
        dY = int((endY - startY) * self.padding)

        # apply padding to each side of the bounding box, respectively
        startX = max(0, startX - dX)
        startY = max(0, startY - dY)
        endX = min(origW, endX + (dX * 2))
        endY = min(origH, endY + (dY * 2))

        # extract the actual padded ROI
        roi = orig[startY:endY, startX:endX]

        # in order to apply Tesseract v4 to OCR text we must supply
        # (1) a language, (2) an OEM flag of 1, indicating that we
        # wish to use the LSTM neural net model for OCR, and finally
        # (3) a PSM value, in this case 7, which implies that we are
        # treating the ROI as a single line of text
        config = ("-l eng --oem 1 --psm 7")
        text = pytesseract.image_to_string(roi, config=config)

        # add the bounding box coordinates and OCR'd text to the list
        # of results
        results.append(((startX, startY, endX, endY), text))

    # sort the results bounding box coordinates from top to bottom
    results = sorted(results, key=lambda r: r[0][1])
    return results
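# _detect_text() above references self.net, self.layerNames, self.width,
# self.height, self.padding and self._decode_predictions; a plausible
# constructor sketch (class name, attribute defaults and the model path are
# assumptions; _decode_predictions would mirror the decode_predictions
# sketch shown earlier):
class TextDetector:
    def __init__(self, image_path, east_model='frozen_east_text_detection.pb',
                 width=320, height=320, padding=0.05):
        self.image_path = image_path
        self.width = width
        self.height = height
        self.padding = padding
        # EAST output layers: text probabilities and box geometry
        self.layerNames = ["feature_fusion/Conv_7/Sigmoid",
                           "feature_fusion/concat_3"]
        self.net = cv2.dnn.readNet(east_model)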
print("[INFO] loading EAST text detector...") net = cv2.dnn.readNet(args["east"]) # construct a blob from the image and then perform a forward pass of # the model to obtain the two output layer sets blob = cv2.dnn.blobFromImage(image, 1.0, (W, H), (123.68, 116.78, 103.94), swapRB=True, crop=False) net.setInput(blob) (scores, geometry) = net.forward(layerNames) # decode the predictions, then apply non-maxima suppression to # suppress weak, overlapping bounding boxes (rects, confidences) = decode_predictions(scores, geometry) boxes = non_max_suppression(np.array(rects), probs=confidences) # initialize the list of results results = [] # loop over the bounding boxes for (startX, startY, endX, endY) in boxes: # scale the bounding box coordinates based on the respective # ratios startX = int(startX * rW) startY = int(startY * rH) endX = int(endX * rW) endY = int(endY * rH) # in order to obtain a better OCR of the text we can potentially # apply a bit of padding surrounding the bounding box -- here we
def main():
    try:
        # initialize leds
        gpio.setmode(gpio.BCM)
        gpio.setup(17, gpio.OUT)
        gpio.setup(27, gpio.OUT)
        gpio.output(27, True)

        # initialize the camera and the HOG descriptor/person detector
        camera = PiCamera()
        camera.hflip = True
        camera.vflip = True
        camera.resolution = (320, 240)
        camera.framerate = 8
        rawCapture = PiRGBArray(camera, size=(320, 240))
        time.sleep(0.25)
        hog = cv2.HOGDescriptor()
        hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
        Threshold = 0
        features_number = 0
        tracked_features = None
        detected = False

        for frame in camera.capture_continuous(rawCapture, format="bgr",
                                               use_video_port=True):
            if not detected:
                # detection block
                gpio.output(17, False)
                Threshold = 0
                unchangedPointsMap = dict()
                current_frame = frame.array
                #current_frame = imutils.resize(current_frame, width = 300)
                current_frame_copy = current_frame.copy()
                current_frame = cv2.cvtColor(current_frame, cv2.COLOR_BGR2GRAY)

                # detect people in the image
                (rects, weights) = hog.detectMultiScale(current_frame,
                    winStride=(4, 4), padding=(8, 8), scale=1.5)

                # shrink and draw the original bounding boxes
                for i in range(len(rects)):
                    x, y, w, h = rects[i]
                    rects[i][0] = x + 15
                    rects[i][1] = y + 40
                    rects[i][2] = w - 30
                    rects[i][3] = h - 40
                for (x, y, w, h) in rects:
                    cv2.rectangle(current_frame_copy, (x, y),
                                  (x + w, y + h), (0, 0, 255), 2)

                # Filter boxes
                rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
                pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)

                # draw the final bounding boxes
                for (xA, yA, xB, yB) in pick:
                    cv2.rectangle(current_frame, (xA, yA), (xB, yB), (0, 255, 0), 2)
                print("{} original boxes, {} after suppression".format(
                    len(rects), len(pick)))
                if len(rects) > 0:
                    features, height_from_floor = find_features(
                        current_frame, rects[0], 0)
                    #print(features)
                    detected = True
                    gpio.output(17, True)

            if detected:
                # Tracking block
                if Threshold == 0:
                    features_number = len(features)
                    Threshold = features_number * threshold_percent
                    #print ("Threshold" + str(Threshold))
                if features_number < Threshold:
                    print("Features less than threshold")
                    detected = False
                else:
                    rawCapture.truncate(0)
                    next_frame = frame.array
                    #next_frame = imutils.resize(next_frame, width = 300)
                    current_frame_copy = next_frame.copy()
                    next_frame = cv2.cvtColor(next_frame, cv2.COLOR_BGR2GRAY)

                    #-----------Tracking using LK ---------------------------
                    try:
                        features = np.array(features)
                        (tracked_features, status,
                         feature_errors) = cv2.calcOpticalFlowPyrLK(
                            current_frame, next_frame, features, None, **lk_params)
                        arr_x = []
                        arr_y = []
                        for i in range(len(tracked_features)):
                            f = tracked_features[i]
                            x = f[0][0]
                            y = f[0][1]
                            arr_x.append(x)
                            arr_y.append(y)
                        arr_x = sorted(arr_x)
                        arr_y = sorted(arr_y)
                        # median of the tracked points (integer division so
                        # the result can be used as an index)
                        mid = len(arr_x) // 2
                        X = arr_x[mid]
                        mid = len(arr_y) // 2
                        Y = arr_y[mid]
                        #print(X)
                        new_feature_number = 0
                        temp_set_number = []
                        temp_distance = []
                        j = 0
                        print("Height_from_floor" + str(height_from_floor))
                        print("num" + str(features_number))
                        #print ("Status" + str(status))
                        #print ("Status[0] " + str(status[0]))
                        #print ("Status[1] " + str(status[1]))
                        #print ("Status[1][0] " + str(status[1][0]))
                        for i in range(features_number):
                            if status[i][0] == 1:
                                new_feature_number += 1
                                temp_distance.append(height_from_floor[i])
                        print(temp_distance)
                        height_from_floor = []
                        print("Here")
                        for i in range(len(temp_distance)):
                            height_from_floor.append(temp_distance[i])
                        print("Here2")
                        features_number = new_feature_number
                        features = []
                        for i in range(features_number):
                            features.append(tracked_features[i])
                        features = np.array(features)
                        tracked_features = []
                        current_frame = next_frame.copy()
                    except Exception as e:
                        raise e

                    #-------Compute Distance --------------------
                    status, v = scaled_people_floor(features_number, features,
                                                    height_from_floor)
                    if status:
                        distance = compute_distance(v)
                        print(distance)
                        cv2.putText(current_frame_copy, str(distance),
                                    (current_frame_copy.shape[1] - 200,
                                     current_frame_copy.shape[0] - 20),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 255, 0), 3)

                    #-------Showing Points ------------------------
                    for i in range(features_number):
                        cv2.circle(current_frame_copy, tuple(features[i][0]),
                                   3, 255, -1)
                    cv2.circle(current_frame_copy, (X, Y), 5, (0, 0, 255), -1)

            # show the output images
            cv2.imshow("HOG", current_frame_copy)
            key = cv2.waitKey(1) & 0xFF
            rawCapture.truncate(0)
            if key == ord("w"):
                break
    except (KeyboardInterrupt, SystemExit):
        gpio.output(27, False)
        gpio.output(17, False)
        camera.close()  # PiCamera objects are closed rather than released
        cv2.destroyAllWindows()
        raise
def get_charaters(image, resise_factor=20, confidence_limit=0.5, padding=7):
    ## Image preprocessing
    # Resize image
    height_resised = 32 * resise_factor
    width_resised = 32 * resise_factor
    image = cv2.resize(image, (height_resised, width_resised))

    # Save original image for drawing purposes later
    originial_image = image

    # Display image
    # cv2.imshow("Image", image)
    # cv2.waitKey(0)
    # cv2.imwrite('images/original.jpg', image)

    ## Text detection using EAST
    # Load pre-trained text detector
    net = cv2.dnn.readNet('frozen_east_text_detection.pb')

    # Define the two output layer names for the EAST detector model that
    # we are interested in -- the first is the output probabilities and the
    # second can be used to derive the bounding box coordinates of text
    layerNames = ["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"]

    # Construct a blob from the image and then perform a forward pass of
    # the model to obtain the two output layer sets
    blob = cv2.dnn.blobFromImage(image, 1.0, (height_resised, width_resised),
                                 (123.68, 116.78, 103.94), swapRB=True, crop=False)
    net.setInput(blob)
    scores, geometry = net.forward(layerNames)

    # Grab the number of rows and columns from the scores volume, then
    # initialize our set of bounding box rectangles and corresponding
    # confidence scores
    (numRows, numCols) = scores.shape[2:4]
    rects = []
    confidences = []

    # Loop over the number of rows
    for y in range(0, numRows):
        # Extract the scores (probabilities), followed by the geometrical
        # data used to derive potential bounding box coordinates that
        # surround text
        scoresData = scores[0, 0, y]
        xData0 = geometry[0, 0, y]
        xData1 = geometry[0, 1, y]
        xData2 = geometry[0, 2, y]
        xData3 = geometry[0, 3, y]
        anglesData = geometry[0, 4, y]

        # Loop over the number of columns
        for x in range(0, numCols):
            # If our score does not have sufficient probability, ignore it
            if scoresData[x] < confidence_limit:
                continue

            # Compute the offset factor as our resulting feature maps will
            # be 4x smaller than the input image
            (offsetX, offsetY) = (x * 4.0, y * 4.0)

            # Extract the rotation angle for the prediction and then
            # compute the sin and cosine
            angle = anglesData[x]
            cos = np.cos(angle)
            sin = np.sin(angle)

            # Use the geometry volume to derive the width and height of
            # the bounding box
            h = xData0[x] + xData2[x]
            w = xData1[x] + xData3[x]

            # Compute both the starting and ending (x, y)-coordinates for
            # the text prediction bounding box
            endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
            endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
            startX = int(endX - w)
            startY = int(endY - h)

            # Add the bounding box coordinates and probability score to
            # our respective lists
            rects.append((startX, startY, endX, endY))
            confidences.append(scoresData[x])

    # Apply non-maxima suppression to suppress weak, overlapping bounding
    # boxes
    boxes = non_max_suppression(np.array(rects), probs=confidences)

    ## Running OCR
    # Gray scale image
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Applying adaptive thresholding
    image = cv2.adaptiveThreshold(image, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                  cv2.THRESH_BINARY, 115, 1)
    # cv2.imshow('Adaptive threshold', image)
    # cv2.waitKey(0)
    # cv2.imwrite('images/thresholded.jpg', image)

    # Applying gaussian blur
    # image = cv2.GaussianBlur(image, (7, 7), 0)
    # cv2.imshow('Gaussian blur', image)
    # cv2.waitKey(0)

    # Loop over the bounding boxes and get text
    results = []
    for (startX, startY, endX, endY) in boxes:
        # Apply padding to bounding boxes
        startX = max(0, startX - padding)
        startY = max(0, startY - padding)
        endX = min(width_resised, endX + padding)
        endY = min(height_resised, endY + padding)

        # Extract bounding box as roi (region of interest)
        roi = image[startY:endY, startX:endX]
        roi = cv2.adaptiveThreshold(roi, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                    cv2.THRESH_BINARY, 115, 1)

        # In order to apply Tesseract v4 to OCR text we must supply
        # (1) a language, (2) an OEM flag of 1, indicating that we
        # wish to use the LSTM neural net model for OCR, and finally
        # (3) a PSM value, in this case 7, which implies that we are
        # treating the ROI as a single line of text
        config = ("-l eng --oem 1 --psm 7")
        text = pytesseract.image_to_string(roi, config=config)

        # Add the bounding box coordinates and OCR'd text to the list
        # of results
        results.append(((startX, startY, endX, endY), text))

    ## Printing result
    # Sort the results bounding box coordinates from top to bottom
    results = sorted(results, key=lambda r: r[0][1])

    # Loop over the results
    ocr_outputs = []
    marked_image = originial_image  # NOTE: an alias, not a copy, so drawing also marks originial_image
    for ((startX, startY, endX, endY), text) in results:
        # Display the text OCR'd by Tesseract
        print("OCR TEXT")
        print("========")
        print("{}\n".format(text))

        # Draw the bounding box on the image (TESTING PURPOSES ONLY)
        cv2.rectangle(marked_image, (startX, startY), (endX, endY), (0, 255, 0), 2)

        # Strip out non-ASCII text so we can draw the text on the image
        # using OpenCV, then draw the text and a bounding box surrounding
        # the text region of the input image
        text = "".join([c if ord(c) < 128 else "" for c in text]).strip()
        cv2.putText(marked_image, text, (startX, startY - 20),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 0, 255), 3)

        # Add outputs to list to be returned
        ocr_outputs.append(text)

    # Show the output image
    # cv2.imshow("Text detected", marked_image)
    # cv2.waitKey(0)
    # cv2.imwrite('images/final.jpg', marked_image)

    # Return all ocr outputs
    return originial_image, image, marked_image, ocr_outputs
def OpenFile(): name = askopenfilename(initialdir="/", title="Select file", filetypes=(("jpeg files", "*.jpg *.png"), ("all files", "*.*"))) global fileName fileName = name global image_address image_address = fileName try: img = ImageTk.PhotoImage(Image.open(fileName)) p2 = tk.Label(root, image=img).pack() except Exception: pass #detection code start from here #constant variables for text detection min_confidence = 0.5 width = 320 height = 320 # load the input image image = cv2.imread(image_address) orig = image.copy() (H, W) = image.shape[:2] # set new width and height (newW, newH) = (width, height) rW = W / float(newW) rH = H / float(newH) # resize the image image = cv2.resize(image, (newW, newH)) (H, W) = image.shape[:2] #the first layer is the output probabilities #the second layer is used to derive the bounding box coordinates of text layerNames = ["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"] # load the pre-trained EAST text detector print("[INFO] loading EAST text detector...") net = cv2.dnn.readNet('frozen_east_text_detection.pb') # construct a blob from the image # create the model with the two output layer sets blob = cv2.dnn.blobFromImage(image, 1.0, (W, H), (123.68, 116.78, 103.94), swapRB=True, crop=False) net.setInput(blob) (scores, geometry) = net.forward(layerNames) # get rows and coloums from score #intialize rectangles gor bounding boxs and confidence score (numRows, numCols) = scores.shape[2:4] rects = [] confidences = [] # loop over rows for y in range(0, numRows): # get the score data and dimensions of the rectangle scoresData = scores[0, 0, y] xData0 = geometry[0, 0, y] xData1 = geometry[0, 1, y] xData2 = geometry[0, 2, y] xData3 = geometry[0, 3, y] anglesData = geometry[0, 4, y] # loop over columns for x in range(0, numCols): # score not confident if scoresData[x] < min_confidence: continue # compute the offset factor (offsetX, offsetY) = (x * 4.0, y * 4.0) # get rotation angle and make sin and cosine angle = anglesData[x] cos = np.cos(angle) sin = np.sin(angle) # width and height of the bounding box h = xData0[x] + xData2[x] w = xData1[x] + xData3[x] # compute x and y coordinates of the bounding box endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x])) endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x])) startX = int(endX - w) startY = int(endY - h) # add the rectangles and score made rects.append((startX, startY, endX, endY)) confidences.append(scoresData[x]) # apply non_max_suppression boxes = non_max_suppression(np.array(rects), probs=confidences) #sorting of text arrangement def func(val): return val[1] boxes = sorted(boxes, key=func) #function to find if r1 rectangle contains r2 rectangle def contains(r1, r2): return (r1[0] < r2[0] < r2[0] + r2[2] < r1[0] + r1[2]) and ( r1[1] < r2[1] < r2[1] + r2[3] < r1[1] + r1[3]) #list to contain all text in character form #eg crop_imag = [[H,E,L,L,O][W,O,R,L,D]] crop_img = [] # loop over the bounding boxes for (startX, startY, endX, endY) in boxes: # scale the bounding box coordinates based on the respective # ratios startX = int(startX * rW) startY = int(startY * rH) endX = int(endX * rW) endY = int(endY * rH) #croping of text from original image unit = orig[startY:endY, startX:endX] #changing cropped image to grayscale image gray_img = cv2.cvtColor(unit, cv2.COLOR_BGR2GRAY) #finding threshold value from the grayscale average value threshold = np.mean(gray_img) #getting the binary image with the help of thresholding _, thresh = cv2.threshold(gray_img, threshold, 255, cv2.THRESH_BINARY) 
#dilation and erosion # thresh = cv2.dilate(thresh, kernel, iterations=1) # thresh = cv2.erode(thresh, kernel, iterations=1) #Finding contours in the binary image contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) #list of contour polygon and bounding rectangle contours_poly = [None] * len(contours) boundRect = [None] * len(contours) #looping in the contour list for i, c in enumerate(contours): #making polygons of the contours contours_poly[i] = cv2.approxPolyDP(c, 3, True) #making rectangles from that polygon # returns x,y,w,h of rectangle in a list boundRect[i] = cv2.boundingRect(contours_poly[i]) #sorting the rectangle boxes into ascending order of x boundRect = sorted(boundRect, key=lambda x: x[0]) #list to crop character image from text char_crop = [] #cropping the rectangles that are not in another rectangle for i in range(len(boundRect)): count = 1 for j in range(len(boundRect)): if not i == j: if not contains(boundRect[j], boundRect[i]): count += 1 if count == len(boundRect): char_crop.append( unit[boundRect[i][1]:boundRect[i][1] + boundRect[i][3], boundRect[i][0]:boundRect[i][0] + boundRect[i][2]]) #adding the cropped characters to the final list crop_img.append(char_crop) #importing keras files to load trained model from keras.models import model_from_json # load json and create model json_file = open('model.json', 'r') loaded_model_json = json_file.read() json_file.close() loaded_model = model_from_json(loaded_model_json) # load weights into new model loaded_model.load_weights("model.h5") # Compile model loaded_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy']) #function to return the prediction location that is equal to 1 (assumes exact one-hot model outputs; see the argmax sketch after this block) def result(array): rt = 26 for i in range(0, len(array[0])): if array[0][i] == 1: rt = i return rt #prediction list as the model was trained predict_list = [ 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '' ] global content content = '' #predicting every character in the text in the crop_img for text_img in crop_img: for image in text_img: image = cv2.resize(image, (32, 32)) image = np.expand_dims(image, axis=0) rslt = result(loaded_model.predict(image)) content += predict_list[rslt] content += ' ' print(content) out = tk.Label(root, justify=tk.CENTER, padx=10, text=content).pack()
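The result helper above only fires when the network emits an exact one-hot vector. If the model instead produces softmax probabilities, a hedged alternative (the name and threshold here are assumptions) is an argmax decode:

import numpy as np

def result_argmax(pred, blank_index=26, min_prob=0.5):
    # Index of the most probable class; fall back to the blank class
    # (index 26 in predict_list above) when the model is not confident.
    idx = int(np.argmax(pred[0]))
    return idx if pred[0][idx] >= min_prob else blank_index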
def main():
    # read the video
    readVideo = cv2.VideoCapture("london_bus.mp4")
    cv2.namedWindow("Pedestrian Detection")
    detectedPedestrians = {}
    firstFrame = True
    frames = 0
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    # note: VideoWriter silently drops frames whose size differs from (640, 480)
    out = cv2.VideoWriter('output.avi', fourcc, 240.0, (640, 480))
    pauseVideo = False
    # initialize the HOG descriptor once, outside the frame loop
    hog = cv2.HOGDescriptor()
    hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
    while True:
        if pauseVideo == False:
            flagCaptured, frame = readVideo.read()
            #print('Frame=',frame)
            if flagCaptured is False:
                print("could not get frame")
                break
            # winStride and hitThreshold are used to adjust for maximizing detections and reducing
            # false positives
            (rects, weights) = hog.detectMultiScale(frame, winStride=(8, 8), padding=(8, 8),
                                                    scale=1.05, hitThreshold=0.22)
            # get bigger than needed bounding boxes and then apply non-maxima suppression
            rectBoxes = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
            # apply non-maxima suppression to the huge bounding boxes
            suppressedRectBoxes = non_max_suppression(rectBoxes, probs=None, overlapThresh=0.95)
            counter = 0
            # draw the final bounding boxes
            for (xA, yA, xB, yB) in suppressedRectBoxes:
                cv2.rectangle(frame, (xA, yA), (xB, yB), (0, 255, 0), 2)
                # identifying a new box is done only once. An already identified box is tracked
                # instead of detecting all over again
                if firstFrame is True:
                    detectedPedestrians[counter] = pedestrianTracking(
                        counter, frame, (xA, yA, abs(xB - xA), abs(yB - yA)))  # (id, frame, bounding_box)
                    counter += 1
            for key, value in detectedPedestrians.items():
                value.update_predict(frame)
            firstFrame = False
            frames += 1
            #print(frames)
            cv2.imshow("Pedestrian Detection", frame)
            out.write(frame)
        # read the key once per iteration so a single event serves all shortcuts
        # (repeated waitKey calls would each consume a key event)
        key = cv2.waitKey(10) & 0xFF
        # press ESC to close video window
        if key == 27:
            cv2.destroyWindow("Pedestrian Detection")
            break
        # press spacebar to pause video
        if key == 32:
            print('Video paused')
            pauseVideo = True
        # press enter to resume
        if key == 13:
            print('Video resumed')
            pauseVideo = False
    out.release()
    readVideo.release()
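The loop above relies on a pedestrianTracking class that is not shown. A hedged sketch of what it might look like, built on OpenCV's KCF tracker (requires an opencv-contrib build; on some 4.x versions the factory lives in cv2.legacy instead):

import cv2

class pedestrianTracking:
    """Hypothetical sketch of the tracker class used above."""

    def __init__(self, ident, frame, bounding_box):
        self.ident = ident
        self.tracker = cv2.TrackerKCF_create()
        self.tracker.init(frame, bounding_box)

    def update_predict(self, frame):
        # advance the tracker and draw the predicted box when tracking succeeds
        ok, box = self.tracker.update(frame)
        if ok:
            (x, y, w, h) = [int(v) for v in box]
            cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
        return ok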
def get_caffe_detections(fname, img):
    detector = init_caffe()
    pretrained_model = "./models/bvlc_reference_rcnn_ilsvrc13.caffemodel"  # help="Trained model weights file."
    model_def = "./models/deploy.prototxt"  # help="Model definition file."
    labels_file = './models/det_synset_words.txt'
    COORD_COLS = ['ymin', 'xmin', 'ymax', 'xmax']
    boxes = my_sliding_windows.get_windows(fname, img, 40, 106)  # 50, 256
    TESTDATA = io.StringIO(my_sliding_windows.get_str_to_csv(boxes))
    # Load input.
    t = time.time()
    print("Loading input...")
    f = TESTDATA
    # Detect.
    # index 123 is human
    inputs = pd.read_csv(f, sep=',', dtype={'filename': str})
    inputs.set_index('filename', inplace=True)
    # Unpack sequence of (image filename, windows).
    images_windows = [
        (ix, inputs.iloc[np.where(inputs.index == ix)][COORD_COLS].values)
        for ix in inputs.index.unique()
    ]
    detections = detector.detect_windows(images_windows)
    # using selective search
    # detections = detector.detect_selective_search(inputs)
    print("Processed {} windows in {:.3f} s.".format(len(detections), time.time() - t))
    # loop through the output and filter humans
    # get labels for classes
    with open(labels_file) as f:
        labels_df = [
            {
                'synset_id': l.strip().split(' ')[0],
                'name': ' '.join(l.strip().split(' ')[1:]).split(',')[0]
            }
            for l in f.readlines()
        ]
    # print("detections object:", detections[0])
    # imgcpy = cv2.imread(fname)
    totalrects = []
    for i in detections:
        prdt = list(i['prediction'])  # list() so .index() works under Python 3
        maxp = max(prdt)
        maxi = prdt.index(maxp)
        if maxi == 123 and maxp > 0.5:
            # print("human detected!")
            coord = i['window']
            totalrects.append([coord[1], coord[0], coord[3], coord[2]])
    print("Total humans detected:", len(totalrects))
    # non max suppression
    totalrects = np.array(totalrects)
    processedboxes = non_max_suppression(totalrects, probs=None, overlapThresh=0.55)  # overlapThresh default 0.65
    print("Total humans after NMS correction:", len(processedboxes))
    return processedboxes
def main():
    try:
        # initialize leds
        gpio.setmode(gpio.BCM)
        gpio.setup(17, gpio.OUT)
        gpio.setup(27, gpio.OUT)
        gpio.output(27, True)
        # initialize the HOG descriptor/person detector
        camera = PiCamera()
        camera.hflip = True
        camera.vflip = True
        camera.resolution = (320, 240)
        camera.framerate = 8
        rawCapture = PiRGBArray(camera, size=(320, 240))
        time.sleep(0.25)
        hog = cv2.HOGDescriptor()
        hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
        Threshold = 0
        features_number = 0
        tracked_features = None
        detected = False
        for frame in camera.capture_continuous(rawCapture, format="bgr", use_video_port=True):
            if not detected:
                # detection block
                gpio.output(17, False)
                Threshold = 0
                unchangedPointsMap = dict()
                current_frame = frame.array
                #current_frame = imutils.resize(current_frame, width = 300)
                current_frame_copy = current_frame.copy()
                current_frame = cv2.cvtColor(current_frame, cv2.COLOR_BGR2GRAY)
                # detect people in the image
                (rects, weights) = hog.detectMultiScale(current_frame, winStride=(4, 4),
                                                        padding=(8, 8), scale=1.5)
                # draw the original bounding boxes
                for i in range(len(rects)):
                    x, y, w, h = rects[i]
                    rects[i][0] = x + 15
                    rects[i][1] = y + 40
                    rects[i][2] = w - 30
                    rects[i][3] = h - 40
                for (x, y, w, h) in rects:
                    cv2.rectangle(current_frame_copy, (x, y), (x + w, y + h), (0, 0, 255), 2)
                # Filter boxes
                rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
                pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)
                # draw the final bounding boxes
                for (xA, yA, xB, yB) in pick:
                    cv2.rectangle(current_frame, (xA, yA), (xB, yB), (0, 255, 0), 2)
                print("{} original boxes, {} after suppression".format(len(rects), len(pick)))
                if len(rects) > 0:
                    features, height_from_floor = find_features(current_frame, rects[0], 0)
                    #print(features)
                    detected = True
                    gpio.output(17, True)
            if detected:
                # Tracking block
                if Threshold == 0:
                    features_number = len(features)
                    Threshold = features_number * threshold_percent
                    #print ("Threshold" + str(Threshold))
                if features_number < Threshold:
                    print("Features less than threshold")
                    detected = False
                else:
                    rawCapture.truncate(0)
                    next_frame = frame.array
                    #next_frame = imutils.resize(next_frame, width = 300)
                    current_frame_copy = next_frame.copy()
                    next_frame = cv2.cvtColor(next_frame, cv2.COLOR_BGR2GRAY)
                    #-----------Tracking using LK ---------------------------
                    try:
                        features = np.array(features)
                        (tracked_features, status, feature_errors) = cv2.calcOpticalFlowPyrLK(
                            current_frame, next_frame, features, None, **lk_params)
                        arr_x = []
                        arr_y = []
                        for i in range(len(tracked_features)):
                            f = tracked_features[i]
                            x = f[0][0]
                            y = f[0][1]
                            arr_x.append(x)
                            arr_y.append(y)
                        arr_x = sorted(arr_x)
                        arr_y = sorted(arr_y)
                        # integer division so the median index is a valid list index
                        mid = len(arr_x) // 2
                        X = arr_x[mid]
                        mid = len(arr_y) // 2
                        Y = arr_y[mid]
                        #print(X)
                        new_feature_number = 0
                        temp_set_number = []
                        temp_distance = []
                        j = 0
                        print("Height_from_floor" + str(height_from_floor))
                        print("num" + str(features_number))
                        #print ("Status" + str(status))
                        #print ("Status[0] " + str(status[0]))
                        #print ("Status[1] " + str(status[1]))
                        #print ("Status[1][0] " + str(status[1][0]))
                        for i in range(features_number):
                            if status[i][0] == 1:
                                new_feature_number += 1
                                temp_distance.append(height_from_floor[i])
                        print(temp_distance)
                        height_from_floor = []
                        print("Here")
                        for i in range(len(temp_distance)):
                            height_from_floor.append(temp_distance[i])
                        print("Here2")
                        features_number = new_feature_number
                        features = []
                        for i in range(features_number):
                            features.append(tracked_features[i])
                        features = np.array(features)
                        tracked_features = []
                        current_frame = next_frame.copy()
                    except Exception as e:
                        raise e
                    #-------Compute Distance --------------------
                    status, v = scaled_people_floor(features_number, features, height_from_floor)
                    if status:
                        distance = compute_distance(v)
                        print(distance)
                        cv2.putText(current_frame_copy, str(distance),
                                    (current_frame_copy.shape[1] - 200, current_frame_copy.shape[0] - 20),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 255, 0), 3)
                    #-------Showing Points ------------------------
                    for i in range(features_number):
                        # cv2.circle needs integer pixel coordinates
                        cv2.circle(current_frame_copy,
                                   (int(features[i][0][0]), int(features[i][0][1])), 3, 255, -1)
                    cv2.circle(current_frame_copy, (int(X), int(Y)), 5, (0, 0, 255), -1)
            # show the output images
            cv2.imshow("HOG", current_frame_copy)
            key = cv2.waitKey(1) & 0xFF
            rawCapture.truncate(0)
            if key == ord("w"):
                break
    except (KeyboardInterrupt, SystemExit):
        gpio.output(27, False)
        gpio.output(17, False)
        camera.close()  # PiCamera exposes close(), not release()
        cv2.destroyAllWindows()
        raise
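The tracking code above references lk_params without defining it; a typical Lucas-Kanade parameter set (the exact values here are assumptions) looks like:

import cv2

lk_params = dict(winSize=(15, 15),
                 maxLevel=2,
                 criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))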
def do_for_all(original_image, index): # construct the argument parser and parse the arguments # load the input image and grab the image dimensions # image = cv2.imread(args["image"]) # Reading Image if os.path.isfile(original_image): image = cv2.imread(original_image) else: print('unable to read file in tesseract_text_detection.py') return # Resize Image to Standard Ratio image = resize_toStandard(image, "Adhar") temp = image orig = image.copy() (H, W) = image.shape[:2] # set the new width and height and then determine the ratio in change # for both the width and height (newW, newH) = (args["width"], args["height"]) rW = W / float(newW) rH = H / float(newH) # resize the image and grab the new image dimensions image = cv2.resize(image, (newW, newH)) (H, W) = image.shape[:2] # define the two output layer names for the EAST detector model that # we are interested -- the first is the output probabilities and the # second can be used to derive the bounding box coordinates of text layerNames = [ "feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"] # load the pre-trained EAST text detector print("[INFO] loading EAST text detector...") net = cv2.dnn.readNet("frozen_east_text_detection.pb") # construct a blob from the image and then perform a forward pass of # the model to obtain the two output layer sets blob = cv2.dnn.blobFromImage(image, 1.0, (W, H), (123.68, 116.78, 103.94), swapRB=True, crop=False) start = time.time() net.setInput(blob) (scores, geometry) = net.forward(layerNames) end = time.time() # show timing information on text prediction print("[INFO] text detection took {:.6f} seconds".format(end - start)) # grab the number of rows and columns from the scores volume, then # initialize our set of bounding box rectangles and corresponding # confidence scores (numRows, numCols) = scores.shape[2:4] rects = [] confidences = [] # loop over the number of rows for y in range(0, numRows): # extract the scores (probabilities), followed by the geometrical # data used to derive potential bounding box coordinates that # surround text scoresData = scores[0, 0, y] xData0 = geometry[0, 0, y] xData1 = geometry[0, 1, y] xData2 = geometry[0, 2, y] xData3 = geometry[0, 3, y] anglesData = geometry[0, 4, y] # loop over the number of columns for x in range(0, numCols): # if our score does not have sufficient probability, ignore it if scoresData[x] < args["min_confidence"]: continue # compute the offset factor as our resulting feature maps will # be 4x smaller than the input image (offsetX, offsetY) = (x * 4.0, y * 4.0) # extract the rotation angle for the prediction and then # compute the sin and cosine angle = anglesData[x] cos = np.cos(angle) sin = np.sin(angle) # use the geometry volume to derive the width and height of # the bounding box h = xData0[x] + xData2[x] w = xData1[x] + xData3[x] # compute both the starting and ending (x, y)-coordinates for # the text prediction bounding box endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x])) endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x])) startX = int(endX - w) startY = int(endY - h) # add the bounding box coordinates and probability score to # our respective lists rects.append((startX, startY, endX, endY)) confidences.append(scoresData[x]) # apply non-maxima suppression to suppress weak, overlapping bounding # boxes boxes = non_max_suppression(np.array(rects), probs=confidences) i=0 # if not exist(os.path("./output")): # os.mkdir('./output') # loop over the bounding boxes temp = resize_toStandard(temp, "Adhar") # cv2.imshow('hey', 
temp) # print(boxes) mod_boxes = [] for a,b,c,d in boxes: if(b<=290 and d<=290 and b>=56 and d>=56 and (c-a)>=50): mod_boxes.append([a,b,c,d]) # print(mod_boxes) for i in range(len(boxes)): boxes[i][0] = boxes[i][0] - 5 # startX boxes[i][1] = boxes[i][1] - 5 # startY boxes[i][2] = boxes[i][2] + 12 # endX boxes[i][3] = boxes[i][3] + 5 # endY for i in range(len(boxes)): for j in range(i): box1 = boxes[i] box2 = boxes[j] if check_merge(box1, box2, i, j): # print("cool") # print("boxes : ", boxes,"\n box1 : =" ,box1,"\n box2 : ", box2) if boxes[i][0] < boxes[j][0]: boxes[i] = boxes[j] = [boxes[i][0], boxes[i][1], boxes[j][2], boxes[j][3]] else: boxes[i] = boxes[j] = [boxes[j][0], boxes[j][1], boxes[i][2], boxes[i][3]] for i in range(len(boxes)): for j in range(i): box1 = boxes[i] box2 = boxes[j] if check_merge(box1, box2, i, j): # print("cool") # print("boxes : ", boxes,"\n box1 : =" ,box1,"\n box2 : ", box2) if boxes[i][0] < boxes[j][0]: boxes[i] = boxes[j] = [boxes[i][0], boxes[i][1], boxes[j][2], boxes[j][3]] else: boxes[i] = boxes[j] = [boxes[j][0], boxes[j][1], boxes[i][2], boxes[i][3]] # for box in boxes: newboxes=[] flag = True #newboxes1=[] #print(boxes) for (a,b,c,d) in boxes: if(b<=290 and d<=290 and b>=56 and d>=56 and (c-a)>=70): if not len(newboxes): newboxes.append([a,b,c,d]) else: for i in range(len(newboxes)): ( sx, sy, ex, ey) = newboxes[i] #print(i, a, b, c, d, " : " ,sx, sy, ex, ey, (sx == a and sy == b), (ex == c and ey == d), ((sx == a and sy == b) and (ex == c and ey == d))) if ((sx == a and sy == b) and (ex == c and ey == d)): flag = False # print(newboxes) if flag: newboxes.append([a, b, c, d]) flag = True #print(boxes) #print(newboxes) newboxes.sort(key = conditional_sort) # print(newboxes) newboxes.pop(0) #if len(newboxes) > 2: #newboxes[1][0] = newboxes[1][0] + (newboxes[1][2] - newboxes[1][1])*0.58 #newboxes[2][0] = newboxes[2][0] + (newboxes[2][2] - newboxes[2][1])*0.71 ind = -1 # print(boxes, len(boxes)) # print(mod_boxes) text_recognized = [] # text_recognized.append(original_image.split('./')[len(original_image.split('./'))-1]) print("############", index, "############") for (startX, startY, endX, endY) in newboxes: # scale the bounding box coordinates based on the respective # ratios ind += 1 if(ind==1): startX-= (startX-endX)*0.55 elif(ind==2): startX-= (startX-endX)*0.45 # print("i = ", ind , startX, startY, endX, endY) startX = int(startX * rW) startY = int(startY * rH) endX = int(endX * rW) endY = int(endY * rH) imgName = "./output/crop"+str(index)+"_"+str(ind)+".png" # print("actual i: " , ind, "dim : ", startX, startY, endX, endY) cv2.imwrite(imgName, temp[startY: endY, startX: endX]) text = pytesseract.image_to_string(Image.open(imgName), lang='eng', config='--psm 8 --oem 3 -c tessedit_char_whitelist=0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/') imgName = imgName.split('./')[len(imgName.split('./'))-1] imgName = imgName.split('/')[len(imgName.split('/'))-1] text_recognized.append(imgName) text_recognized.append(text) print("-=======",imgName,"======--") print(text) print("------------------") label = str(ind) # draw the bounding box on the image # cv2.rectangle(orig, (startX-10, startY-5), (endX+10, endY+5), (0, 255, ind*10), 2) cv2.rectangle(orig, (startX, startY), (endX, endY), (0, 255, ind*10), 2) cv2.putText(orig,label,(startX-10, startY-5),cv2.FONT_HERSHEY_COMPLEX,0.5,(0,0,0),1) # show the output image print("===========================\n\n") if len(text_recognized) == 8: new_num = text_recognized.pop() n = '' for x in 
new_num: if (ord(x)<=57 and ord(x)>=48) or ord(x) == ord(' '): n+=x text_recognized.append(n) p = "./fullCard/"+str(index)+".png" cv2.imwrite(p, orig) #pt.imshow(orig) #pt.show() # cv2.imshow("Text Detection", orig) # cv2.waitKey(0) return text_recognized
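do_for_all calls a check_merge helper that is not shown above. A hedged sketch of a plausible implementation for (startX, startY, endX, endY) boxes; the overlap criterion is an assumption:

def check_merge(box1, box2, i, j):
    # Hypothetical sketch: two distinct boxes are considered mergeable
    # when their rectangles overlap on both axes.
    if i == j:
        return False
    overlap_x = min(box1[2], box2[2]) - max(box1[0], box2[0])
    overlap_y = min(box1[3], box2[3]) - max(box1[1], box2[1])
    return overlap_x > 0 and overlap_y > 0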
def body_detection(): global frame_hist global first global tracker_count global t_x global t_y global t_w global t_h global t_x_bar global t_y_bar global output_file_path global finalLOG_file_path global last_server_resp global last_upload global save_body_path global t global prev_frame global frame2gray global frame1gray global result_frame while True: # read each frame ret, frame = webcam.read() if ret is False: print('NO FRAME COMING, Return value is ',ret) # Un comment the below part iff you want to log system status every minute # ######### LOG THIS INFO IN FINAL LOG VERY MINUTE ################# # if time.time()-t > 60: # with open(finalLOG_file_path ,"a") as output: # output.write("_________ WRITING LOG AT "+time.strftime("%H:%M:%S")+" DATE "+time.strftime("%Y-%m-%d")+"_________ \n\n"+"CAMERA DEVICE ID: cv2.VideoCapture(1)\n") # #CHECK CAMERA # if webcam.isOpened(): # output.write("CAMERA FEED STATUS AT "+time.strftime("%H:%M:%S")+" AVAILABLE\n") # else: # output.write("CAMERA FEED STATUS AT " +time.strftime("%H:%M:%S")+ " NOT AVAILABLE \n") # #CHECK FRAME CAPTURE # if ret==True: # output.write("CURRENT FRAME STATUS AT " +time.strftime("%H:%M:%S")+" YES\n") # else: # output.write("CURRENT FRAME STATUS AT "+time.strftime("%H:%M:%S")+" NO\n") # # WRITE LAST UPLOAD AND SERVER RESPONSE TO FILE # try: # if not queue_LOG_LastUpload.empty(): # last_upload = queue_LOG_LastUpload.get() # if not queue_LOG_ServerResp.empty(): # last_server_resp = queue_LOG_ServerResp.get() # except: # logging.basicConfig(filename='errorLogs.txt',level=logging.DEBUG) # logging.info('problem getting values from diagonistcs queue') # # ADD LAST UPLOAD AND SERVER RESPONSE TO FILE # output.write("LAST UPLOADING FILE NAME: "+ last_upload + "\n"+"LAST SERVER RESPONSE: "+ last_server_resp +"\n\n") # output.write("_______________END OF LOG SEGMENT___________________\n\n\n") # output.close() # t=time.time() # ########################## # Pre process every frame for motion analysis if frame_hist == 0: hsv_roi = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV) mask = cv2.inRange(hsv_roi, np.array((0., 60.,32.)), np.array((180.,255.,255.))) roi_hist = cv2.calcHist([hsv_roi],[0],mask,[180],[0,180]) cv2.normalize(roi_hist,roi_hist,0,255,cv2.NORM_MINMAX) frame_hist = 1 # resize every frame to reduce computational power image = imutils.resize(frame, width=min(300, frame.shape[1])) orig = image.copy() # Process every frame for motion analysis # convert frame to gray frame2gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY ) #Absdiff to get the difference between the frames result_frame = cv2.absdiff(frame1gray,frame2gray) # Pre process every result frame for motion analysis result_frame = cv2.blur(result_frame,(5,5)) kernel = cv2.getStructuringElement(cv2.MORPH_RECT,(5,5)) result_frame = cv2.morphologyEx(result_frame, cv2.MORPH_OPEN, kernel) result_frame = cv2.morphologyEx(result_frame, cv2.MORPH_CLOSE, kernel) val , result_frame = cv2.threshold(result_frame, 13, 255, cv2.THRESH_BINARY_INV) # Check for motion. If there is any motion try to detect humans. 
if somethingHasMoved(result_frame): # detect people in the frame using our hog classifier (rects, weights) = hog.detectMultiScale(image, winStride=(4, 4), padding=(0, 0), scale=1.1) # If no detection reset these values to any random large values if len(rects) == 0: t_x = 10001 t_y = 10001 t_w = 10031 t_h = 10031 t_x_bar = 100021 t_y_bar = 100011 # Iterate through all of the detected human bodies for i in range(len(rects)): body_i = rects[i] (x, y, w, h) = [v * 1 for v in body_i] # draw the bounding boxes for every detection cv2.rectangle(orig, (x, y), (x + w, y + h), (0, 0, 255), 2) # apply non-maxima suppression to reduce overlapping rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects]) pick = non_max_suppression(rects, probs=None, overlapThresh=0.65) # # Iterate through all of the detected human bodies after NON NAXIMA for i in range(len(pick)): body_i = pick[i] (xA, yA, xB, yB) = [int(v * 1) for v in body_i] # draw the final bounding boxes after NON MAXIMA #cv2.rectangle(image, (xA, yA), (xB, yB), (0, 255, 0), 2) # Now draw bounding boxes on original full resolution frame # First map the final bounding boxes to the original frame # Calculated multiplication factor varries with camera resolution # (4.28 for 1280x720) (6.4 for 1920x1080) (x1, y1, w1, h1) = [int(v * 4.28) for v in body_i] cv2.rectangle(frame, (x1, y1), (w1, h1), (0, 255, 55), 2) # for tracking , every time current rectangle is the new rectangle/bounding box curr_rect = (x1, y1, w1, h1) # for first run, set tracking window here if first == 0: track_window = curr_rect first = 1 #calculate the centerpoint of NEW rectangle/bounding boxes x_bar = x1 + 0.5 * w1 y_bar = y1 + 0.5 * h1 # CHECK IF CURRENT RECTANGLES LIES SOMWHERE IN THE PREVIOUS RECTANGLES if ((t_x <= x_bar <= (t_x + t_w)) and (t_y <= y_bar <= (t_y + t_h)) and (x1 <= t_x_bar <= (x1 + w1 )) and ( y1 <= t_y_bar <= (y1 + h1 ))): # If it lies somewhere in the previous rectangle do not reset the tracker, keep tracking the previous one #print ('RECT MATCHED - KEEP TRACKING - DONT RESET') hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV) dst = cv2.calcBackProject([hsv],[0],roi_hist,[0,180],1) # apply meanshift to get the new location ret, track_window = cv2.meanShift(dst, track_window, term_crit) x3,y3,w3,h3 = track_window x3 = ((x1-x3)+x3) y3 = ((y1-y3)+y3) w3 = ((w1-w3)+w3) h3 = ((h1-h3)+h3) # draw tracking rectangles cv2.rectangle(frame, (x3, y3),(w3, h3),rectangleColor ,2) # copy current rects in tracking rects (t_x , t_y , t_w , t_h) = curr_rect #calculate the centerpoints t_x_bar = t_x + 0.5 * t_w t_y_bar = t_y + 0.5 * t_h else: # If it does not lie in the previous rectangles , update the tracked and track the current/new one #print('NO MATCHING RECTS - UPDATE TRACKER - UPDATE RECTS') track_window = curr_rect hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV) dst = cv2.calcBackProject([hsv],[0],roi_hist,[0,180],1) # apply meanshift to get the new location ret, track_window = cv2.meanShift(dst, track_window, term_crit) x3,y3,w3,h3 = track_window x3 = ((x1-x3)+x3) y3 = ((y1-y3)+y3) w3 = ((w1-w3)+w3) h3 = ((h1-h3)+h3) # draw tracking rectangles cv2.rectangle(frame, (x3, y3),(w3, h3),rectangleColor ,2) # copy current rects in tracking rects (t_x , t_y , t_w , t_h) = curr_rect #calculate the centerpoints t_x_bar = t_x + 0.5 * t_w t_y_bar = t_y + 0.5 * t_h # Every time we hav new detection or bounding box save it to upload on server # Crop body from Original full resolution frame body_big = frame[y1:h1, x1:w1] # Uncomment this if you want your every detection on same 
aspect ratio i-e 1:2 ''' #################################### im_shape = body_big.shape #print('ORIGINAl Width: ',im_shape[0]) #print('ORIGINAL Height: ',im_shape[1]) aspect_ratio = float(float(im_shape[0]) / float(im_shape[1])) #print('ORIGINAL Aspect Ratio: ',aspect_ratio) ratio_check = float(1 / 1.67) #print ('Aspect Ratio Threshold: ', ratio_check) if aspect_ratio < (ratio_check) or aspect_ratio > (ratio_check): new_width = ratio_check * float(im_shape[1]) #print('NEW width: ', new_width) aspect_ratio = float(float(new_width) / float(im_shape[1])) #print('NEW aspect ratio: ', aspect_ratio) body_big = imutils.resize(body_big, width=int(new_width)) #################################### ''' # before saving the detected image first get current date and time to append it with name cur_date = (time.strftime("%Y-%m-%d")) cur_time = (time.strftime("%H:%M:%S")) # Append date and time new_pin =cur_date+"-"+cur_time # any hardcoded name you want filename1 = 'UNKNOWN' # Append new_pin and hardcoded name to make final name filename2 = str(filename1)+'-'+str(new_pin) # this is your final image with the path to where it is located sampleFile = ('%s/%s.png' % (save_body_path, filename2)) #Save image in a folder, save_body_path has the full path to the folder where we want to save images cv2.imwrite('%s/%s.png' % (save_body_path, filename2), body_big) # For Face Detection read each image from the location where we saved it person = cv2.imread(sampleFile) # Pass the image to face detector, we are using dlib's face detector dets = detector(person, 1) print("Number of faces detected: {}".format(len(dets))) # Iterate through all of detected bounding boxes/faces for i, d in enumerate(dets): print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format( i, d.left(), d.top(), d.right(), d.bottom())) # Crop the faces/bounding boxes and save in a seperate folder for later use crop = person[d.top():d.bottom(), d.left():d.right()] # before saving the detected image first get current date and time to append it with name cur_date = (time.strftime("%Y-%m-%d")) cur_time = (time.strftime("%H:%M:%S")) new_pin =cur_date+"-"+cur_time facename1 = 'FACE' facename2 = str(facename1)+'-'+str(new_pin) sampleFace = ('%s/%s.png' % (save_faces_path, facename2)) #Save Image Here cv2.imwrite('%s/%s.png' % (save_faces_path, facename2), crop) # Put detected bodies in Queue... un comment below line if you have upload process running # queue_BODIES.put(sampleFile) # show the output images cv2.imshow("Before NMS", orig) #cv2.imshow("After NMS", image) #cv2.imshow("ANZEN", frame) #cv2.imshow("thres", result_frame) # Always copy current frame to prev frame to consider it as a background for motion analysis frame1gray = frame2gray.copy() key = cv2.waitKey(10) if key == 27: break
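body_detection gates the HOG pass on a somethingHasMoved helper that is not shown. A hedged sketch (the pixel-count threshold is an assumption; note the frame was thresholded with THRESH_BINARY_INV, so changed pixels are black):

import cv2

def somethingHasMoved(binary_frame, min_moving_pixels=500):
    # count the black (changed) pixels in the inverted binary frame
    moving = binary_frame.size - cv2.countNonZero(binary_frame)
    return moving > min_moving_pixels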
def text_detect_and_recognition(img):
    # These two lines preserve the data (image, height, and width) of the original image
    org = img
    (H, W) = img.shape[:2]
    # Setting the image to the correct parameters to be turned later into a blob
    (newW, newH) = (640, 320)
    rW = W / float(newW)
    rH = H / float(newH)
    image = cv2.resize(img, (newW, newH))
    # use the resized image's dimensions from here on (EAST needs multiples of 32)
    (H, W) = image.shape[:2]
    # Layers of the neural net for detecting where the text is in the image
    layerNames = [
        "feature_fusion/Conv_7/Sigmoid",
        "feature_fusion/concat_3"]
    # Modifying the resized image to the correct format in order to pass it through the neural net
    blob = cv2.dnn.blobFromImage(image, 1.0, (W, H),
                                 (123.68, 116.78, 103.94), swapRB=True, crop=False)
    net.setInput(blob)
    # These two variables represent the output of the neural net
    (scores, geometry) = net.forward(layerNames)
    (numRows, numCol) = scores.shape[2:4]
    rects = []
    confidences = []
    '''This for loop chooses the rectangles that have sufficient confidence
    as to whether there is text inside the rectangle. We save the
    parameters of this rect.'''
    for y in range(0, numRows):
        scoresData = scores[0, 0, y]
        xData0 = geometry[0, 0, y]
        xData1 = geometry[0, 1, y]
        xData2 = geometry[0, 2, y]
        xData3 = geometry[0, 3, y]
        anglesData = geometry[0, 4, y]
        for x in range(0, numCol):
            if scoresData[x] < 0.5:
                continue
            (offsetX, offsetY) = (x * 4.0, y * 4.0)
            angle = anglesData[x]
            cos = np.cos(angle)
            sin = np.sin(angle)
            h = xData0[x] + xData2[x]
            w = xData1[x] + xData3[x]
            endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
            # the sin term is subtracted here, matching the EAST decode used elsewhere
            endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
            startX = int(endX - w)
            startY = int(endY - h)
            rects.append((startX, startY, endX, endY))
            confidences.append(scoresData[x])
    boxes = non_max_suppression(np.array(rects), probs=confidences)
    '''This for loop draws the bounding box in the original image and passes
    that region of interest (roi) through the pytesseract functions.'''
    for (startX, startY, endX, endY) in boxes:
        print(startX, startY, endX, endY)
        newstartX = int(startX * rW)
        newstartY = int(startY * rH)
        newendX = int(endX * rW)
        newendY = int(endY * rH)
        boundary = 5
        # clamp so a box near the border cannot produce a negative slice index
        roi = org[max(0, newstartY - boundary): newendY + boundary,
                  max(0, newstartX - boundary): newendX + boundary]
        gray = cv2.cvtColor(roi.astype(np.uint8), cv2.COLOR_BGR2GRAY)
        cong = r'--oem 2'
        textRecognized = pytesseract.image_to_string(gray, config=cong)
        textRecognized = textRecognized.replace("\n", "")
        textRecognized = textRecognized[:-1]
        # draw on the original-resolution image, so use the rescaled coordinates
        cv2.rectangle(org, (newstartX, newstartY), (newendX, newendY), (0, 255, 0), 2)
        org = cv2.putText(org, textRecognized, (newendX, newendY + 5),
                          cv2.FONT_ITALIC, fontScale=0.5, color=(0, 0, 0))
    return org
def detect(args): # load the input image and grab the image dimensions image = args["image"] (H, W) = image.shape[:2] if image.ndim == 2: image = cv2.merge((image, image, image)) orig = image.copy() # set the new width and height and then determine the ratio in change # for both the width and height (newW, newH) = (args["width"], args["height"]) rW = W / float(newW) rH = H / float(newH) # resize the image and grab the new image dimensions image = cv2.resize(image, (newW, newH)) (H, W) = image.shape[:2] # define the two output layer names for the EAST detector model that # we are interested -- the first is the output probabilities and the # second can be used to derive the bounding box coordinates of text layerNames = ["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"] # load the pre-trained EAST text detector # print("[INFO] loading EAST text detector...") net = cv2.dnn.readNet(args["east"]) # construct a blob from the image and then perform a forward pass of # the model to obtain the two output layer sets blob = cv2.dnn.blobFromImage(image, 1.0, (W, H), (123.68, 116.78, 103.94), swapRB=True, crop=False) start = time.time() net.setInput(blob) (scores, geometry) = net.forward(layerNames) end = time.time() # show timing information on text prediction # print("[INFO] text detection took {:.6f} seconds".format(end - start)) # grab the number of rows and columns from the scores volume, then # initialize our set of bounding box rectangles and corresponding # confidence scores (numRows, numCols) = scores.shape[2:4] rects = [] confidences = [] # loop over the number of rows for y in range(0, numRows): # extract the scores (probabilities), followed by the geometrical # data used to derive potential bounding box coordinates that # surround text scoresData = scores[0, 0, y] xData0 = geometry[0, 0, y] xData1 = geometry[0, 1, y] xData2 = geometry[0, 2, y] xData3 = geometry[0, 3, y] anglesData = geometry[0, 4, y] # loop over the number of columns for x in range(0, numCols): # if our score does not have sufficient probability, ignore it if scoresData[x] < args["min_confidence"]: continue # compute the offset factor as our resulting feature maps will # be 4x smaller than the input image (offsetX, offsetY) = (x * 4.0, y * 4.0) # extract the rotation angle for the prediction and then # compute the sin and cosine angle = anglesData[x] cos = np.cos(angle) sin = np.sin(angle) # use the geometry volume to derive the width and height of # the bounding box h = xData0[x] + xData2[x] w = xData1[x] + xData3[x] # compute both the starting and ending (x, y)-coordinates for # the text prediction bounding box endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x])) endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x])) startX = int(endX - w) startY = int(endY - h) # add the bounding box coordinates and probability score to # our respective lists rects.append((startX, startY, endX, endY)) confidences.append(scoresData[x]) # apply non-maxima suppression to suppress weak, overlapping bounding # boxes boxes = non_max_suppression(np.array(rects), probs=confidences) imgs = [] # loop over the bounding boxes for (startX, startY, endX, endY) in boxes: # scale the bounding box coordinates based on the respective # ratios startX = int(startX * rW) startY = int(startY * rH) endX = int(endX * rW) endY = int(endY * rH) # img = cv2.resize(orig[startY:endY,startX:endX], None, fx=1.5, fy=1.5, interpolation=cv2.INTER_CUBIC) imgs.append(orig[startY:endY, startX:endX]) # draw the bounding box on the image # 
cv2.rectangle(orig, (startX, startY), (endX, endY), (0, 255, 0), 2) return imgs # show the output image # cv2.imshow("Text Detection", orig) # cv2.waitKey(0)
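A hedged usage sketch for detect; the filename is an assumption, and width/height should be multiples of 32 as EAST requires:

import cv2

args = {
    "image": cv2.imread("sample.jpg"),   # hypothetical input file
    "width": 320,
    "height": 320,
    "east": "frozen_east_text_detection.pb",
    "min_confidence": 0.5,
}
crops = detect(args)
print("found {} text regions".format(len(crops)))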
batch_locations, labels, min_prob=arguments['confidence']) end = time.time() print(f'[INFO] Detections took {end - start:.4f} seconds.') for k in labels.keys(): clone = resized.copy() for (box, prob) in labels[k]: (x_start, y_start, x_end, y_end) = box cv2.rectangle(clone, (x_start, y_start), (x_end, y_end), (0, 255, 0), 2) cv2.imshow('Without NMS', clone) clone = resized.copy() boxes = np.array([p[0] for p in labels[k]]) proba = np.array([p[1] for p in labels[k]]) boxes = non_max_suppression(boxes, proba) for (x_start, y_start, x_end, y_end) in boxes: cv2.rectangle(clone, (x_start, y_start), (x_end, y_end), (0, 0, 255), 2) print(f'[INFO] {k}: {len(boxes)}') cv2.imshow('With NMS', clone) cv2.waitKey(0)
orig = image.copy() # detect people in the image (rects, weights) = hog.detectMultiScale(image, winStride=(4, 4), padding=(8, 8), scale=1.05) # draw the original bounding boxes for (x, y, w, h) in rects: cv2.rectangle(orig, (x, y), (x + w, y + h), (0, 0, 255), 2) # apply non-maxima suppression to the bounding boxes using a # fairly large overlap threshold to try to maintain overlapping # boxes that are still people rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects]) pick = non_max_suppression(rects, probs=None, overlapThresh=0.65) # draw the final bounding boxes for (xA, yA, xB, yB) in pick: cv2.rectangle(image, (xA, yA), (xB, yB), (0, 255, 0), 2) # show some information on the number of bounding boxes filename = imagePath[imagePath.rfind("/") + 1:] print("[INFO] {}: {} original boxes, {} after suppression".format( filename, len(rects), len(pick))) # show the output images cv2.imshow("Before NMS", orig) cv2.imshow("After NMS", image) cv2.waitKey(0)
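The loop body above assumes hog, imagePath, and image were prepared beforehand; a minimal setup sketch (the image path is an assumption):

import cv2
import imutils
import numpy as np
from imutils.object_detection import non_max_suppression

hog = cv2.HOGDescriptor()
hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())

imagePath = "images/person_01.jpg"  # hypothetical test image
image = cv2.imread(imagePath)
image = imutils.resize(image, width=min(400, image.shape[1]))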
def MotionDetection(inVideo, firstFrame, lastFrame):
    count = 0
    cap = cv2.VideoCapture(inVideo)
    cap.set(cv2.CAP_PROP_POS_FRAMES, firstFrame)
    frame_width = int(cap.get(3))
    frame_height = int(cap.get(4))
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter('../output/output.avi', fourcc, 20.0, (frame_width, frame_height))
    ret, previous_frame = cap.read()
    frames = [previous_frame]
    median = np.median(frames, axis=0).astype(dtype=np.uint8)
    # structuring element for the morphological operations below (the size is an
    # assumption; the original relied on a module-level kernel)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
    # Loop over all frames
    while cap.isOpened():
        # read() gives two outputs; ret is a boolean reporting whether a frame was actually read
        ret, frame = cap.read()
        if not ret:
            break
        if count != lastFrame:
            if count % 3 == 0:
                if len(frames) == 3:
                    # keep the running median up to date (the original discarded this result)
                    median = np.median(frames, axis=0).astype(dtype=np.uint8)
                elif len(frames) > 3:
                    frames = [frame]
                    median = np.median(frames, axis=0).astype(dtype=np.uint8)
                else:
                    frames.append(frame)
            current_frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            previous_frame_gray = cv2.cvtColor(median, cv2.COLOR_BGR2GRAY)
            dframe = cv2.absdiff(current_frame_gray, previous_frame_gray)
            # Threshold to binarize
            th, dframe = cv2.threshold(dframe, 35, 255, cv2.THRESH_BINARY)
            # Morphological Operation
            dilated = cv2.dilate(dframe, None, iterations=4)
            opening = cv2.morphologyEx(dilated, cv2.MORPH_OPEN, kernel)
            closing = cv2.morphologyEx(opening, cv2.MORPH_CLOSE, kernel)
            (cnts, _) = cv2.findContours(closing, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            for contour in cnts:
                if cv2.contourArea(contour) < 100:
                    # excluding too small contours. Set 10000 (100x100 pixels) for objects close to camera
                    continue
                # obtain the corresponding bounding rectangle of our detected contour
                (x, y, w, h) = cv2.boundingRect(contour)
                offset = 30
                if x < offset:
                    x = offset
                if y < offset:
                    y = offset
                selection = current_frame_gray[y - offset:y + h + offset, x - offset:x + w + offset]
                cars = car_cascade.detectMultiScale(selection, 1.1, 1)
                people = people_cascade.detectMultiScale(selection, 1.1, 1)
                # apply non-maxima suppression to the bounding boxes using a
                # fairly large overlap threshold to try to maintain overlapping
                # boxes that are still people
                rects_cars = np.array([[x, y, x + w, y + h] for (x, y, w, h) in cars])
                rects_people = np.array([[x, y, x + w, y + h] for (x, y, w, h) in people])
                pick_cars = non_max_suppression(rects_cars, probs=None, overlapThresh=0.95)
                pick_people = non_max_suppression(rects_people, probs=None, overlapThresh=0.95)
                # TODO add object detection for other things.
                for i in range(len(pick_cars)):
                    # pick coordinates are relative to the cropped selection, so translate
                    # them back by the selection origin before drawing
                    (cx1, cy1, cx2, cy2) = pick_cars[i]
                    cv2.rectangle(frame, (x - offset + cx1, y - offset + cy1),
                                  (x - offset + cx2, y - offset + cy2), (255, 0, 0), 2)
                    cv2.putText(frame, "C" + str(i + 1), (x - offset + cx1, y - offset + cy1),
                                cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 0))
                for i in range(len(pick_people)):
                    (px1, py1, px2, py2) = pick_people[i]
                    cv2.rectangle(frame, (x - offset + px1, y - offset + py1),
                                  (x - offset + px2, y - offset + py2), (0, 255, 0), 2)
                    cv2.putText(frame, "P" + str(i + 1), (x - offset + px1, y - offset + py1),
                                cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 0))
                # this sadly does not work that's why its commented out
                # if not rects_cars.__contains__(contour) and not rects_people.__contains__(contour):
                #     cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 0, 255), 2)
                #     cv2.putText(frame, "O", (x, y), cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 0))
            out.write(frame)
            count += 1
    # Release video object
    cap.release()
    # return the path the VideoWriter actually wrote to
    return str(pathlib.Path('../output/output.avi').resolve())
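A hedged usage sketch; the cascade files, video path, and frame range are assumptions (the function expects module-level car_cascade and people_cascade):

import cv2

car_cascade = cv2.CascadeClassifier("cars.xml")                     # assumed third-party cascade
people_cascade = cv2.CascadeClassifier("haarcascade_fullbody.xml")  # ships with OpenCV

out_path = MotionDetection("../input/traffic.mp4", firstFrame=0, lastFrame=500)
print(out_path)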
def find_text_and_blur(frame, net, min_confidence): # load the input image and grab the image dimensions image = cv2.cvtColor(np.array(frame), cv2.COLOR_RGB2BGR) orig = image.copy() (H, W) = image.shape[:2] # set the new width and height and then determine the ratio in change (newW, newH) = (round(W / 32) * 32, round(H / 32) * 32 ) # Round dimensions to nearest multiple of 32 rW = W / float(newW) rH = H / float(newH) # resize the image and grab the new image dimensions image = cv2.resize(image, (newW, newH)) (H, W) = image.shape[:2] # define the two output layer names for the EAST detector model that # we are interested -- the first is the output probabilities and the # second can be used to derive the bounding box coordinates of text layerNames = ["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"] #net = cv2.dnn.readNet(eastPath) # construct a blob from the image and then perform a forward pass of # the model to obtain the two output layer sets blob = cv2.dnn.blobFromImage(image, 1.0, (W, H), (123.68, 116.78, 103.94), swapRB=True, crop=False) net.setInput(blob) (scores, geometry) = net.forward(layerNames) # grab the number of rows and columns from the scores volume, then # initialize our set of bounding box rectangles and corresponding # confidence scores (numRows, numCols) = scores.shape[2:4] rects = [] confidences = [] # loop over the number of rows for y in range(0, numRows): # extract the scores (probabilities), followed by the geometrical # data used to derive potential bounding box coordinates that # surround text scoresData = scores[0, 0, y] xData0 = geometry[0, 0, y] xData1 = geometry[0, 1, y] xData2 = geometry[0, 2, y] xData3 = geometry[0, 3, y] anglesData = geometry[0, 4, y] # loop over the number of columns for x in range(0, numCols): # if our score does not have sufficient probability, ignore it if scoresData[x] < min_confidence: continue # compute the offset factor as our resulting feature maps will # be 4x smaller than the input image (offsetX, offsetY) = (x * 4.0, y * 4.0) # extract the rotation angle for the prediction and then # compute the sin and cosine angle = anglesData[x] cos = np.cos(angle) sin = np.sin(angle) # use the geometry volume to derive the width and height of # the bounding box h = xData0[x] + xData2[x] w = xData1[x] + xData3[x] # compute both the starting and ending (x, y)-coordinates for # the text prediction bounding box endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x])) endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x])) startX = int(endX - w) startY = int(endY - h) # add the bounding box coordinates and probability score to # our respective lists rects.append((startX, startY, endX, endY)) confidences.append(scoresData[x]) # apply non-maxima suppression to suppress weak, overlapping bounding # boxes boxes = non_max_suppression(np.array(rects), probs=confidences) # loop over the bounding boxes for (startX, startY, endX, endY) in boxes: # scale the bounding box coordinates based on the respective # ratios startX = int(startX * rW) startY = int(startY * rH) endX = int(endX * rW) endY = int(endY * rH) # add blurring based on boxes box_dim_img = orig[startY:endY, startX:endX] blur = cv2.GaussianBlur(box_dim_img, (101, 101), 0) orig[startY:endY, startX:endX] = blur return orig
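A hedged usage sketch for find_text_and_blur; note the function converts its input from RGB to BGR, so a PIL image is a natural fit (file names are assumptions):

import cv2
from PIL import Image

net = cv2.dnn.readNet("frozen_east_text_detection.pb")
frame = Image.open("page.png")  # RGB input, as the function expects
blurred = find_text_and_blur(frame, net, min_confidence=0.5)
cv2.imwrite("page_blurred.png", blurred)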
def detector(video_capture, ww, hh, M, ppl_size, ROI_1, ROI_2, video_input=False):
    global display, sampling, NMS
    if sampling:
        global image
    if video_input:
        global frame_skip
        for i in range(frame_skip):
            video_capture.read()
    ret, image = video_capture.read()
    # rotate
    if M is not None:  # explicit None check; != on a NumPy matrix is elementwise
        image = cv2.warpAffine(image, M, (ww, hh))
    # resize
    image = imutils.resize(image, width=min(400, image.shape[1]))
    ## mask after resize
    image = image[ROI_1[1]:ROI_2[1], ROI_1[0]:ROI_2[0]]
    # Display - origin
    orig = image.copy()
    # detect
    (rects, weights) = hog.detectMultiScale(image, winStride=(8, 8), padding=(8, 8), scale=1.05)
    ## delete rects outside the allowed person-width range
    i = 0
    while i < len(rects):
        if rects[i][2] > ppl_size[0] or rects[i][2] < ppl_size[1]:
            if display:
                [x, y, w, h] = rects[i]
                cv2.rectangle(orig, (x, y), (x + w, y + h), (255, 0, 0), 2)
            rects = np.delete(rects, i, 0)
        else:
            if display:
                [x, y, w, h] = rects[i]
                cv2.rectangle(orig, (x, y), (x + w, y + h), (0, 0, 255), 2)
            i += 1
    # Display - origin
    # for (x, y, w, h) in rects:
    #     # box size validation
    #     print('w = ' + str(w))
    #     if (w < 100):
    #         cv2.rectangle(orig, (x, y), (x + w, y + h), (0, 0, 255), 2)
    # Display - origin
    if display:
        cv2.imshow('HOG', orig)
    # combine rectangles
    if NMS:
        rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
        pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)
        # draw the final bounding boxes
        for (xA, yA, xB, yB) in pick:
            cv2.rectangle(image, (xA, yA), (xB, yB), (0, 255, 0), 2)
        # Display - result
        cv2.imshow("HOG + NMS", image)
    people = len(rects)
    if sampling:
        if random.random() > 0.9:
            # print("save")
            fname = "./save/all_" + time.strftime("%m_%d_%H_%M_%S") + ".jpg"
            cv2.imwrite(fname, image)
    return people > 0
def getEnhanced(img):
    # shape[0] is the number of rows (height) and shape[1] is columns (width)
    height = img.shape[0]
    width = img.shape[1]
    ratio = height / width
    newWidth = 960
    # EAST needs dimensions that are multiples of 32
    newHeight = int(((newWidth * ratio) // 32) * 32)
    print(newHeight)
    img = cv2.resize(img, (newWidth, newHeight))
    layerNames = ["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"]
    net = cv2.dnn.readNet("frozen_east_text_detection.pb")
    blob = cv2.dnn.blobFromImage(img, 1.0, (newWidth, newHeight),
                                 (123.68, 116.78, 103.94), swapRB=True, crop=False)
    net.setInput(blob)
    (scores, geometry) = net.forward(layerNames)
    (numRows, numCols) = scores.shape[2:4]
    rects = []
    confidences = []
    # loop over the number of rows
    for y in range(0, numRows):
        # extract the scores (probabilities), followed by the geometrical
        # data used to derive potential bounding box coordinates that
        # surround text
        scoresData = scores[0, 0, y]
        xData0 = geometry[0, 0, y]
        xData1 = geometry[0, 1, y]
        xData2 = geometry[0, 2, y]
        xData3 = geometry[0, 3, y]
        anglesData = geometry[0, 4, y]
        # loop over the number of columns
        for x in range(0, numCols):
            # if our score does not have sufficient probability, ignore it
            if scoresData[x] < 0.5:
                continue
            # compute the offset factor as our resulting feature maps will
            # be 4x smaller than the input image
            (offsetX, offsetY) = (x * 4.0, y * 4.0)
            # extract the rotation angle for the prediction and then
            # compute the sin and cosine
            angle = anglesData[x]
            cos = np.cos(angle)
            sin = np.sin(angle)
            # use the geometry volume to derive the width and height of
            # the bounding box
            h = xData0[x] + xData2[x]
            w = xData1[x] + xData3[x]
            # compute both the starting and ending (x, y)-coordinates for
            # the text prediction bounding box
            endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
            endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
            startX = int(endX - w)
            startY = int(endY - h)
            # add the bounding box coordinates and probability score to
            # our respective lists
            rects.append((startX, startY, endX, endY))
            confidences.append(scoresData[x])
    # apply non-maxima suppression to suppress weak, overlapping bounding
    # boxes
    boxes = non_max_suppression(np.array(rects), probs=confidences)
    # loop over the bounding boxes
    for (startX, startY, endX, endY) in boxes:
        # no rescaling is needed: detection ran on the resized image itself
        startX = int(startX)
        startY = int(startY)
        endX = int(endX)
        endY = int(endY)
        print("text")
        # draw the bounding box on the image
        cv2.rectangle(img, (startX, startY), (endX, endY), (0, 255, 0), 2)
    return img
def main(): # initialize the HOG descriptor/person detector camera = cv2.VideoCapture(0); time.sleep(0.25) hog = cv2.HOGDescriptor() hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector()) Threshold = 0 features_number = 0 while True: # main loop tracked_features = None while True: # detection loop, loop over the images unchangedPointsMap = dict() # load the image and resize it to (1) reduce detection time # and (2) improve detection accuracy (grabbed, current_frame) = camera.read() current_frame = imutils.resize(current_frame, width = 300) current_frame_copy = current_frame.copy() current_frame = cv2.cvtColor(current_frame, cv2.COLOR_BGR2GRAY) # detect people in the image (rects, weights) = hog.detectMultiScale(current_frame, winStride=(4, 4), padding=(8, 8), scale=1.5) # draw the original bounding boxes for i in range(len(rects)): x, y, w, h = rects[i] rects[i][0] = x + 15 rects[i][1] = y + 40 rects[i][2] = w - 30 rects[i][3] = h - 20 for (x, y, w, h) in rects: cv2.rectangle(current_frame_copy, (x, y), (x + w, y + h), (0, 0, 255), 2) # apply non-maxima suppression to the bounding boxes using a # fairly large overlap threshold to try to maintain overlapping # boxes that are still people rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects]) pick = non_max_suppression(rects, probs=None, overlapThresh=0.65) # draw the final bounding boxes for (xA, yA, xB, yB) in pick: cv2.rectangle(current_frame, (xA, yA), (xB, yB), (0, 255, 0), 2) print("{} original boxes, {} after suppression".format(len(rects), len(pick))) # if len(rects) > 0: # features = find_features(current_frame, rects[0], 0) # print("NUM" + str(features_number)) # break # # cv2.imshow("HOG", current_frame_copy) # # key = cv2.waitKey(1) & 0xFF # # if key == ord("w"): # # break # features_number = len(features) # Threshold = features_number * threshold_percent # while True: # Tracking loop # #print ("Threshold" + str(Threshold)) # if features_number < Threshold: # print ("Features less than threshold") # break # else: # (grabbed, next_frame) = camera.read() # next_frame = imutils.resize(next_frame, width = 300) # if not grabbed: # print ("Camera read failed") # return # current_frame_copy = next_frame.copy() # next_frame = cv2.cvtColor(next_frame, cv2.COLOR_BGR2GRAY) # #-----------Tracking using LK --------------------------- # try: # features = np.array(features) # #print("Features" + str(features)) # (tracked_features, status, feature_errors) = cv2.calcOpticalFlowPyrLK(current_frame, next_frame, features, None, **lk_params) # #print("TEST") # # print("KEYS" + str(unchangedPointsMap.keys())) # # for i in range(len(tracked_features[0])): # # f = tracked_features[0][i] # # x = round(f[0]) # # y = round(f[1]) # # print("x and y" + str((x,y))) # # if (x,y) in unchangedPointsMap.keys(): # # unchangedPointsMap[(x,y)] += 1 # # print("ADDED" + str(unchangedPointsMap[(x,y)])) # # if unchangedPointsMap[(x,y)] == 30: # # print ("BEFORE" + str(tracked_features[0])) # # tracked_features = np.delete(tracked_features,i,0) # # unchangedPointsMap.pop((x,y)) # # print ("AFTER" + str(tracked_features[0])) # # else: # # unchangedPointsMap[(x,y)] = 0 # # print("BEFORE" + str(tracked_features)) # # tracked_features[tracked_features[:,0].argsort()] # # print("AFTER" + str(tracked_features)) # arr_x = [] # arr_y = [] # for i in range(len(tracked_features)): # f = tracked_features[i] # x = f[0][0] # y = f[0][1] # arr_x.append(x) # arr_y.append(y) # print("X_arr" + str(arr_x)) # print("Y_arr" + str(arr_y)) # print ("X SORTED " + 
str(sorted(arr_x))) # print ("Y SORTED " + str(sorted(arr_y))) # arr_x = sorted(arr_x) # arr_y = sorted(arr_y) # mid = len(arr_x)/2 # X = arr_x[mid] # mid = len(arr_y)/2 # Y = arr_y[mid] # new_feature_number = 0 # temp_set_number = [] # temp_distance = [] # j = 0 # for i in range(features_number): # if status[i] == 1: # new_feature_number += 1 # #temp_set_number.append() # #temp_distance.append(height_from_floor[i]) # j += 1 # #height_from_floor = temp_distance # features_number = new_feature_number # #print("Features_num" + str(features_number)) # features = [] # for i in range(features_number): # features.append(tracked_features[i]) # features = np.array(features) # tracked_features = [] # current_frame = next_frame.copy() # except Exception, e: # raise e # #-------Showing Points ------------------------ # for i in range(features_number): # # print ("features " + str(features)) # # print ("features0 " + str(features[0])) # # print ("features00 " + str(features[0][0])) # # print ("features000 " + str(features[0][0][0])) # #print ("features " + str(features[i])) # cv2.circle(current_frame_copy, # tuple(features[i][0]), # 3, # 255, # -1) # cv2.circle(current_frame_copy, # (X,Y), # 5, # (0,0,255), # -1) # show the output images cv2.imshow("HOG", current_frame_copy) key = cv2.waitKey(1) & 0xFF if key == ord("w"): break camera.release() cv2.destroyAllWindows()
def text_detector(image): # resize for EAST usage image = cv2.resize(image, (640, 320), interpolation=cv2.INTER_AREA) orig = image # could be optimized later, leave out extra copy # orig_for_crop = image.copy() (H, W) = image.shape[:2] (newW, newH) = (640, 320) rW = W / float(newW) rH = H / float(newH) image = cv2.resize(image, (newW, newH)) (H, W) = image.shape[:2] layerNames = [ "feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"] blob = cv2.dnn.blobFromImage(image, 1.0, (W, H), (123.68, 116.78, 103.94), swapRB=True, crop=False) net.setInput(blob) (scores, geometry) = net.forward(layerNames) (numRows, numCols) = scores.shape[2:4] rects = [] confidences = [] for y in range(0, numRows): scoresData = scores[0, 0, y] xData0 = geometry[0, 0, y] xData1 = geometry[0, 1, y] xData2 = geometry[0, 2, y] xData3 = geometry[0, 3, y] anglesData = geometry[0, 4, y] # loop over the number of columns for x in range(0, numCols): # if our score does not have sufficient probability, ignore it if scoresData[x] < 0.5: continue # compute the offset factor as our resulting feature maps will # be 4x smaller than the input image (offsetX, offsetY) = (x * 4.0, y * 4.0) # extract the rotation angle for the prediction and then # compute the sin and cosine angle = anglesData[x] cos = np.cos(angle) sin = np.sin(angle) # use the geometry volume to derive the width and height of # the bounding box h = xData0[x] + xData2[x] w = xData1[x] + xData3[x] # compute both the starting and ending (x, y)-coordinates for # the text prediction bounding box padding = 5 endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))+padding endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))+padding startX = int(endX - w)-padding startY = int(endY - h)-padding # add the bounding box coordinates and probability score to # our respective lists rects.append((startX, startY, endX, endY)) confidences.append(scoresData[x]) boxes = non_max_suppression(np.array(rects), probs=confidences) # count = 0 cropped_imgs = [] for (startX, startY, endX, endY) in boxes: startX = int(startX * rW) startY = int(startY * rH) endX = int(endX * rW) endY = int(endY * rH) # crop_object(orig, startX, startY, endX, endY, count) # count += 1 cropped_img = orig[int(startY):int(endY), int(startX):int(endX)] cropped_imgs.append(cropped_img) # draw the bounding box on the image # cv2.rectangle(orig, (startX, startY), (endX, endY), (0, 255, 0), 3) return cropped_imgs
def image_processing(image, orig):
    # grab the original image dimensions
    (origH, origW) = image.shape[:2]
    height = 320
    width = 320
    min_confidence = 0.5

    # set the new height and width to the default 320 taken from the args
    # dictionary
    (newW, newH) = (width, height)

    # calculate the ratio between the original and the new image for both
    # height and width; this ratio is used to project bounding box
    # locations back onto the original image
    rW = origW / float(newW)
    rH = origH / float(newH)

    image = cv2.resize(image, (newW, newH))
    (H, W) = image.shape[:2]

    # construct a blob from the image to forward pass it to the EAST model
    blob = cv2.dnn.blobFromImage(image, 1.0, (W, H),
        (123.68, 116.78, 103.94), swapRB=True, crop=False)

    # load the pre-trained EAST model for text detection
    east = "models/frozen_east_text_detection.pb"
    net = cv2.dnn.readNet(east)

    # the following two layers need to be pulled from the EAST model
    layerNames = ["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"]

    # forward pass the blob through the network to get the desired output layers
    net.setInput(blob)
    (scores, geometry) = net.forward(layerNames)

    # decode the predictions into bounding boxes and probability scores,
    # then apply non-maxima suppression
    (boxes, confidence_val) = predictions(scores, geometry)
    boxes = non_max_suppression(np.array(boxes), probs=confidence_val)

    ## Text Detection and Recognition

    # initialize the list of results
    results = []

    # loop over the bounding boxes
    for (startX, startY, endX, endY) in boxes:
        # scale the coordinates based on the respective ratios so the
        # bounding box lands on the original image
        startX = int(startX * rW)
        startY = int(startY * rH)
        endX = int(endX * rW)
        endY = int(endY * rH)

        # extract the region of interest
        r = orig[startY:endY, startX:endX]

        # configuration settings to convert the image to a string:
        # English, LSTM engine, treat the ROI as a single word
        configuration = "-l eng --oem 1 --psm 8"

        # recognize the text inside the bounding box
        text = pytesseract.image_to_string(r, config=configuration)

        # append the bbox coordinates and the associated text to the results
        results.append(((startX, startY, endX, endY), text))

    # display the image with bounding boxes and the recognized text
    orig_image = orig.copy()

    # move over the results and draw them on the image
    for ((start_X, start_Y, end_X, end_Y), text) in results:
        # display the text detected by Tesseract
        print("{}\n".format(text))

        # strip non-ASCII characters so OpenCV can draw the text
        text = "".join([x if ord(x) < 128 else "" for x in text]).strip()
        cv2.rectangle(orig_image, (start_X, start_Y), (end_X, end_Y),
            (0, 0, 255), 2)
        cv2.putText(orig_image, text, (start_X, start_Y - 30),
            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)

    # print(f"boxes are {boxes}")
    plt.imshow(orig_image)
    plt.title("Output")
    plt.show()

    cv2.imshow("Original", image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    return boxes
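# Several snippets here call predictions()/decode_predictions() without
# defining it. A minimal sketch is given below, assuming the same EAST
# output decoding as the inline loop in text_detector() above; the 0.5
# confidence floor and the 4x feature-map stride are taken from that loop,
# not from a shared module.
def decode_predictions(scores, geometry, min_confidence=0.5):
    (numRows, numCols) = scores.shape[2:4]
    rects = []
    confidences = []
    for y in range(0, numRows):
        scoresData = scores[0, 0, y]
        xData0 = geometry[0, 0, y]
        xData1 = geometry[0, 1, y]
        xData2 = geometry[0, 2, y]
        xData3 = geometry[0, 3, y]
        anglesData = geometry[0, 4, y]
        for x in range(0, numCols):
            # skip weak activations
            if scoresData[x] < min_confidence:
                continue
            # the feature maps are 4x smaller than the network input
            (offsetX, offsetY) = (x * 4.0, y * 4.0)
            angle = anglesData[x]
            cos = np.cos(angle)
            sin = np.sin(angle)
            # distances to the four box edges give width and height
            h = xData0[x] + xData2[x]
            w = xData1[x] + xData3[x]
            endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
            endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
            startX = int(endX - w)
            startY = int(endY - h)
            rects.append((startX, startY, endX, endY))
            confidences.append(scoresData[x])
    return (rects, confidences)

# image_processing() calls this helper as predictions(); under the same
# assumption the two names refer to the same decoder.
predictions = decode_predictions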
car_cascade = cv2.CascadeClassifier(cascPath)

# Read the image
image = cv2.imread(imagePath)

# Resize the image so it fits on the screen
image1 = imutils.resize(image, height=500)
gray = cv2.cvtColor(image1, cv2.COLOR_BGR2GRAY)

# Detect faces in the image
faces = car_cascade.detectMultiScale(
    gray,
    scaleFactor=1.1,
    minNeighbors=5,
    minSize=(30, 30),
    # flags = cv2.cv.CV_HAAR_SCALE_IMAGE
    flags=0
)

# non_max_suppression() expects corner coordinates, so convert the
# (x, y, w, h) detections before suppressing overlaps
rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in faces])
face = non_max_suppression(rects, probs=None, overlapThresh=0.3)

if len(faces) == 1:
    print("Found {0} face!".format(len(faces)))
else:
    print("Found {0} faces!".format(len(faces)))

# Draw a rectangle around the surviving boxes
for (xA, yA, xB, yB) in face:
    cv2.rectangle(image1, (xA, yA), (xB, yB), (0, 255, 0), 2)

cv2.imshow("Faces found", image1)
cv2.waitKey(0)
def det_txt_ocr(img_path):
    try:
        # load the input image and grab the image dimensions
        image = cv2.imread(img_path)

        # angle for rotation
        # fix ------
        ag = 357
        # fix ------

        # rotation
        num_rows, num_cols = image.shape[:2]
        rotation_matrix = cv2.getRotationMatrix2D((num_cols / 2, num_rows / 2), ag, 1)
        image = cv2.warpAffine(image, rotation_matrix, (num_cols, num_rows))

        orig = image.copy()
        (origH, origW) = image.shape[:2]

        # set the new width and height and then determine the ratio in change
        # for both the width and height
        # fix ---------------------------
        inW = 320
        inH = 160
        # -------------------------------
        (newW, newH) = (inW, inH)
        rW = origW / float(newW)
        rH = origH / float(newH)

        # resize the image and grab the new image dimensions
        image = cv2.resize(image, (newW, newH))
        (H, W) = image.shape[:2]

        # construct a blob from the image and then perform a forward pass of
        # the model to obtain the two output layer sets -- the output
        # probabilities and the geometry used to derive the text boxes
        # fix ---------------------------
        blobsize = 0.5
        # -------------------------------
        blob = cv2.dnn.blobFromImage(image, blobsize, (W, H),
            (123.68, 116.78, 103.94), swapRB=True, crop=False)
        netdet.setInput(blob)
        (scores, geometry) = netdet.forward(layerNames)

        # decode the predictions, then apply non-maxima suppression to
        # suppress weak, overlapping bounding boxes
        (rects, confidences) = decode_predictions(scores, geometry)
        boxes = non_max_suppression(np.array(rects), probs=confidences)

        results = []

        # 1 round
        for (startX, startY, endX, endY) in boxes:
            # scale the bounding box coordinates based on the respective
            # ratios
            startX = int(startX * rW)
            startY = int(startY * rH)
            endX = int(endX * rW)
            endY = int(endY * rH)

            # in order to obtain a better OCR of the text we can potentially
            # apply a bit of padding surrounding the bounding box -- here we
            # are computing the deltas in both the x and y directions
            # fix ---------------------------
            pX = 0.0
            pY = 0.2
            # -------------------------------
            dX = int((endX - startX) * pX)
            dY = int((endY - startY) * pY)

            # apply padding to each side of the bounding box, respectively
            startX = 0
            startY = max(0, startY - dY)
            endX = origW
            endY = min(origH, endY + (dY * 2))

            # extract the actual padded ROI
            roi = orig[startY:endY, startX:endX]
            pd = orig[endY:origH, startX:endX]

            # in order to apply Tesseract v4 to OCR text we must supply the
            # binary location plus the language/engine/page-segmentation flags
            pytesseract.pytesseract.tesseract_cmd = r'Tesseract-OCR\tesseract'
            config = ("-l thafast --oem 1 --psm 7")
            text = pytesseract.image_to_string(roi, config=config)

            # add the bounding box coordinates and OCR'd text to the list
            # of results
            results.append(((startX, startY, endX, endY), text))

            # just 1 round
            break

        # sort the results bounding box coordinates from top to bottom
        results = sorted(results, key=lambda r: r[0][1])

        # loop over the results
        idx = 1
        for ((startX, startY, endX, endY), text) in results:
            # blank out characters that tend to be misreads
            x = list(text)
            for i, item in enumerate(x):
                if ord(item) in (46, 91, 93):
                    x[i] = " "
                if ord(item) == 124:
                    x[i] = " "
                if ord(item) > 3630:
                    x[i] = " "

            # text output
            text_out = "".join([c if ord(c) > 44 else "" for c in x]).strip()
            # print("--OCR--")
            # print(text_out)

            output = orig.copy()
            cv2.rectangle(output, (startX, startY), (endX, endY), (127, 255, 0), 1)

            # save pd
            write_name = r'pd\pd_' + str(idx) + '.png'
            cv2.imwrite(write_name, pd)
            idx += 1

        # show the output image
        # cv2.imshow("Text Detection", output)
        # cv2.moveWindow("Text Detection", 600, 300)
        # cv2.imshow("Text ROI", roi)
        # cv2.moveWindow("Text ROI", 600, 400)
        # cv2.imshow("PV", pv)
        # cv2.moveWindow("PV", 600, 500)
        # (origH2, origW2) = pv.shape[:2]
        # print(origH2)
        # print(origW2)

        cv2.waitKey(0)
        return text_out
    except Exception as e:
        text_out = "type error: " + str(e)
        # print("can't detect text")
        return text_out
imagePath = "/home/pi/Desktop/Canteen/canteen145.jpe"
image = cv2.imread(imagePath)
image = imutils.resize(image, width=min(800, image.shape[1]))
orig = image.copy()

(rects, weights) = hog.detectMultiScale(image, winStride=(4, 4),
    padding=(4, 4), scale=1.05)  # padding (8, 8), scale 1.01

for (x, y, w, h) in rects:
    cv2.rectangle(orig, (x, y), (x + w, y + h), (0, 0, 255), 2)

rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)  # 0.65, bigger value means less overlap

for (xA, yA, xB, yB) in pick:
    cv2.rectangle(image, (xA, yA), (xB, yB), (0, 255, 0), 2)

filename = imagePath  # [imagePath.rfind("/") + 1:]
print("[INFO] {}: {} original boxes, {} after suppression".format(
    filename, len(rects), len(pick)))

# cv2.imshow("Before NMS", orig)
# cv2.imshow("After NMS", image)
# cv2.waitKey(0)
# cv2.destroyAllWindows()

cv2.imwrite('tuning_canteenpic.jpeg', image)
cv2.destroyAllWindows()
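# The script above assumes a configured `hog` detector; a minimal sketch of
# that setup, matching the initialization used in main() further below:
hog = cv2.HOGDescriptor()
hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())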
def OCR():
    cv2.destroyAllWindows()
    engine.say("A4 mode press 13 and for medicine mode press 11")
    engine.runAndWait()
    engine.stop()
    # ///////////////////////////////////////////////////////
    # ocr_mode = int(input('''Enter the mode of the OCR operation:
    # A4 Papers: 1
    # Medicine: 2 '''))

    ocr_run = True
    while ocr_run:
        sleep(.25)
        if GPIO.input(11) == GPIO.HIGH:
            print("Medicine Mode activated")
            ###################################################################
            image = cv2.imread('1.jpg', cv2.IMREAD_COLOR)
            print("Image loaded")
            # ///////////////////////////////////////////////////////
            orig = image.copy()
            (origH, origW) = image.shape[:2]

            # set the new width and height and then determine the ratio in
            # change for both the width and height
            (newW, newH) = (320, 320)
            rW = origW / float(newW)
            rH = origH / float(newH)

            # resize the image and grab the new image dimensions
            image = cv2.resize(image, (newW, newH))
            (H, W) = image.shape[:2]

            # define the two output layer names for the EAST detector model
            # that we are interested in -- the first is the output
            # probabilities and the second can be used to derive the bounding
            # box coordinates of text
            layerNames = [
                "feature_fusion/Conv_7/Sigmoid",
                "feature_fusion/concat_3"]

            # load the pre-trained EAST text detector
            print("[INFO] loading EAST text detector...")
            net = cv2.dnn.readNet("/home/pi/Desktop/OCR_TTS-master/frozen_east_text_detection.pb")

            # construct a blob from the image and then perform a forward pass
            # of the model to obtain the two output layer sets
            blob = cv2.dnn.blobFromImage(image, 1.0, (W, H),
                (123.68, 116.78, 103.94), swapRB=True, crop=False)
            net.setInput(blob)
            (scores, geometry) = net.forward(layerNames)

            # decode the predictions, then apply non-maxima suppression to
            # suppress weak, overlapping bounding boxes
            (rects, confidences) = decode_predictions(scores, geometry)
            boxes = non_max_suppression(np.array(rects), probs=confidences)

            final_list = []
            text_empty = ''

            # loop over the bounding boxes
            for (startX, startY, endX, endY) in boxes:
                # scale the bounding box coordinates based on the respective
                # ratios
                startX = int(startX * rW)
                startY = int(startY * rH)
                endX = int(endX * rW)
                endY = int(endY * rH)

                dX = int((endX - startX) * 0.0)
                dY = int((endY - startY) * 0.0)
                startX = max(0, startX - dX)
                startY = max(0, startY - dY)
                endX = min(origW, endX + (dX * 2))
                endY = min(origH, endY + (dY * 2))

                roi = orig[startY:endY, startX:endX]
                ################################################################
                text = pytesseract.image_to_string(
                    roi, config="-l eng --oem 1 --psm 11")
                print("for:" + text)
                text_empty = text_empty + text + " "
                print(text_empty)

            engine.say(text_empty)
            engine.runAndWait()
            engine.stop()

            exit_loop = True
            sleep(1)
            engine.say("repeat press the same button")
            engine.runAndWait()
            engine.stop()
            while exit_loop:
                sleep(.25)
                if GPIO.input(11) == GPIO.HIGH:
                    engine.say(text_empty)
                    engine.runAndWait()
                    engine.stop()
                elif GPIO.input(13) == GPIO.HIGH:
                    ocr_run = False
                    exit_loop = False
                    engine.stop()
                else:
                    pass
        #######################################################################
        elif GPIO.input(13) == GPIO.HIGH:
            print("A4 Mode")
            # --------------------------- Load Image ---------------------------#
            img = cv2.imread('1.png', cv2.IMREAD_COLOR)

            # --------------------------- Grayscale Image ---------------------------#
            # convert to gray to reduce detail
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            # /////////////////////////////////////////////////////////////////

            # --------------------------- Filter 1 Image ---------------------------#
            gray = cv2.bilateralFilter(gray, 11, 17, 17)  # blur to reduce noise
            # /////////////////////////////////////////////////////////////////

            # --------------------------- Thresholding Image ---------------------------#
            gray = cv2.adaptiveThreshold(
                gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                cv2.THRESH_BINARY, 11, 2)
            # /////////////////////////////////////////////////////////////////

            # --------------------------- Result ---------------------------#
            original = pytesseract.image_to_string(gray, config=' -l eng --oem 1 ')
            print(original)
            engine.say("words detected are " + original)
            engine.runAndWait()
            engine.stop()

            exit_loop = True
            sleep(1)
            engine.say("repeat press the same button")
            engine.runAndWait()
            engine.stop()
            while exit_loop:
                sleep(.25)
                if GPIO.input(13) == GPIO.HIGH:
                    engine.say(original)
                    engine.runAndWait()
                    engine.stop()
                elif GPIO.input(11) == GPIO.HIGH:
                    ocr_run = False
                    exit_loop = False
                    engine.stop()
                else:
                    pass
        elif GPIO.input(15) == GPIO.HIGH:
            break
        else:
            pass
def recognize(self, image):
    # grab the image dimensions
    image = imutils.resize(image, width=320)
    orig = image.copy()
    (origH, origW) = image.shape[:2]

    # set the new width and height and then determine the ratio in change
    # for both the width and height
    (newW, newH) = (self.config_width, self.config_height)
    rW = origW / float(newW)
    rH = origH / float(newH)

    # resize the image and grab the new image dimensions
    image = cv2.resize(image, (newW, newH))
    (H, W) = image.shape[:2]

    # define the two output layer names for the EAST detector model that
    # we are interested in -- the first is the output probabilities and the
    # second can be used to derive the bounding box coordinates of text
    layer_names = [
        "feature_fusion/Conv_7/Sigmoid",
        "feature_fusion/concat_3"
    ]

    # construct a blob from the image and then perform a forward pass of
    # the model to obtain the two output layer sets
    blob = cv2.dnn.blobFromImage(image, 1.0, (W, H),
        (123.68, 116.78, 103.94), swapRB=True, crop=False)
    self.net.setInput(blob)
    (scores, geometry) = self.net.forward(layer_names)

    # decode the predictions, then apply non-maxima suppression to
    # suppress weak, overlapping bounding boxes
    (rects, confidences) = self.decode_predictions(scores, geometry)
    boxes = non_max_suppression(np.array(rects), probs=confidences)

    # initialize the list of results
    results = []

    # loop over the bounding boxes
    for (startX, startY, endX, endY) in boxes:
        # scale the bounding box coordinates based on the respective
        # ratios
        startX = int(startX * rW)
        startY = int(startY * rH)
        endX = int(endX * rW)
        endY = int(endY * rH)

        # in order to obtain a better OCR of the text we can potentially
        # apply a bit of padding surrounding the bounding box -- here we
        # are computing the deltas in both the x and y directions
        padding = self.pyconfig.getfloat('text_recognition', 'padding')
        dX = int((endX - startX) * padding)
        dY = int((endY - startY) * padding)

        # apply padding to each side of the bounding box, respectively
        startX = max(0, startX - dX)
        startY = max(0, startY - dY)
        endX = min(origW, endX + (dX * 2))
        endY = min(origH, endY + (dY * 2))

        # extract the actual padded ROI
        roi = orig[startY:endY, startX:endX]

        # to OCR the ROI with Tesseract we supply (1) a language, (2) an
        # OEM flag of 0 to select the legacy engine (which honors the
        # character whitelist), and (3) a PSM value of 8, which treats
        # the ROI as a single word
        config = "-l eng --oem 0 -c tessedit_char_whitelist=123ABCDEIGNR --psm 8"
        text = pytesseract.image_to_string(roi, config=config)

        # add the bounding box coordinates and OCR'd text to the list
        # of results
        results.append(((startX, startY, endX, endY), text))

    # sort the results bounding box coordinates from top to bottom
    results = sorted(results, key=lambda r: r[0][1])

    output_data = []
    # loop over the results
    for ((startX, startY, endX, endY), text) in results:
        # strip out non-ASCII text so the recognized string is safe to
        # handle downstream
        text = "".join([c if ord(c) < 128 else "" for c in text]).strip()
        output_data.append(text)

    return output_data
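# A minimal constructor sketch for the class that owns recognize(),
# assuming it only needs a loaded EAST network plus the configured input
# size; the 'width'/'height' keys are hypothetical, chosen to mirror the
# pyconfig.getfloat('text_recognition', 'padding') call above. The
# self.decode_predictions() method it also relies on is assumed to match
# the module-level decoder sketched after image_processing().
def __init__(self, east_model_path, pyconfig):
    self.net = cv2.dnn.readNet(east_model_path)
    self.pyconfig = pyconfig
    self.config_width = pyconfig.getint('text_recognition', 'width')
    self.config_height = pyconfig.getint('text_recognition', 'height')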
]

# load the pre-trained EAST text detector
print("[INFO] loading EAST text detector...")
net = cv2.dnn.readNet(args["east"])

# construct a blob from the image and then forward pass it through the
# model to obtain the two output layer sets
blob = cv2.dnn.blobFromImage(image, 1.0, (W, H),
    (123.68, 116.78, 103.94), swapRB=True, crop=False)
net.setInput(blob)
(scores, geometry) = net.forward(layerNames)

# decode the predictions, then apply NMS to suppress weak,
# overlapping bounding boxes
(rects, confidences) = decode_predictions(scores, geometry)
boxes = non_max_suppression(np.array(rects), probs=confidences)

# initialise the list of results
results = []

# loop over the bounding boxes
for (startX, startY, endX, endY) in boxes:
    # scale the bounding box coordinates based on the respective ratios
    startX = int(startX * rW)
    startY = int(startY * rH)
    endX = int(endX * rW)
    endY = int(endY * rH)

    # apply padding surrounding the bounding box
    dX = int((endX - startX) * args["padding"])
    dY = int((endY - startY) * args["padding"])
def Bemoji(imagesrc):
    image = np.array(imagesrc)

    # set B emoji scaling
    scalefactor = 2
    scalevar = (scalefactor - 1) / 2

    # load the input image and grab the image dimensions
    min_conf = 0.1
    eastpath = "frozen_east_text_detection.pb"
    orig = image.copy()
    (H, W) = image.shape[:2]

    (newW, newH) = (640, 640)
    rW = W / float(newW)
    rH = H / float(newH)

    image = cv2.resize(image, (newW, newH))
    (H, W) = image.shape[:2]

    # define the two output layer names for the EAST detector model that
    # we are interested in -- the first is the output probabilities and the
    # second can be used to derive the bounding box coordinates of text
    layerNames = [
        "feature_fusion/Conv_7/Sigmoid",
        "feature_fusion/concat_3"]

    # load the pre-trained EAST text detector
    print("[INFO] loading EAST text detector...")
    net = cv2.dnn.readNet(eastpath)

    # construct a blob from the image and then perform a forward pass of
    # the model to obtain the two output layer sets
    blob = cv2.dnn.blobFromImage(image, 1.0, (W, H),
        (123.68, 116.78, 103.94), swapRB=True, crop=False)
    start = time.time()
    net.setInput(blob)
    (scores, geometry) = net.forward(layerNames)
    end = time.time()

    # show timing information on text prediction
    print("[INFO] text detection took {:.6f} seconds".format(end - start))

    # grab the number of rows and columns from the scores volume, then
    # initialize our set of bounding box rectangles and corresponding
    # confidence scores
    (numRows, numCols) = scores.shape[2:4]
    rects = []
    confidences = []

    # loop over the number of rows
    for y in range(0, numRows):
        # extract the scores (probabilities), followed by the geometrical
        # data used to derive potential bounding box coordinates that
        # surround text
        scoresData = scores[0, 0, y]
        xData0 = geometry[0, 0, y]
        xData1 = geometry[0, 1, y]
        xData2 = geometry[0, 2, y]
        xData3 = geometry[0, 3, y]
        anglesData = geometry[0, 4, y]

        # loop over the number of columns
        for x in range(0, numCols):
            # if our score does not have sufficient probability, ignore it
            if scoresData[x] < min_conf:
                continue

            # compute the offset factor as our resulting feature maps will
            # be 4x smaller than the input image
            (offsetX, offsetY) = (x * 4.0, y * 4.0)

            # extract the rotation angle for the prediction and then
            # compute the sine and cosine
            angle = anglesData[x]
            cos = np.cos(angle)
            sin = np.sin(angle)

            # use the geometry volume to derive the width and height of
            # the bounding box
            h = xData0[x] + xData2[x]
            w = xData1[x] + xData3[x]

            # compute both the starting and ending (x, y)-coordinates for
            # the text prediction bounding box
            endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
            endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
            startX = int(endX - w)
            startY = int(endY - h)

            # add the bounding box coordinates and probability score to
            # our respective lists
            rects.append((startX, startY, endX, endY))
            confidences.append(scoresData[x])

    # apply non-maxima suppression to suppress weak, overlapping bounding
    # boxes
    boxes = non_max_suppression(np.array(rects), probs=confidences)

    # load the emoji and swap its colour channels to match the RGB input
    Bimage = cv2.imread("B.png", -1)
    b, g, r, a = cv2.split(Bimage)
    Bimage = cv2.merge((r, g, b, a))

    print("[INFO] loading Tesseract...")
    start = time.time()

    for (startX, startY, endX, endY) in boxes:
        roi = image[startY:endY, startX:endX]
        gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
        preprocess = "thresh"

        # check to see if we should apply thresholding to preprocess the
        # image
        if preprocess == "thresh":
            gray = cv2.threshold(gray, 0, 255,
                cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
        # make a check to see if median blurring should be done to remove
        # noise
        elif preprocess == "blur":
            gray = cv2.medianBlur(gray, 3)

        # write the grayscale image to disk as a temporary file so we can
        # apply OCR to it
        filename = "{}.png".format(os.getpid())
        if gray is not None:
            cv2.imwrite(filename, gray)
            text = pytesseract.image_to_boxes(Image.open(filename))
            os.remove(filename)
        else:
            text = ""
        print(text)

        text2 = str.split(text)
        if text2:
            # image_to_boxes() returns one "letter x1 y1 x2 y2 page" record
            # per character, so walk the flat token list six at a time
            dX = [int(text2[x * 6 + 1]) for x in range(len(text2) // 6)]
            dY = [int(text2[x * 6 + 2]) for x in range(len(text2) // 6)]
            dW = [int(text2[x * 6 + 3]) - int(text2[x * 6 + 1]) for x in range(len(text2) // 6)]
            dH = [int(text2[x * 6 + 4]) - int(text2[x * 6 + 2]) for x in range(len(text2) // 6)]
            # print(str(dW))
            startX = [int((startX + dX[x]) * rW) for x in range(len(text2) // 6)]
            startY = [int((startY + dY[x]) * rH) for x in range(len(text2) // 6)]
            letter = [text2[x * 6] for x in range(len(text2) // 6)]

            for x in range(len(text2) // 6):
                if letter[x] in ("G", "g", "B", "b", "P", "p"):
                    exception = 0
                    try:
                        placeimage(orig, Bimage,
                            startX[x] - int(scalevar * (dW[x] * rH)),
                            startY[x] - int(scalevar * (dH[x] * rW)),
                            int(dW[x] * rH) * scalefactor,
                            int(dH[x] * rW) * scalefactor)
                    except ValueError:
                        exception = 1
                    if exception:
                        # shrink the emoji step by step until it fits
                        scalecount = scalefactor - 0.1
                        while scalecount > 0:
                            try:
                                placeimage(orig, Bimage,
                                    startX[x] - int(((scalecount - 1) / 2) * (dW[x] * rH)),
                                    startY[x] - int(((scalecount - 1) / 2) * (dH[x] * rW)),
                                    int(dW[x] * rH * scalecount),
                                    int(dH[x] * rW * scalecount))
                            except ValueError:
                                scalecount = scalecount - 0.1
                                print(str(letter[x]) + " " + str(scalecount))
                            else:
                                break

    end = time.time()
    print("[INFO] letter detection took {:.6f} seconds".format(end - start))

    imagesrc = Image.fromarray(orig.astype('uint8'))
    return imagesrc
while True:
    ret, frame = cap.read()
    frame = imutils.resize(frame, width=min(400, frame.shape[1]))
    orig = frame.copy()

    (rects, weights) = hog.detectMultiScale(frame, winStride=(4, 4),
        padding=(8, 8), scale=1.05)

    for (x, y, w, h) in rects:
        cv2.rectangle(orig, (x, y), (x + w, y + h), (0, 0, 255), 2)

    rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
    pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)

    for (xA, yA, xB, yB) in pick:
        cv2.rectangle(frame, (xA, yA), (xB, yB), (0, 255, 0), 2)

    cv2.imshow('frame', frame)
    count = count + len(pick)
    print(count)

    ch = 0xFF & cv2.waitKey(1)
    if ch == 27:
        # the original snippet is cut off here; exiting on ESC is the
        # evident intent
        break
def main():
    hog = cv2.HOGDescriptor()
    hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())

    video = cv2.VideoCapture(0)
    is_ok, bgr_image_input = video.read()
    if not is_ok:
        print("Cannot read video source")
        sys.exit()

    height = bgr_image_input.shape[0]
    width = bgr_image_input.shape[1]

    # define fname before the try block so the error message below can use it
    fname = "OUTPUT.avi"
    try:
        fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
        fps = 30.0
        videoWriter = cv2.VideoWriter(fname, fourcc, fps, (width, height))
    except Exception:
        print("Error: can't create output video: %s" % fname)
        sys.exit()

    fps = video.get(cv2.CAP_PROP_FPS)
    start = time.time()
    frame = 0

    while True:
        is_ok, bgr_image_input = video.read()
        if not is_ok:
            break
        frame = frame + 1

        # load the image and resize it to (1) reduce detection time
        # and (2) improve detection accuracy -- note that the writer was
        # opened with the capture dimensions, so the resized frames written
        # below must still match (width, height) to end up in the file
        bgr_image_input = imutils.resize(bgr_image_input,
            width=min(400, bgr_image_input.shape[1]))
        orig = bgr_image_input.copy()

        # detect people in the image
        (rects, weights) = hog.detectMultiScale(bgr_image_input,
            winStride=(4, 4), padding=(8, 8), scale=1.05)

        # draw the original bounding boxes
        for (x, y, w, h) in rects:
            cv2.rectangle(orig, (x, y), (x + w, y + h), (0, 0, 255), 2)

        # apply non-maxima suppression to the bounding boxes using a
        # fairly large overlap threshold to try to maintain overlapping
        # boxes that are still people
        rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
        pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)

        # draw the final bounding boxes
        for (xA, yA, xB, yB) in pick:
            cv2.rectangle(bgr_image_input, (xA, yA), (xB, yB), (0, 255, 0), 2)

        # show some information on the number of bounding boxes
        '''filename = imagePath[imagePath.rfind("/") + 1:]
        print("[INFO] {}: {} original boxes, {} after suppression".format(
            filename, len(rects), len(pick)))'''

        # show the output images
        # cv2.imshow("Before NMS", orig)
        now = time.time()
        fps = frame / (now - start)
        fps = np.round(fps, 2)
        cv2.putText(bgr_image_input, "fps: " + str(fps), (50, 50),
            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1)

        cv2.imshow("After NMS", bgr_image_input)
        videoWriter.write(bgr_image_input)

        key_pressed = cv2.waitKey(1) & 0xFF
        if key_pressed == 27 or key_pressed == ord('q'):
            break

    # the original snippet ends at the break above; releasing the capture
    # and writer handles is the evident intent
    video.release()
    videoWriter.release()
    cv2.destroyAllWindows()
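# Entry-point guard, assuming this snippet is meant to run as a script:
if __name__ == "__main__":
    main()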