Example #1
def analyze(image):
    image = imutils.resize(image, width=min(400, image.shape[1]))
    #orig = image.copy()

    # detect people in the image
    (rects, weights) = hog.detectMultiScale(image, winStride=(4, 4),
        padding=(8, 8), scale=1.05)

    # draw the original bounding boxes
    #for (x, y, w, h) in rects:
        #cv2.rectangle(orig, (x, y), (x + w, y + h), (0, 0, 255), 2)

    # apply non-maxima suppression to the bounding boxes using a
    # fairly large overlap threshold to try to maintain overlapping
    # boxes that are still people
    rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
    pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)

    # draw the final bounding boxes
    #for (xA, yA, xB, yB) in pick:
    #    cv2.rectangle(image, (xA, yA), (xB, yB), (0, 255, 0), 2)

    # show some information on the number of bounding boxes
    #filename = imagePath[imagePath.rfind("/") + 1:]
    #print("[INFO] {}: {} original boxes, {} after suppression".format(
    #    filename, len(rects), len(pick)))

    # show the output images
    #cv2.imshow("Before NMS", orig)
    #cv2.imshow("After NMS", image)
    #cv2.waitKey(0)
    return pick
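The snippets on this page assume a module-level HOG person detector and the imutils NMS helper, neither of which is shown here. A minimal setup sketch (the names hog and non_max_suppression match the code above; everything else is assumed) would be:

import cv2
import imutils
import numpy as np
from imutils.object_detection import non_max_suppression

# initialize the HOG descriptor with OpenCV's default people detector
hog = cv2.HOGDescriptor()
hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())

# example usage of the function above
# boxes = analyze(cv2.imread("people.jpg"))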
def processImage(frame):
	#frame = imutils.resize(frame, width = min(600, frame.shape[1]))

	# detect people in the image
	rects = classifier.detectMultiScale(frame, 1.1, 200)# winStride=(4, 4), padding=(8, 8), scale=1.05)

	# apply non-maxima suppression to the bounding boxes using a
	# fairly large overlap threshold to try to maintain overlapping
	# boxes that are still people
	rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
	pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)

	# draw the final bounding boxes
	for (xA, yA, xB, yB) in pick:	
		cv2.rectangle(frame, (xA, yA), (xB, yB), (0, 255, 0), 2)

	# show some information on the number of bounding boxes
	print("[INFO]: {} people are currently detected in the stream".format(len(pick)))

	# show the output images
	try:
		cv2.imshow('Stream', frame)
	except:
		pass
	cv2.waitKey(1)
def detector(image):
    '''
    @image is a numpy array
    '''

    clone = image.copy()

    (rects, weights) = HOGCV.detectMultiScale(image, winStride=(4, 4),
                                              padding=(8, 8), scale=1.05)

    print(rects)
    print(weights)
    # draw the original bounding boxes
    for (x, y, w, h) in rects:
        print ("inside rects:",x,y,w,h)
        cv2.rectangle(clone, (x, y), (x + w, y + h), (0, 0, 255), 2)

    # apply non-max suppression from the imutils package to discard overlapping
    # boxes
    rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
    result = non_max_suppression(rects, probs=None, overlapThresh=0.65)
    #if result:
    print ("result=")
    print (result)
    return result
Example #4
def process(content, app):
    if not isinstance(content, unicode):
        return []
    image_data = re.sub('^data:image/.+;base64,', '', content).decode('base64')
    image = Image.open(cStringIO.StringIO(image_data))
    image = cv2.cvtColor(np.array(image), 2)
    # cv2.imwrite('image.jpg', image)

    # gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    faces = cascade.detectMultiScale(image, 1.03, 500, minSize=(10, 10))

    if(len(faces) <= 0):
        return []

    rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in faces])
    pick = non_max_suppression(rects, probs=None, overlapThresh=0.15)

    good = []
    for (x, y, x2, y2) in pick:
        good.append(
            {'x': x*1, 'y': y*1, 'width': (x2-x)*1, 'height': (y2-y)*1})

    # for f in good:
    #   cv2.rectangle(
    #        image,
    #        (f.get('x'), f.get('y')),
    #        (f.get('width'), f.get('height')),
    #        (0, 255, 0), 6)
    #    cv2.imwrite('image.jpg', image)

    return good
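The process function above is Python 2 code (unicode, cStringIO, str.decode('base64')). A rough Python 3 sketch of just the image-decoding step, assuming the same base64 data-URL input, might look like:

import base64
import io
import re

import cv2
import numpy as np
from PIL import Image

def decode_data_url(content):
    # strip the data-URL prefix, then base64-decode the payload
    image_data = base64.b64decode(re.sub('^data:image/.+;base64,', '', content))
    # load with PIL and convert to an OpenCV-style BGR array
    pil_image = Image.open(io.BytesIO(image_data))
    return cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)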
    def detect_object(self, frame, min_width=35, min_height=35):
        """
        Pick out the objects in a binarized image.

        :param frame: binarized image
        :param min_width: minimum object width
        :param min_height: minimum object height
        :return: for each object, the top-left (x1, y1) and bottom-right (x2, y2) corners of its bounding box plus its centroid (cx, cy):
                [(x1, y1, x2, y2), (cx, cy)]
        """
        matches = []

        # find the bounding rectangles of the objects
        image, contours, hierarchy = cv.findContours(frame, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_TC89_L1)

        # use non-maximum suppression to avoid multiple rectangles on one object (repeated false detections)
        rects = np.array([(x, y, x + w, y + h) for x, y, w, h in map(cv.boundingRect, contours)])
        pick = non_max_suppression(rects, overlapThresh=0.65)

        # keep only the coordinates (objects) that meet the minimum size
        for x1, y1, x2, y2 in pick:
            # check whether the object size exceeds the configured minimum
            is_valid = (x2 - x1 > min_width) and (y2 - y1 > min_height)

            # if it qualifies, add the rectangle coordinates and the object centroid to the list
            if is_valid:
                centroid = self._get_centroid(x1, y1, x2, y2)

                matches.append([(x1, y1, x2, y2), centroid])

        return matches
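detect_object calls a self._get_centroid helper that is not included in this listing. A plausible sketch, based only on how the docstring describes the return value, is:

    @staticmethod
    def _get_centroid(x1, y1, x2, y2):
        # hypothetical helper: return the centre (cx, cy) of the bounding box
        return ((x1 + x2) // 2, (y1 + y2) // 2)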
Example #6
        def count_peds(self, image):
                if image is None:
                        return(0,image)
                
                image = imutils.resize(image, width=min(400, image.shape[1]))
                orig = image.copy()

                # detect people in the image
                (rects, weights) = self.hog.detectMultiScale(image, winStride=(4, 4),
                        padding=(8, 8), scale=1.05)

                # draw the original bounding boxes
                for (x, y, w, h) in rects:
                        cv2.rectangle(orig, (x, y), (x + w, y + h), (0, 0, 255), 2)

                # apply non-maxima suppression to the bounding boxes using a
                # fairly large overlap threshold to try to maintain overlapping
                # boxes that are still people
                rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
                pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)

                # draw the final bounding boxes
                for (xA, yA, xB, yB) in pick:
                        cv2.rectangle(image, (xA, yA), (xB, yB), (0, 255, 0), 2)
                cv2.putText(image, "{}:Peds".format(len(pick)), (image.shape[1]-40,
                    10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

                # return the image with rectangles drawn over the pedestrians
                # as well as a count of pedestrians
                return( len(pick), image)
Example #7
def beginVideoProcess(im):
    # load the image and resize it to (1) reduce detection time
    # and (2) improve detection accuracy
    image = im.copy()
    orig = image.copy()

    #image = cv2.imread(imagePath)
    image = imutils.resize(image, width=min(400, image.shape[1]))

    # detect people in the image
    (rects, weights) = hog.detectMultiScale(image, winStride=(4, 4),
        padding=(8, 8), scale=1.05)

    # draw the original bounding boxes
    for (x, y, w, h) in rects:
        cv2.rectangle(orig, (x, y), (x + w, y + h), (0, 0, 255), 2)

    # apply non-maxima suppression to the bounding boxes using a
    # fairly large overlap threshold to try to maintain overlapping
    # boxes that are still people
    rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
    pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)

    # draw the final bounding boxes
    for (xA, yA, xB, yB) in pick:
        cv2.rectangle(image, (xA, yA), (xB, yB), (0, 255, 0), 2)

    # show some information on the number of bounding boxes
    filename = "webcam"
    print("[INFO] {}: {}".format(filename, len(pick)))
    # show the output images
    cv2.imshow("After NMS", image)

    return len(pick)
Example #8
File: main.py Project: HungLV4/HOG
def detectMultiscale(filename, winStride = (4, 4)):
	# read image
	filepath = "test/ship/multi_scale/" + filename
	image = cv2.imread(filepath, 0)

	# load classifier
	clf = joblib.load(HOG_CLF_FILE)

	height, width = image.shape

	# detect ships in image
	positions = []
	for i in xrange(0, height - 128 - winStride[0], winStride[0]):
		print float(i) / height * 100
		for j in xrange(0, width - 64 - winStride[0], winStride[1]):
			hog_fd = hog(image[i : i + 128, j : j + 64], orientations=9, pixels_per_cell=(8, 8), cells_per_block=(2, 2), visualise=False)
			nbr = clf.predict(np.array([hog_fd], 'float64'))
			if nbr[0] == 1:
				positions.append((j, i))
	
	# apply non-maxima suppression to the bounding boxes using a
	# fairly large overlap threshold to try to maintain overlapping
	# boxes that are still people
	rects = np.array([[j, i, j + 64, i + 128] for (j, i) in positions])
	pick = non_max_suppression(rects, probs=None, overlapThresh=0.5)

	# draw the final bounding boxes
	for (xA, yA, xB, yB) in pick:
		cv2.rectangle(image, (xA, yA), (xB, yB), (0, 255, 0), 1)

	cv2.imwrite("results/ship/multi_scale/" + filename, image)
def hello():
    # we are reading from webcam
    camera = cv2.VideoCapture(WEB_CAM_INDEX)
    time.sleep(0.25)

    # initialize the HOG descriptor/person detector
    hog = cv2.HOGDescriptor()
    hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())

    # grab the current frame
    (grabbed, frame) = camera.read()

    # resize the frame, convert it to grayscale, and blur it
    frame = imutils.resize(frame, width=min(FRAME_WIDTH, frame.shape[1]))

    # detect people in the image
    (rects, weights) = hog.detectMultiScale(frame, winStride=(WIN_STRIDE_VAL, WIN_STRIDE_VAL),
        padding=(8, 8), scale=SCALE_VAL)

    orig = frame.copy()

    occupied = False
    inColisionZone = False

    # draw the original bounding boxes
    for (x, y, w, h) in rects:
        if (y + h) < NEAREST_POINT:
            cv2.rectangle(orig, (x, y), (x + w, y + h), (255, 0, 0), 2)
        else:
            cv2.rectangle(orig, (x, y), (x + w, y + h), (0, 255, 0), 2)

    # apply non-maxima suppression to the bounding boxes using a
    # fairly large overlap threshold to try to maintain overlapping
    # boxes that are still people
    rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
    pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)

    # draw the final bounding boxes
    for (xA, yA, xB, yB) in pick:
        occupied = True
        if yB < NEAREST_POINT:
            cv2.rectangle(frame, (xA, yA), (xB, yB), (255, 0, 0), 2)
        else:
            inColisionZone = True
            cv2.rectangle(frame, (xA, yA), (xB, yB), (0, 255, 0), 2)

    # cleanup the camera and close any open windows
    camera.release()
    # cv2.destroyAllWindows()

    data = {
        'isOccupied'  : occupied,
        'inColisionZone' : inColisionZone
    }
    print (inColisionZone)
    resp = jsonify(data)
    resp.status_code = 200

    return resp
Example #10
def readImage(image):
    (rects, weights) = hog.detectMultiScale(image, winStride=(4, 4),
	padding=(8, 8), scale=1.05)
    rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
    pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)
    count = 0
    for (xA, yA, xB, yB) in pick:
        count = count + 1
    return count
Example #11
def draw_detections(img, rects, thickness = 1):
    global passTotal
    pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)
    for x, y, w, h in pick:
        # the HOG detector returns slightly larger rectangles than the real objects.
        # so we slightly shrink the rectangles to get a nicer output.
        pad_w, pad_h = int(0.15*w), int(0.05*h)
        cv2.rectangle(img, (x+pad_w, y+pad_h), (x+w-pad_w, y+h-pad_h), (0, 255, 0), thickness)
        passagem = len(pick)
        passTotal += passagem
        #passagem = str(len(pick))
        print("Passagem Total: " + str(passTotal))
Example #12
def process(content, app):
    if not isinstance(content, unicode):
        return []
    image_data = re.sub('^data:image/.+;base64,', '', content).decode('base64')
    image = Image.open(cStringIO.StringIO(image_data))
    image = cv2.cvtColor(np.array(image), 2)
    # cv2.imwrite('image.jpg', image)

    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    faces = cascade.detectMultiScale(image, 1.03, 500, minSize=(10, 10))

    if(len(faces) <= 0):
        return []

    rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in faces])
    pick = non_max_suppression(rects, probs=None, overlapThresh=0.15)

    good = []
    for (x, y, x2, y2) in pick:

        obj = gray[(y-IMAGE_PADDING):(y2+IMAGE_PADDING),
                   (x-IMAGE_PADDING):(x2+IMAGE_PADDING)]
        if obj.shape[0] == 0 or obj.shape[1] == 0:
            continue
        ratio = IMAGE_SIZE/obj.shape[1]
        obj = cv2.resize(obj, (int(IMAGE_SIZE), int(obj.shape[0]*ratio)))
        # find the keypoints and descriptors for object
        kp_o, des_o = orb.detectAndCompute(obj, None)
        if len(kp_o) == 0:
            continue

        # match descriptors
        matches = bf.match(des_r, des_o)

        if(len(matches) >= MATCH_THRESHOLD):
            good.append({
                'x': x*1,
                'y': y*1,
                'width': (x2-x)*1,
                'height': (y2-y)*1,
                'label': 'battlefront'
            })

    # for f in good:
    #    cv2.rectangle(
    #        image,
    #        (f.get('x'), f.get('y')),
    #        (f.get('width'), f.get('height')),
    #        (0, 255, 0), 6)
    #    cv2.imwrite('image.jpg', image)

    return good
def detect_people(frame):
    """
    detect humans using HOG descriptor
    Args:
        frame: input image/frame to run the detector on
    Returns:
        processed frame
    """
    (rects, weights) = hog.detectMultiScale(frame, winStride=(8, 8), padding=(16, 16), scale=1.06)
    rects = non_max_suppression(rects, probs=None, overlapThresh=0.65)
    for (x, y, w, h) in rects:
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 0, 255), 2)
    return frame
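Note that detect_people passes the raw (x, y, w, h) boxes straight to non_max_suppression, while the other snippets first convert them to corner coordinates. A variant that does the conversion, following the same pattern used elsewhere on this page (the name detect_people_nms is made up for illustration), would be:

def detect_people_nms(frame):
    # detect people, convert boxes to corner form, then suppress overlaps
    (rects, weights) = hog.detectMultiScale(frame, winStride=(8, 8), padding=(16, 16), scale=1.06)
    rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
    pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)
    for (xA, yA, xB, yB) in pick:
        cv2.rectangle(frame, (xA, yA), (xB, yB), (0, 0, 255), 2)
    return frame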
Example #14
def get_people():
    image,_ = freenect.sync_get_video()
    image = cv2.cvtColor(image,cv2.COLOR_RGB2BGR)
    image = imutils.resize(image, width=min(400, image.shape[1]))
    # detect people in the image
    (rects, weights) = hog.detectMultiScale(image, winStride=(4, 4), padding=(32, 32), scale=1.05)
    rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
    pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)

    # draw the final bounding boxes
    for (xA, yA, xB, yB) in pick:
        cv2.rectangle(image, (xA, yA), (xB, yB), (0, 255, 0), 2)
        pointX, pointY = 200, 200
        rectX, rectY = xA, yA
        rectX2, rectY2 = xB, yA
        rectX3, rectY3 = xA, yB
        rectX4, rectY4 = xB, yB
        '''
xA,yA -----2
|          |
|          |
|          |
3-------xB,yB'''

        rectXs, rectYs = [rectX, rectX2, rectX3, rectX4], [rectY, rectY2, rectY3, rectY4]

        largestX = max(rectXs)
        smallestX = min(rectXs)
        largestY = max(rectYs)
        smallestY = min(rectYs)

        if (pointX > smallestX and pointX < largestX and pointY > smallestY and pointY < largestY) and hit:
            if pointY < (largestY - smallestY) / 3 + smallestY:
                print("point is in 3/3 (bottom)")
                r = requests.post("https://d6fb041f.ngrok.io/post", data={"part": "limb"})
            elif pointY < (largestY - smallestY) / 3 * 2 + smallestY:
                print("point is in 2/3 (middle)")
                r = requests.post("https://d6fb041f.ngrok.io/post", data={"part": "body"})
            elif pointY < (largestY - smallestY) / 3 * 3 + smallestY:
                print("point is in 1/3 (top)")
                r = requests.post("https://d6fb041f.ngrok.io/post", data={"part": "head"})
        else:
            print("point not inside box")

    hit = False
    # check if middle is in x area
    # set hit to top, middle, or bottom
    cv2.imshow('People detected picture', image)
def trackPerson():
	
	global imageReceived
	global metaInfoReceived
	global imageReceivedProcessed
	
	#	check for no frame
	if imageReceived is None:
		return None, None

	#	get frame after locking
	lock.acquire()
	image = imageReceived
	metaInfo = metaInfoReceived
	imageReceivedProcessed = True
	lock.release()

	orig = image.copy()

	# detect people in the image
	(rects, weights) = hog.detectMultiScale(image, winStride=(4, 4),
		padding=(8, 8), scale=1.05)

	# draw the original bounding boxes
	for (x, y, w, h) in rects:
		cv2.rectangle(orig, (x, y), (x + w, y + h), (0, 0, 255), 2)

	# apply non-maxima suppression to the bounding boxes using a
	# fairly large overlap threshold to try to maintain overlapping
	# boxes that are still people
	rects = numpy.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
	pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)

	# draw the final bounding boxes
	for (xA, yA, xB, yB) in pick:
		cv2.rectangle(image, (xA, yA), (xB, yB), (0, 255, 0), 2)

	# show some information on the number of bounding boxes
	#filename = imagePath[imagePath.rfind("/") + 1:]
	#filename = "FILE"
	#print("[INFO] {}: {} original boxes, {} after suppression".format(
	#	filename, len(rects), len(pick)))

	# show the output images
	cv2.imshow("Before NMS", orig)
	cv2.imshow("After NMS", image)
	#print time.time() - start
	return pick, metaInfo
def rois_extraction (cv_image):
    pre_rois = []
    boundingBoxing = []

#    cv_image = cv2.cvtColor(cv_image, cv2.COLOR_BGR2GRAY)
    clone = copy.copy(cv_image)
    clone_2 = copy.copy(cv_image)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(3,3))
    grayimg = clahe.apply(clone)
    
    grayimg = cv2.GaussianBlur(grayimg,(5,5),0)

    ret1,th1 = cv2.threshold(grayimg, min_th, 255,cv2.THRESH_BINARY)
    re2,th2 = cv2.threshold(grayimg, max_th, 255, cv2.THRESH_BINARY_INV)

    band_thresh = cv2.bitwise_and(th1, th2)
#    print "i make a threshold band"
    contours, hierarchy = cv2.findContours(band_thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_NONE)
    mask = np.zeros(clone.shape, dtype=np.uint8)
    cv2.drawContours(mask,contours,-1,255,-1)
#    print "contours done"

    for c in contours:
        #if the contour is to small, ignore it
#        print "search contours"
        if cv2.contourArea(c) > min_area and cv2.contourArea(c) < max_area: 
            boundingBoxing.append(cv2.boundingRect(c))
            #print "I found a ROI"
        else:
            #print "Is not ROI"
            continue

    for (x, y, w, h) in boundingBoxing:
        cv2.rectangle(cv_image, (x, y), (x + w, y + h), 0, 1)

    rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in boundingBoxing])
    pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)

    for (xA, yA, xB, yB) in pick:
       aux_roi = clone[yA:yB,xA:xB]
       pre_rois.append(aux_roi)
       cv2.rectangle(clone_2, (xA, yA), (xB, yB), 0, 1)
    
#    sorted(pre_rois, cmp=order)
    print len(pre_rois)

    return pre_rois
def find_persons(cv_image):

  boundingBoxing = []
  
  clone = copy.copy(cv_image)
  clone_2 = copy.copy(cv_image)
  clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(3,3))
  gray = clahe.apply(cv_image)
  gray = cv2.GaussianBlur (gray, (21,21), 0)

  min_thresh = cv2.threshold(gray, min_th, 255, cv2.THRESH_BINARY)[1]
  max_thresh = cv2.threshold(gray, max_th, 255, cv2.THRESH_BINARY_INV)[1]

  thresh = cv2.bitwise_and(min_thresh, max_thresh)
  band = cv2.bitwise_and(clone_2,thresh)

  #thresh = cv2.dilate(thresh, None, iterations = 2)
  (cnts, _) = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,
    cv2.CHAIN_APPROX_SIMPLE)

  for c in cnts:
    if cv2.contourArea(c) > min_area and cv2.contourArea(c) < max_area:
      boundingBoxing.append(cv2.boundingRect(c))
      cv2.drawContours(clone, [c], -1, 0, 2)
#      print "I found a ROI"
#    else:
#      print "Is not ROI"

  rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in boundingBoxing])
  pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)

  # draw the suppressed bounding boxes (pick holds corner coordinates)
  for (xA, yA, xB, yB) in pick:
    cv2.rectangle(clone, (xA, yA), (xB, yB), 0, 1)

  #print pick

  cv2.imshow("result", clone)


  cv2.imshow("region_detector", cv_image)
  cv2.moveWindow("region_detector",0,0)
  cv2.imshow("band_threshold_image", thresh)
  cv2.moveWindow("band_threshold_image",0,400)
  cv2.moveWindow("result",500,0)
  cv2.waitKey(1)
def upload_file():
	if request.method == 'POST':
		f = request.files['file']
		newFileName = "test." + f.filename.rsplit(".")[-1]
		f.save(newFileName) # we want to keep the file extension
		# image recognition stuff here
		image = cv2.imread(newFileName)
		image = imutils.resize(image, width=min(500, image.shape[1]))
		orig = image.copy()
		(rects, weights) = hog.detectMultiScale(image, winStride=(4, 4),
			padding=(8, 8), scale=1.05)
		for (x, y, w, h) in rects:
			cv2.rectangle(orig, (x, y), (x + w, y + h), (0, 0, 255), 2)
		rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
		pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)
		myBoundingBox = {}
		myBoundingBox['xA'], myBoundingBox['yA'], myBoundingBox['xB'], myBoundingBox['yB'] = pick[0]
		return json.dumps(myBoundingBox)
Example #19
def classfier(testImage,threadNum,capTime, detectCounter):
    #print(threadNum,capTime)
    (rects, weights) = hog.detectMultiScale(testImage, winStride=(8, 8),
        padding=(8, 8), scale=1.1)

    rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
    pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)

    # draw the final bounding boxes
    # if(pick):
    for (xA, yA, xB, yB) in pick:
        print("Image detected")
        detectCounter[0] = 0
        cv2.rectangle(testImage, (xA, yA), (xB, yB), (0, 255, 0), 2)
    # print(pick,"\n");
    curTime = time.time()
    #print ("Total time from capture", curTime - capTime)
    out.write(testImage)
    cv2.imshow("After NMS", testImage)
Example #20
def ped_detect(image):
    image = imutils.resize(image, width=min(800, image.shape[1]))
    orig = image.copy()
    # detect people in the image
    (rects, weights) = hog.detectMultiScale(image, winStride=(4,4),padding=(8, 8), scale=1.05)
    # draw the original bounding boxes
    for (x, y, w, h) in rects:
        cv2.rectangle(orig, (x, y), (x + w, y + h), (0, 0, 255), 2)
    # apply non-maxima suppression to the bounding boxes using a
    # fairly large overlap threshold to try to maintain overlapping
    # boxes that are still people
    rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
    pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)
    # draw the final bounding boxes
    for (xA, yA, xB, yB) in pick:
        cv2.rectangle(image, (xA, yA), (xB, yB), (0, 255, 0), 2)
    # show some information on the number of bounding boxes
    print("[INFO]: {} original boxes, {} after suppression".format(
        len(rects), len(pick)))
    return orig, image
def Hog_Pedestrian(frame):
    
    #Copy Image
    orig = frame.copy()
    
    # Use HOG to detect people in image
    hog_start = time.time()
    (rects, weights) = hog.detectMultiScale(frame, winStride=(4, 4),
        padding=(8, 8), scale=1.2)
    hog_stop = time.time()
    
    # Draw the original bounding boxes
    for (x, y, w, h) in rects:
        cv2.rectangle(orig, (x, y), (x + w, y + h), (0, 0, 255), 2)
    
    # Use non-maxima suppression to the bounding boxes using a
    # fairly large overlap threshold to try to maintain overlapping
    # boxes that are still people
    rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
    pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)
    
    # Draw the final bounding boxes
    for (xA, yA, xB, yB) in pick:
        cv2.rectangle(frame, (xA, yA), (xB, yB), (0, 255, 0), 2)

    #Add FPS
    cv2.putText(frame, "FPS: {}".format(fps), (frame.shape[1]-100, 20),
        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)

    # show the frame and record if the user presses a key
    cv2.imshow("No Max Suppression", orig)
    cv2.moveWindow("No Max Suppression", 0, 100)
    cv2.imshow("With Max Suppression", frame)
    cv2.moveWindow("With Max Suppression", WINDOW_SIZE+25, 100)
    
    #To track performance of HOG as we vary parameters
    print "HOG: " + str(hog_stop-hog_start)
Example #22
        cos = np.cos(angle)
        sin = np.sin(angle)

        h = x0[x] + x2[x]
        w = x1[x] + x3[x]

        endX = int(newX + (cos * x1[x]) + (sin * x2[x]))  # from tensorflow
        endY = int(newY - (sin * x1[x]) + (cos * x2[x]))  # from tensorflow
        startX = int(endX - w)
        startY = int(endY - h)

        coords.append((startX, startY, endX, endY))
        alphas.append(scoresData[x])

# apply non-maxima suppression to suppress weak, overlapping bounding
# boxes
boxes = non_max_suppression(np.array(coords), probs=alphas)

# loop over the bounding boxes
i = 0
for (startX, startY, endX, endY) in boxes:

    cv2.imwrite("Text_{}.jpg".format(i + 1),
                orig[startY - padding_y:endY + padding_y, startX -
                     padding_x:endX + padding_y])  # padding gives some leeway
    cv2.rectangle(orig, (startX, startY), (endX, endY), (0, 255, 0), 2)
    i = i + 1
    end = time.time()
#print('Time Taken for data number{} is ..'.format(pan))
print(end - start)
cv2_imshow(orig)
Example #23
def textRecognition():
    # load the input image and grab the image dimensions
    image = cv2.imread(path)
    image = image[0:900, 0:1100]
    cv2.imwrite('text.jpg', image)
    orig = image.copy()
    (origH, origW) = image.shape[:2]

    # set the new width and height and then determine the ratio in change
    # for both the width and height
    (newW, newH) = (320, 320)
    rW = origW / float(newW)
    rH = origH / float(newH)

    # resize the image and grab the new image dimensions
    image = cv2.resize(image, (newW, newH))
    (H, W) = image.shape[:2]

    # define the two output layer names for the EAST detector model that
    # we are interested in -- the first is the output probabilities and the
    # second can be used to derive the bounding box coordinates of text
    layerNames = ["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"]

    # load the pre-trained EAST text detector
    net = cv2.dnn.readNet(east)

    # construct a blob from the image and then perform a forward pass of
    # the model to obtain the two output layer sets
    blob = cv2.dnn.blobFromImage(image,
                                 1.0, (W, H), (123.68, 116.78, 103.94),
                                 swapRB=True,
                                 crop=False)
    net.setInput(blob)
    (scores, geometry) = net.forward(layerNames)

    # decode the predictions, then  apply non-maxima suppression to
    # suppress weak, overlapping bounding boxes
    (rects, confidences) = decode_predictions(scores, geometry)
    boxes = non_max_suppression(np.array(rects), probs=confidences)

    # initialize the list of results
    results = []

    # loop over the bounding boxes
    for (startX, startY, endX, endY) in boxes:
        # scale the bounding box coordinates based on the respective ratios
        startX = int(startX * rW)
        startY = int(startY * rH)
        endX = int(endX * rW)
        endY = int(endY * rH)

        # in order to obtain a better OCR of the text we can potentially
        # apply a bit of padding surrounding the bounding box
        dX = int((endX - startX) * 0.03)
        dY = int((endY - startY) * 0.12)

        # apply padding to each side of the bounding box, respectively
        startX = max(0, startX - dX)
        startY = max(0, startY - dY)
        endX = min(origW, endX + (dX * 2))
        endY = min(origH, endY + (dY * 2))

        results.append((startX, startY, endX, endY))

    # sort the results bounding box coordinates from left to right
    results = sorted(results, key=lambda r: r[0])

    # get the predicted size of the entire text, based on the separate
    # box bounds of the individual words
    numObj = len(results) - 1
    startX = results[0][0]
    endX = results[numObj][2]
    startY = min(results[numObj][1], results[0][1])
    endY = max(results[numObj][3], results[0][3])

    # Sometimes it'll detect something super far away
    while (((endY - startY) > 100) and (numObj >= 0)):
        endY = max(results[numObj][3], results[0][3])
        numObj = numObj - 1

    roi = orig[startY:endY, startX:endX]
    roi = cv2.blur(roi, (5, 5))
    cv2.imwrite(test, roi)

    # in order to apply Tesseract v4 to OCR text we must supply
    # (1) a language, (2) an OEM flag of 1, indicating that we
    # wish to use the LSTM neural net model for OCR, and finally
    # (3) a PSM value, in this case 7, which implies that we are
    # treating the ROI as a single line of text
    config = ("-l eng --oem 1 --psm 7")
    text = pytesseract.image_to_string(roi, config=config)

    if (text):
        # Common fixes for stuff
        if not (text[0].isalpha()):
            text = text[1:]
        if not (text[-1].isalpha()):
            text = text[:-1]
        if not (text[-1].isalpha()):
            text = text[:-1]
        text = text.replace("’", "'")
        text = text.replace(".", ",")

    #os.remove("text.jpg")

    return text
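textRecognition relies on a decode_predictions helper that is not shown in this snippet. A sketch of one, mirroring the row/column decoding that appears inline in the detect and detect_cmnd examples further down (the min_confidence default is an assumption), could be:

def decode_predictions(scores, geometry, min_confidence=0.5):
    # grab the dimensions of the scores volume, then collect boxes and scores
    (numRows, numCols) = scores.shape[2:4]
    rects = []
    confidences = []

    for y in range(0, numRows):
        scoresData = scores[0, 0, y]
        xData0 = geometry[0, 0, y]
        xData1 = geometry[0, 1, y]
        xData2 = geometry[0, 2, y]
        xData3 = geometry[0, 3, y]
        anglesData = geometry[0, 4, y]

        for x in range(0, numCols):
            # ignore weak activations
            if scoresData[x] < min_confidence:
                continue

            # the feature maps are 4x smaller than the resized input
            (offsetX, offsetY) = (x * 4.0, y * 4.0)

            # rotation angle of the prediction
            angle = anglesData[x]
            cos = np.cos(angle)
            sin = np.sin(angle)

            # derive the box size and its corner coordinates
            h = xData0[x] + xData2[x]
            w = xData1[x] + xData3[x]
            endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
            endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
            startX = int(endX - w)
            startY = int(endY - h)

            rects.append((startX, startY, endX, endY))
            confidences.append(scoresData[x])

    return (rects, confidences)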
def detector(video_capture,rot_angle, ROI_1, ROI_2, ppl_width):
    
    for i in range (frame_skip):
        video_capture.read()

    #read video
    ret, image = video_capture.read()    
    [height, width, layer] = image.shape
    
    #rotate
    if (rot_angle != 0):
        (hh, ww) = image.shape[:2]
        center = (ww / 2, hh / 2)
        M = cv2.getRotationMatrix2D(center, rot_angle, 1.0)    
        image = cv2.warpAffine(image, M, (ww, hh))
    
    #mask before resize
    #image = image[ROI_1[1]:ROI_2[1],ROI_1[0]:ROI_2[0]]    
    
    image = imutils.resize(image, width=min(400, image.shape[1]))
    
    ##mask after resize
    resize_ratio = image.shape[1] / float(width) 
        
    max_ppl_size = np.ceil(ppl_width * resize_ratio * 1.4)
    min_ppl_size = np.ceil(ppl_width * resize_ratio * 0.8)
    #print max_ppl_size
    
    ROI_1 = np.int_(np.dot(ROI_1,resize_ratio))   
    ROI_2 = np.int_(np.dot(ROI_2,resize_ratio))
    #print ROI_1
    
    image = image[ROI_1[1]:ROI_2[1],ROI_1[0]:ROI_2[0]] 
    
    #Display - origin 
    orig = image.copy()
    
    #detect
    (rects, weights) = hog.detectMultiScale(image, winStride=(8, 8),
     padding=(8, 8), scale=1.05)
    
    ##delete large rect
    i = 0
    while (i < len(rects)):
        print ("ppp")
        print (float(rects[i][2]))
        if (rects[i][2] > max_ppl_size or rects[i][2] < min_ppl_size):
            #[x,y,w,h] = rects[i]
            #cv2.rectangle(orig, (x, y), (x + w, y + h), (255, 0, 0), 2)
            rects = np.delete(rects,i,0)
        else:
            #[x,y,w,h] = rects[i]
            #cv2.rectangle(orig, (x, y), (x + w, y + h), (0, 0, 255), 2)
            i += 1
     
    #Display - origin 
#    for (x, y, w, h) in rects:
#        #box size validation
#        print ('w = ' + str(w))
#        if (w < 100):
#            cv2.rectangle(orig, (x, y), (x + w, y + h), (0, 0, 255), 2)
    
    #Display - origin 
    cv2.imshow('Original', orig)
    
    #combine rectangle
    rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
    pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)
    
    
    people = 0
    # draw the final bounding boxes
    for (xA, yA, xB, yB) in pick:
        #box size validation  
        #if (xB - xA < 100):
        cv2.rectangle(image, (xA, yA), (xB, yB), (0, 255, 0), 2)
        people += 1
        
    #Display - result
    cv2.imshow("HOG + NMS", image)
    
#    if(random.random() > 0.8):
#            print ("save")
#            fname = "./save/" + time.strftime("%m_%d_%H_%M_%S")+ ".jpg"
#            cv2.imwrite(fname,image)
#            
    if people > 0:
        return True
    else:
        return False
Example #25
def pedestrians(path, w, h, n):
    from imutils.object_detection import non_max_suppression
    from imutils import paths
    import argparse
    import imutils
    from matplotlib import pyplot as plt
    import cv2
    import os
    import numpy as np
    import glob
    from tqdm import tqdm_notebook
    import pickle

    ### Create a list with the paths of all the images in the folder img1
    img_path = glob.glob(path + "/*.jpg")
    img_path.sort()
    img_path

    ### Background subtraction of all the images in the folder, writing the results to img1_bis
    print(" Background Subtraction of all the images in the folder ", "\n")
    img_path_ = list()
    fgbg = cv2.createBackgroundSubtractorMOG2()
    if not os.path.exists('img1_bis'):
        os.mkdir('img1_bis')

    # Computing background
    for id_im, im_path in enumerate(img_path):
        print("Frame #" + str(id_im) + '/' + str(len(img_path)), end="\r")
        im = cv2.imread(im_path)
        fgmask = fgbg.apply(im)
        img_path_.append('img1_bis' + '/' + '{:03d}'.format(id_im) + '.jpg')
        cv2.imwrite('img1_bis' + '/' + '{:03d}'.format(id_im) + '.jpg',
                    np.expand_dims((fgmask > 0), axis=-1) * im)
    dic_Paths = dict(
        (path_bis, path) for (path_bis, path) in zip(img_path_, img_path))

    ###pedestrian detection using HOG and SVM
    # initialize the HOG descriptor/person detector
    print("pedestrian detection using HOG and SVM", "\n")

    hog = cv2.HOGDescriptor()
    hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
    dic_img_box = dict()

    for path in tqdm_notebook(img_path_):
        solution = []
        image = cv2.imread(path, cv2.IMREAD_UNCHANGED)

        orig = image.copy()

        #playing with winStride impacts the performance; setting it to (7, 7)
        #allows us to reach a score of 24%
        (rects, weights) = hog.detectMultiScale(image,
                                                winStride=(7, 7),
                                                padding=(8, 8),
                                                scale=1.05)
        #new_rects , new_weights = rects.copy(), weights.copy()
        Del = list()
        threshold = 0
        for i in range(len(rects)):
            x, y, w, h = rects[i, :]
            if weights[i, 0] < threshold:
                #cv2.rectangle(orig, (x, y), (x+w, y+h), (0, 255, 0), 10)
                Del.append(i)
        rects, weights = np.delete(rects, Del, 0), np.delete(weights, Del, 0)
        #cv2_imshow('Pedestrians', frame)
        #plt.imshow(orig)
        #plt.show()

        rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
        pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)

        dic_img_box[path] = pick

    ## Exporting as pickle
    pickle.dump(dic_img_box,
                open('dic_img_box_' + '{}'.format(threshold) + '.p', "wb"))

    dic_img_box = pickle.load(
        open('dic_img_box_' + '{}'.format(threshold) + '.p', 'rb'))

    ###Load each image, get the contours of the shapes in it, check whether they are human-shaped,
    ###and return boxes that could plausibly contain a person
    print("Possible human regions using Contour detection ", "\n")

    dic_img_human = dict()
    for path in tqdm_notebook(img_path_):
        im = cv2.imread(path)
        orig = im.copy()
        height, width = im.shape[:2]
        new_width = 500
        new_height = new_width * height // width
        im = cv2.resize(im, (new_width, new_height),
                        interpolation=cv2.INTER_CUBIC)

        # Change to gray and apply both gaussian and threshold filter
        im_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
        blurred_im = cv2.GaussianBlur(im_gray, (1, 1), 0)
        ret, thresh = cv2.threshold(blurred_im, 220, 255, 0)

        # Compute contours
        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL,
                                       cv2.CHAIN_APPROX_SIMPLE)
        #print( contours)
        # Get dimension of main contours
        human_boxes = []
        for cnt in contours:

            # Compute area size
            area = cv2.contourArea(cnt)
            if area > 3:  #Chosen after studying the area of the tarpaulin
                # remove overdimension of contours

                cnt_low = cnt[:, 0]

                # contour width
                x_max = np.max(cnt_low[:, 0]) * width // new_width
                x_min = np.min(cnt_low[:, 0]) * width // new_width
                # contour height
                y_max = np.max(cnt_low[:, 1]) * height // new_height
                y_min = np.min(cnt_low[:, 1]) * height // new_height
                #cv2.rectangle(orig, (x_min, y_min), (x_max, y_max), (0, 255, 0), 10)
                human_boxes.append([x_min, y_min, x_max, y_max])

        #plt.imshow(orig)
        #plt.show()
        dic_img_human[path] = human_boxes

    ## Exporting as pickle
    pickle.dump(dic_img_human, open("dic_img_human.p", "wb"))

    dic_img_human = pickle.load(open('dic_img_human.p', 'rb'))

    print("Keeping the overlaping of the 2 sets of regions as final boxes   ",
          "\n")

    def doOverlap(box1, box2):
        # Returns true if two rectangles(l1, r1)
        # and (l2, r2) overlap
        # If one rectangle is on left side of other
        if (box1[0] > box2[2] or box2[0] > box1[2]):
            return False

        # If one rectangle is above other
        if (box1[1] > box2[3] or box2[1] > box1[3]):
            return False

        return True

    if not os.path.exists('img1_boxes'):
        os.mkdir('img1_boxes')
    dic_final_boxes = dict()
    for (id_im, path) in tqdm_notebook(enumerate(dic_img_box.keys())):
        pick = dic_img_box[path]
        pick_ = np.copy(pick)
        Del = list()
        for i, box1 in enumerate(pick):
            overlap = [doOverlap(box1, box2) for box2 in dic_img_human[path]]
            if sum(overlap) == 0:
                Del.append(i)
        pick_ = np.delete(pick_, Del, 0)
        dic_final_boxes[dic_Paths[path]] = pick_
        image = cv2.imread(dic_Paths[path], cv2.IMREAD_UNCHANGED)
        for (xA, yA, xB, yB) in pick_:
            cv2.rectangle(image, (xA, yA), (xB, yB), (0, 255, 0), 10)
        cv2.imwrite('img1_boxes' + '/' + '{:03d}'.format(id_im) + '.jpg',
                    image)

    ## Exporting as pickle

    pickle.dump(dic_final_boxes,
                open('dic_final_boxes_' + '{}'.format(threshold) + '.p', "wb"))
    dic_final_boxes = pickle.load(
        open('dic_final_boxes_' + '{}'.format(threshold) + '.p', 'rb'))

    bounding_boxes = list()
    for frame_id, frame_path in enumerate(list(img_path)):
        for bb_id, box in enumerate(dic_final_boxes[frame_path]):
            bounding_boxes.append([
                frame_id, bb_id, box[0], box[1], box[2] - box[0],
                box[3] - box[1]
            ])
    return (bounding_boxes)
Example #26
def getTextFromImage(image):
    orig = image.copy()
    (origH, origW) = image.shape[:2]
    # set the new width and height and then determine the ratio in change
    # for both the width and height
    (newW, newH) = 320, 320
    rW = origW / float(newW)
    rH = origH / float(newH)

    # resize the image and grab the new image dimensions
    image = cv2.resize(image, (newW, newH))
    (H, W) = image.shape[:2]
    # define the two output layer names for the EAST detector model that
    # we are interested in -- the first is the output probabilities and the
    # second can be used to derive the bounding box coordinates of text
    layerNames = ["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"]

    # load the pre-trained EAST text detector
    print("[INFO] loading EAST text detector...")
    net = cv2.dnn.readNet(east_text_detector)
    # construct a blob from the image and then perform a forward pass of
    # the model to obtain the two output layer sets
    blob = cv2.dnn.blobFromImage(image,
                                 1.0, (W, H), (123.68, 116.78, 103.94),
                                 swapRB=True,
                                 crop=False)
    net.setInput(blob)
    (scores, geometry) = net.forward(layerNames)

    # decode the predictions, then  apply non-maxima suppression to
    # suppress weak, overlapping bounding boxes
    (rects, confidences) = decode_predictions(scores, geometry)
    boxes = non_max_suppression(np.array(rects), probs=confidences)

    # initialize the list of results
    results = []

    # loop over the bounding boxes
    for (startX, startY, endX, endY) in boxes:
        # scale the bounding box coordinates based on the respective
        # ratios
        startX = int(startX * rW)
        startY = int(startY * rH)
        endX = int(endX * rW)
        endY = int(endY * rH)

        # in order to obtain a better OCR of the text we can potentially
        # apply a bit of padding surrounding the bounding box -- here we
        # are computing the deltas in both the x and y directions
        dX = int((endX - startX) * padding)
        dY = int((endY - startY) * padding)

        # apply padding to each side of the bounding box, respectively
        startX = max(0, startX - dX)
        startY = max(0, startY - dY)
        endX = min(origW, endX + (dX * 2))
        endY = min(origH, endY + (dY * 2))

        # extract the actual padded ROI
        roi = orig[startY:endY, startX:endX]

        # in order to apply Tesseract v4 to OCR text we must supply
        # (1) a language, (2) an OEM flag of 1, indicating that we
        # wish to use the LSTM neural net model for OCR, and finally
        # (3) a PSM value, in this case 7, which implies that we are
        # treating the ROI as a single line of text
        config = ("-l eng --oem 1 --psm 7")
        text = pytesseract.image_to_string(roi, config=config)

        # add the bounding box coordinates and OCR'd text to the list
        # of results
        results.append(((startX, startY, endX, endY), text))

    # sort the results bounding box coordinates from top to bottom
    results = sorted(results, key=lambda r: r[0][1])

    # loop over the results
    for ((startX, startY, endX, endY), text) in results:
        # display the text OCR'd by Tesseract
        send_message(CHAN, text)
def detect(image):
    """
    Function to perform text detection on an image

    :param image: source cv image matrix to perform detection on
    :type image: mat
    :return: list of detected boxes given by their coordinates in the order startX, startY, endX, endY
    :rtype: int[[]]
    """
    global rW
    global rH

    orig = image.copy()
    (H, W) = image.shape[:2]

    # resize
    (newW, newH) = (width, height)
    rW = W / float(newW)
    rH = H / float(newH)
    image = cv2.resize(image, (newW, newH))
    (H, W) = image.shape[:2]

    layerNames = ["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"]

    # load the pre-trained EAST text detector
    net = cv2.dnn.readNet(
        'C:/Users/turnt/OneDrive/Desktop/Rob0Workspace/opencv-text-detection/frozen_east_text_detection.pb'
    )

    # construct a blob from the image and then perform a forward pass of the model to obtain the two output layer sets
    blob = cv2.dnn.blobFromImage(image,
                                 1.0, (W, H), (123.68, 116.78, 103.94),
                                 swapRB=True,
                                 crop=False)
    start = time.time()

    net.setInput(blob)
    (scores, geometry) = net.forward(layerNames)
    end = time.time()
    print("text detection took {:.6f} seconds".format(end - start))

    # grab the number of rows and columns from the scores volume, then
    # initialize our set of bounding box rectangles and corresponding
    # confidence scores
    (numRows, numCols) = scores.shape[2:4]
    rects = []
    confidences = []

    # loop over the number of rows
    for y in range(0, numRows):
        # extract the scores (probabilities), followed by the geometrical
        # data used to derive potential bounding box coordinates that
        # surround text
        scoresData = scores[0, 0, y]
        xData0 = geometry[0, 0, y]
        xData1 = geometry[0, 1, y]
        xData2 = geometry[0, 2, y]
        xData3 = geometry[0, 3, y]
        anglesData = geometry[0, 4, y]

        # loop over the number of columns
        for x in range(0, numCols):

            # if our score does not have sufficient probability, ignore it
            if scoresData[x] < conf:
                continue

            # print("Found! Conf:{}".format(scoresData[x]))

            # compute the offset factor as our resulting feature maps will
            # be 4x smaller than the input image
            (offsetX, offsetY) = (x * 4.0, y * 4.0)

            # extract the rotation angle for the prediction and then
            # compute the sin and cosine
            angle = anglesData[x]
            cos = np.cos(angle)
            sin = np.sin(angle)

            # use the geometry volume to derive the width and height of
            # the bounding box
            h = xData0[x] + xData2[x]
            w = xData1[x] + xData3[x]

            # compute both the starting and ending (x, y)-coordinates for
            # the text prediction bounding box
            endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
            endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
            startX = int(endX - w)
            startY = int(endY - h)

            # add the bounding box coordinates and probability score to
            # our respective lists
            rects.append((startX, startY, endX, endY))
            confidences.append(scoresData[x])

    # apply non-maxima suppression to suppress weak, overlapping bounding
    # boxes
    boxes = non_max_suppression(np.array(rects), probs=confidences)
    # drawBoxes(boxes, orig)
    return boxes
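The drawBoxes call in detect is commented out and the helper itself is not part of this listing. A minimal sketch, assuming it only draws the picked boxes on the supplied image, could be:

def drawBoxes(boxes, image):
    # draw each detected text box and show the result
    for (startX, startY, endX, endY) in boxes:
        cv2.rectangle(image, (startX, startY), (endX, endY), (0, 255, 0), 2)
    cv2.imshow("Text Detection", image)
    cv2.waitKey(0)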
Example #28
def detector(filename):
    im = cv2.imread(filename)
    im = imutils.resize(im, width=min(400, im.shape[1]))
    min_wdw_sz = (64, 128)
    step_size = (10, 10)
    downscale = 1.6

    clf = joblib.load(os.path.join(model_path, 'svm.model'))

    #List to store the detections
    detections = []
    #The current scale of the image
    scale = 0

    for im_scaled in pyramid_gaussian(im, downscale=downscale):
        #The list contains detections at the current scale
        if im_scaled.shape[0] < min_wdw_sz[1] or im_scaled.shape[
                1] < min_wdw_sz[0]:
            break
        for (x, y, im_window) in sliding_window(im_scaled, min_wdw_sz,
                                                step_size):
            if im_window.shape[0] != min_wdw_sz[1] or im_window.shape[
                    1] != min_wdw_sz[0]:
                continue
            im_window = color.rgb2gray(im_window)
            fd = hog(im_window,
                     orientations=9,
                     pixels_per_cell=(6, 6),
                     cells_per_block=(2, 2),
                     block_norm='L1',
                     visualise=False,
                     transform_sqrt=False,
                     feature_vector=True,
                     normalise=None)
            fd = fd.reshape(1, -1)
            pred = clf.predict(fd)

            if pred == 1:

                if clf.decision_function(fd) > 0.5:
                    detections.append(
                        (int(x * (downscale**scale)),
                         int(y * (downscale**scale)),
                         clf.decision_function(fd),
                         int(min_wdw_sz[0] * (downscale**scale)),
                         int(min_wdw_sz[1] * (downscale**scale))))

        scale += 1

    clone = im.copy()

    rects = np.array([[x, y, x + w, y + h] for (x, y, _, w, h) in detections])
    sc = [score[0] for (x, y, score, w, h) in detections]
    print("sc: ", sc)
    sc = np.array(sc)
    pick = non_max_suppression(rects, probs=sc, overlapThresh=0.3)
    #print ("shape, ", pick.shape)

    for (x_tl, y_tl, _, w, h) in detections:
        cv2.rectangle(im, (x_tl, y_tl), (x_tl + w, y_tl + h), (0, 255, 0),
                      thickness=2)
    for (xA, yA, xB, yB) in pick:
        cv2.rectangle(clone, (xA, yA), (xB, yB), (0, 255, 0), 2)

    plt.axis("off")
    plt.imshow(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))
    plt.title("Raw Detection before NMS")
    plt.show()

    plt.axis("off")
    plt.imshow(cv2.cvtColor(clone, cv2.COLOR_BGR2RGB))
    plt.title("Final Detections after applying NMS")
    plt.show()
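This detector also depends on a sliding_window helper (alongside pyramid_gaussian from skimage) that the listing omits. A typical sliding-window generator consistent with how it is called above, offered here only as an assumed sketch, would be:

def sliding_window(image, window_size, step_size):
    # slide a (width, height) window across the image in step_size increments,
    # yielding the top-left corner and the window contents
    for y in range(0, image.shape[0], step_size[1]):
        for x in range(0, image.shape[1], step_size[0]):
            yield (x, y, image[y:y + window_size[1], x:x + window_size[0]])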
def detect_cmnd(image):
    orig = image.copy()
    (H, W) = image.shape[:2]

    (newW, newH) = (320, 320)
    rW = W / float(newW)
    rH = H / float(newH)

    # resize the image and grab the new image dimensions
    image = cv2.resize(image, (newW, newH))
    (H, W) = image.shape[:2]

    # define the two output layer names for the EAST detector model that
    # we are interested in -- the first is the output probabilities and the
    # second can be used to derive the bounding box coordinates of text
    layerNames = ["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"]

    net = cv2.dnn.readNet('frozen_east_text_detection.pb')

    # construct a blob from the image and then perform a forward pass of
    # the model to obtain the two output layer sets
    blob = cv2.dnn.blobFromImage(image,
                                 1.0, (W, H), (123.68, 116.78, 103.94),
                                 swapRB=True,
                                 crop=False)
    net.setInput(blob)
    (scores, geometry) = net.forward(layerNames)

    (numRows, numCols) = scores.shape[2:4]
    rects = []
    confidences = []

    # loop over the number of rows
    for y in range(0, numRows):
        # extract the scores (probabilities), followed by the geometrical
        # data used to derive potential bounding box coordinates that
        # surround text
        scoresData = scores[0, 0, y]
        xData0 = geometry[0, 0, y]
        xData1 = geometry[0, 1, y]
        xData2 = geometry[0, 2, y]
        xData3 = geometry[0, 3, y]
        anglesData = geometry[0, 4, y]

        # loop over the number of columns
        for x in range(0, numCols):
            # if our score does not have sufficient probability, ignore it
            if scoresData[x] < 0.3:
                continue

            # compute the offset factor as our resulting feature maps will
            # be 4x smaller than the input image
            (offsetX, offsetY) = (x * 4.0, y * 4.0)

            # extract the rotation angle for the prediction and then
            # compute the sin and cosine
            angle = anglesData[x]
            cos = np.cos(angle)
            sin = np.sin(angle)

            # use the geometry volume to derive the width and height of
            # the bounding box
            h = xData0[x] + xData2[x]
            w = xData1[x] + xData3[x]

            # compute both the starting and ending (x, y)-coordinates for
            # the text prediction bounding box
            endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
            endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
            startX = int(endX - w)
            startY = int(endY - h)

            # add the bounding box coordinates and probability score to
            # our respective lists
            rects.append((startX, startY, endX, endY))
            confidences.append(scoresData[x])

    # apply non-maxima suppression to suppress weak, overlapping bounding
    # boxes
    boxes = non_max_suppression(np.array(rects), probs=confidences)

    MAX_y = 0
    MIN_y = orig.shape[0]
    MAX_X = 0
    MIN_X = orig.shape[1]
    for (startX, startY, endX, endY) in boxes:
        # scale the bounding box coordinates based on the respective
        startY = int(startY * rH)
        startX = int(startX * rW)
        endY = int(endY * rH)
        endX = int(endX * rW)

        if endY > MAX_y:
            MAX_y = endY
        if startY < MIN_y:
            MIN_y = startY
        if startX < MIN_X:
            MIN_X = startX
        if endX > MAX_X:
            MAX_X = endX
    new_img = orig[MIN_y:MAX_y + 30, MIN_X - 30:]
    return new_img
Example #30
    def _detect_text(self):
        """ """
        # load the input image and grab the image dimensions
        image = cv2.imread(self.image_path)
        orig = image.copy()
        (origH, origW) = image.shape[:2]

        # set the new width and height and then determine the ratio in change
        # for both the width and height
        (newW, newH) = (self.width, self.height)
        rW = origW / float(newW)
        rH = origH / float(newH)

        # resize the image and grab the new image dimensions
        image = cv2.resize(image, (newW, newH))
        (H, W) = image.shape[:2]

        # construct a blob from the image and then perform a forward pass of
        # the model to obtain the two output layer sets
        blob = cv2.dnn.blobFromImage(image,
                                     1.0, (W, H), (123.68, 116.78, 103.94),
                                     swapRB=True,
                                     crop=False)

        self.net.setInput(blob)
        (scores, geometry) = self.net.forward(self.layerNames)

        # decode the predictions, then  apply non-maxima suppression to
        # suppress weak, overlapping bounding boxes
        (rects, confidences) = self._decode_predictions(scores, geometry)
        boxes = non_max_suppression(np.array(rects), probs=confidences)

        # initialize the list of results
        results = []

        # loop over the bounding boxes
        for (startX, startY, endX, endY) in boxes:
            # scale the bounding box coordinates based on the respective
            # ratios
            startX = int(startX * rW)
            startY = int(startY * rH)
            endX = int(endX * rW)
            endY = int(endY * rH)

            # in order to obtain a better OCR of the text we can potentially
            # apply a bit of padding surrounding the bounding box -- here we
            # are computing the deltas in both the x and y directions
            dX = int((endX - startX) * self.padding)
            dY = int((endY - startY) * self.padding)

            # apply padding to each side of the bounding box, respectively
            startX = max(0, startX - dX)
            startY = max(0, startY - dY)
            endX = min(origW, endX + (dX * 2))
            endY = min(origH, endY + (dY * 2))

            # extract the actual padded ROI
            roi = orig[startY:endY, startX:endX]

            # in order to apply Tesseract v4 to OCR text we must supply
            # (1) a language, (2) an OEM flag of 1, indicating that we
            # wish to use the LSTM neural net model for OCR, and finally
            # (3) a PSM value, in this case 7, which implies that we are
            # treating the ROI as a single line of text
            config = ("-l eng --oem 1 --psm 7")
            text = pytesseract.image_to_string(roi, config=config)

            # add the bounding box coordinates and OCR'd text to the list
            # of results
            results.append(((startX, startY, endX, endY), text))

        # sort the results bounding box coordinates from top to bottom
        results = sorted(results, key=lambda r: r[0][1])

        return results
Example #31
print("[INFO] loading EAST text detector...")
net = cv2.dnn.readNet(args["east"])

# construct a blob from the image and then perform a forward pass of
# the model to obtain the two output layer sets
blob = cv2.dnn.blobFromImage(image,
                             1.0, (W, H), (123.68, 116.78, 103.94),
                             swapRB=True,
                             crop=False)
net.setInput(blob)
(scores, geometry) = net.forward(layerNames)

# decode the predictions, then  apply non-maxima suppression to
# suppress weak, overlapping bounding boxes
(rects, confidences) = decode_predictions(scores, geometry)
boxes = non_max_suppression(np.array(rects), probs=confidences)

# initialize the list of results
results = []

# loop over the bounding boxes
for (startX, startY, endX, endY) in boxes:
    # scale the bounding box coordinates based on the respective
    # ratios
    startX = int(startX * rW)
    startY = int(startY * rH)
    endX = int(endX * rW)
    endY = int(endY * rH)

    # in order to obtain a better OCR of the text we can potentially
    # apply a bit of padding surrounding the bounding box -- here we
Example #32
0
def main():
    try:
        # initialize leds
        gpio.setmode(gpio.BCM)
        gpio.setup(17, gpio.OUT)
        gpio.setup(27, gpio.OUT)
        gpio.output(27, True)

        # initialize the HOG descriptor/person detector
        camera = PiCamera()
        camera.hflip = True
        camera.vflip = True
        camera.resolution = (320, 240)
        camera.framerate = 8
        rawCapture = PiRGBArray(camera, size=(320, 240))
        time.sleep(0.25)
        hog = cv2.HOGDescriptor()
        hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())

        Threshold = 0
        features_number = 0

        tracked_features = None
        detected = False

        for frame in camera.capture_continuous(rawCapture,
                                               format="bgr",
                                               use_video_port=True):

            if not detected:  # detection block
                gpio.output(17, False)
                Threshold = 0
                unchangedPointsMap = dict()

                current_frame = frame.array
                #current_frame = imutils.resize(current_frame, width = 300)
                current_frame_copy = current_frame.copy()
                current_frame = cv2.cvtColor(current_frame, cv2.COLOR_BGR2GRAY)

                # detect people in the image
                (rects, weights) = hog.detectMultiScale(current_frame,
                                                        winStride=(4, 4),
                                                        padding=(8, 8),
                                                        scale=1.5)

                # draw the original bounding boxes
                for i in range(len(rects)):
                    x, y, w, h = rects[i]
                    rects[i][0] = x + 15
                    rects[i][1] = y + 40
                    rects[i][2] = w - 30
                    rects[i][3] = h - 40

                for (x, y, w, h) in rects:
                    cv2.rectangle(current_frame_copy, (x, y), (x + w, y + h),
                                  (0, 0, 255), 2)

                # Filter boxes
                rects = np.array([[x, y, x + w, y + h]
                                  for (x, y, w, h) in rects])
                pick = non_max_suppression(rects,
                                           probs=None,
                                           overlapThresh=0.65)

                # draw the final bounding boxes
                for (xA, yA, xB, yB) in pick:
                    cv2.rectangle(current_frame, (xA, yA), (xB, yB),
                                  (0, 255, 0), 2)

                print("{} original boxes, {} after suppression".format(
                    len(rects), len(pick)))

                if len(rects) > 0:
                    features, height_from_floor = find_features(
                        current_frame, rects[0], 0)
                    #print(features)
                    detected = True
                    gpio.output(17, True)

            if detected:  # Tracking block
                if Threshold == 0:
                    features_number = len(features)
                    Threshold = features_number * threshold_percent

                #print ("Threshold" + str(Threshold))
                if features_number < Threshold:
                    print("Features less than threshold")
                    detected = False
                else:
                    rawCapture.truncate(0)
                    next_frame = frame.array
                    #next_frame = imutils.resize(next_frame, width = 300)

                    current_frame_copy = next_frame.copy()
                    next_frame = cv2.cvtColor(next_frame, cv2.COLOR_BGR2GRAY)

                    #-----------Tracking using LK ---------------------------

                    try:
                        features = np.array(features)

                        (tracked_features, status,
                         feature_errors) = cv2.calcOpticalFlowPyrLK(
                             current_frame, next_frame, features, None,
                             **lk_params)

                        arr_x = []
                        arr_y = []

                        for i in range(len(tracked_features)):
                            f = tracked_features[i]
                            x = f[0][0]
                            y = f[0][1]

                            arr_x.append(x)
                            arr_y.append(y)

                        arr_x = sorted(arr_x)
                        arr_y = sorted(arr_y)

                        # use integer division so the median index is an int
                        mid = len(arr_x) // 2
                        X = arr_x[mid]
                        mid = len(arr_y) // 2
                        Y = arr_y[mid]

                        #print(X)

                        new_feature_number = 0
                        temp_set_number = []
                        temp_distance = []
                        j = 0

                        print("Height_from_floor" + str(height_from_floor))
                        print("num" + str(features_number))
                        #print ("Status" + str(status))
                        #print ("Status[0]   " + str(status[0]))
                        #print ("Status[1]   " + str(status[1]))
                        #print ("Status[1][0]   " + str(status[1][0]))

                        for i in range(features_number):
                            if status[i][0] == 1:
                                new_feature_number += 1
                                temp_distance.append(height_from_floor[i])

                        print(temp_distance)
                        height_from_floor = []

                        print("Here")

                        for i in range(len(temp_distance)):
                            height_from_floor.append(temp_distance[i])

                        print("Here2")

                        features_number = new_feature_number
                        features = []

                        for i in range(features_number):
                            features.append(tracked_features[i])

                        features = np.array(features)
                        tracked_features = []
                        current_frame = next_frame.copy()

                    except Exception:
                        raise

#-------Compute Distance --------------------
                    status, v = scaled_people_floor(features_number, features,
                                                    height_from_floor)

                    if status:
                        distance = compute_distance(v)
                        print(distance)
                        cv2.putText(current_frame_copy, str(distance),
                                    (current_frame_copy.shape[1] - 200,
                                     current_frame_copy.shape[0] - 20),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.75,
                                    (0, 255, 0), 3)

#-------Showing Points ------------------------
                    for i in range(features_number):
                        cv2.circle(current_frame_copy, tuple(features[i][0]),
                                   3, 255, -1)

                    cv2.circle(current_frame_copy, (X, Y), 5, (0, 0, 255), -1)

            # show the output images

            cv2.imshow("HOG", current_frame_copy)
            key = cv2.waitKey(1) & 0xFF
            rawCapture.truncate(0)

            if key == ord("w"):
                break
    except (KeyboardInterrupt, SystemExit):
        gpio.output(27, False)
        gpio.output(17, False)
        camera.close()  # PiCamera objects are released with close()
        cv2.destroyAllWindows()
        raise
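
The main() above assumes a module-level lk_params dict (plus find_features, threshold_percent, scaled_people_floor and compute_distance helpers) that the snippet does not include. A typical parameter set for cv2.calcOpticalFlowPyrLK, given here purely as an illustrative assumption, is:

import cv2

# Assumed Lucas-Kanade parameters; the original project's values are not shown.
lk_params = dict(winSize=(15, 15),        # search window around each feature
                 maxLevel=2,              # pyramid levels
                 criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT,
                           10, 0.03))

# Assumed fraction of tracked features that must survive before re-detection.
threshold_percent = 0.5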
Example #33
0
def get_charaters(image, resise_factor = 20, confidence_limit=0.5, padding=7):
	## Image preprocessing

	# Resize image
	height_resised = 32 * resise_factor
	width_resised = 32 * resise_factor
	image = cv2.resize(image, (height_resised, width_resised))

	# Save the original image for drawing purposes later
	originial_image = image

	# Display image
	# cv2.imshow("Image", image)
	# cv2.waitKey(0)
	# cv2.imwrite('images/original.jpg', image)

	## Text detection using EAST

	# Load pre-trained text detector
	net = cv2.dnn.readNet('frozen_east_text_detection.pb')

	# Define the two output layer names for the EAST detector model that
	# we are interested in -- the first is the output probabilities and the
	# second can be used to derive the bounding box coordinates of text
	layerNames = ["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"]

	# Construct a blob from the image and then perform a forward pass of
	# the model to obtain the two output layer sets
	blob = cv2.dnn.blobFromImage(image, 1.0, (height_resised, width_resised), 
		(123.68, 116.78, 103.94), swapRB=True, crop=False)
	net.setInput(blob)
	scores, geometry = net.forward(layerNames)

	# Grab the number of rows and columns from the scores volume, then
	# initialize our set of bounding box rectangles and corresponding
	# confidence scores
	(numRows, numCols) = scores.shape[2:4]
	rects = []
	confidences = []
	
	# Loop over the number of rows
	for y in range(0, numRows):
		# Extract the scores (probabilities), followed by the geometrical
		# data used to derive potential bounding box coordinates that
		# surround text
		scoresData = scores[0, 0, y]
		xData0 = geometry[0, 0, y]
		xData1 = geometry[0, 1, y]
		xData2 = geometry[0, 2, y]
		xData3 = geometry[0, 3, y]
		anglesData = geometry[0, 4, y]

		# Loop over the number of columns
		for x in range(0, numCols):
			# If our score does not have sufficient probability, ignore it
			if scoresData[x] < confidence_limit:
				continue
	
			# Compute the offset factor as our resulting feature maps will
			# be 4x smaller than the input image
			(offsetX, offsetY) = (x * 4.0, y * 4.0)
	
			# Extract the rotation angle for the prediction and then
			# compute the sin and cosine
			angle = anglesData[x]
			cos = np.cos(angle)
			sin = np.sin(angle)
	
			# Use the geometry volume to derive the width and height of
			# the bounding box
			h = xData0[x] + xData2[x]
			w = xData1[x] + xData3[x]
	
			# Compute both the starting and ending (x, y)-coordinates for
			# the text prediction bounding box
			endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
			endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
			startX = int(endX - w)
			startY = int(endY - h)
	
			# Add the bounding box coordinates and probability score to
			# our respective lists
			rects.append((startX, startY, endX, endY))
			confidences.append(scoresData[x])

	# Apply non-maxima suppression to suppress weak, overlapping bounding
	# boxes
	boxes = non_max_suppression(np.array(rects), probs=confidences)
	
	## Running OCR

	# Gray scale image
	image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

	# Applying adaptive thresholding
	image = cv2.adaptiveThreshold(image, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 115, 1)
	# cv2.imshow('Adaptive threshold', image)
	# cv2.waitKey(0)
	# cv2.imwrite('images/thresholded.jpg', image)

	# Applying gaussian blur
	# image = cv2.GaussianBlur(image, (7, 7), 0)
	# cv2.imshow('Gaussian blur', image)
	# cv2.waitKey(0)

	# Loop over the bounding boxes and get text
	results = []
	for (startX, startY, endX, endY) in boxes:
		# Apply padding to bounding boxes
		startX = max(0, startX - padding)
		startY = max(0, startY - padding)
		endX = min(width_resised, endX + padding)
		endY = min(height_resised, endY + padding)

		# Extract bounding box as roi (region of interest)
		roi = image[startY:endY, startX:endX]
		roi = cv2.adaptiveThreshold(roi, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 115, 1)

		# In order to apply Tesseract v4 to OCR the text we must supply
		# (1) a language, (2) an OEM flag of 1, indicating that we
		# wish to use the LSTM neural net model for OCR, and finally
		# (3) a PSM value, in this case 7, which implies that we are
		# treating the ROI as a single line of text
		config = ("-l eng --oem 1 --psm 7")
		text = pytesseract.image_to_string(roi, config=config)
	
		# add the bounding box coordinates and OCR'd text to the list
		# of results
		results.append(((startX, startY, endX, endY), text))
	
	## Printing result

	# Sort the results bounding box coordinates from top to bottom
	results = sorted(results, key=lambda r:r[0][1])
	
	# Loop over the results
	ocr_outputs = []
	marked_image = originial_image
	for ((startX, startY, endX, endY), text) in results:
		# display the text OCR'd by Tesseract
		print("OCR TEXT")
		print("========")
		print("{}\n".format(text))
	
		# Draw the bounding box on the image (TESTING PURPOSES ONLY)
		cv2.rectangle(marked_image, (startX, startY), (endX, endY), (0, 255, 0), 2)
	
		# Strip out non-ASCII text so we can draw the text on the image
		# using OpenCV, then draw the text and a bounding box surrounding
		# the text region of the input image
		text = "".join([c if ord(c) < 128 else "" for c in text]).strip()
		cv2.putText(marked_image, text, (startX, startY - 20),
			cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 0, 255), 3
		)
		
		# Add outputs to list to be returned
		ocr_outputs.append(text)
	

	# Show the output image
	# cv2.imshow("Text detected", marked_image)
	# cv2.waitKey(0)
	# cv2.imwrite('images/final.jpg', marked_image)

	# Return all ocr outputs
	return originial_image, image, marked_image, ocr_outputs
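
A minimal driver for get_charaters() might look like the following; the file names are placeholders, and frozen_east_text_detection.pb must be present in the working directory because the function loads it internally:

import cv2

img = cv2.imread('sample.jpg')          # hypothetical input path
original, processed, marked, texts = get_charaters(img, resise_factor=20)
print(texts)
cv2.imwrite('marked.jpg', marked)       # hypothetical output path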
Example #34
0
def OpenFile():
    name = askopenfilename(initialdir="/",
                           title="Select file",
                           filetypes=(("jpeg files", "*.jpg *.png"),
                                      ("all files", "*.*")))

    global fileName
    fileName = name
    global image_address
    image_address = fileName
    try:
        img = ImageTk.PhotoImage(Image.open(fileName))
        p2 = tk.Label(root, image=img).pack()
    except Exception:
        pass

    # detection code starts from here

    # constant variables for text detection
    min_confidence = 0.5
    width = 320
    height = 320

    # load the input image
    image = cv2.imread(image_address)
    orig = image.copy()
    (H, W) = image.shape[:2]

    # set new width and height
    (newW, newH) = (width, height)
    rW = W / float(newW)
    rH = H / float(newH)

    # resize the image
    image = cv2.resize(image, (newW, newH))
    (H, W) = image.shape[:2]

    #the first layer is the output probabilities
    #the second layer is used to derive the bounding box coordinates of text
    layerNames = ["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"]

    # load the pre-trained EAST text detector
    print("[INFO] loading EAST text detector...")
    net = cv2.dnn.readNet('frozen_east_text_detection.pb')

    # construct a blob from the image
    # create the model with the two output layer sets
    blob = cv2.dnn.blobFromImage(image,
                                 1.0, (W, H), (123.68, 116.78, 103.94),
                                 swapRB=True,
                                 crop=False)
    net.setInput(blob)
    (scores, geometry) = net.forward(layerNames)

    # get rows and columns from the scores volume
    # initialize rectangles for the bounding boxes and confidence scores
    (numRows, numCols) = scores.shape[2:4]
    rects = []
    confidences = []

    # loop over rows
    for y in range(0, numRows):
        # get the score data and dimensions of the rectangle
        scoresData = scores[0, 0, y]
        xData0 = geometry[0, 0, y]
        xData1 = geometry[0, 1, y]
        xData2 = geometry[0, 2, y]
        xData3 = geometry[0, 3, y]
        anglesData = geometry[0, 4, y]

        # loop over columns
        for x in range(0, numCols):
            # score not confident
            if scoresData[x] < min_confidence:
                continue

            # compute the offset factor
            (offsetX, offsetY) = (x * 4.0, y * 4.0)

            # get rotation angle and make sin and cosine
            angle = anglesData[x]
            cos = np.cos(angle)
            sin = np.sin(angle)

            # width and height of the bounding box
            h = xData0[x] + xData2[x]
            w = xData1[x] + xData3[x]

            # compute x and y coordinates of the bounding box
            endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
            endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
            startX = int(endX - w)
            startY = int(endY - h)

            # add the rectangles and score made
            rects.append((startX, startY, endX, endY))
            confidences.append(scoresData[x])

    # apply non_max_suppression
    boxes = non_max_suppression(np.array(rects), probs=confidences)

    #sorting of text arrangement
    def func(val):
        return val[1]

    boxes = sorted(boxes, key=func)

    #function to find if r1 rectangle contains r2 rectangle
    def contains(r1, r2):
        return (r1[0] < r2[0] < r2[0] + r2[2] < r1[0] + r1[2]) and (
            r1[1] < r2[1] < r2[1] + r2[3] < r1[1] + r1[3])

    # list to contain all text in character form
    # e.g. crop_img = [['H','E','L','L','O'], ['W','O','R','L','D']]

    crop_img = []

    # loop over the bounding boxes
    for (startX, startY, endX, endY) in boxes:
        # scale the bounding box coordinates based on the respective
        # ratios
        startX = int(startX * rW)
        startY = int(startY * rH)
        endX = int(endX * rW)
        endY = int(endY * rH)

        # cropping of text from the original image
        unit = orig[startY:endY, startX:endX]

        #changing cropped image to grayscale image
        gray_img = cv2.cvtColor(unit, cv2.COLOR_BGR2GRAY)

        #finding threshold value from the grayscale average value
        threshold = np.mean(gray_img)
        #getting the binary image with the help of thresholding
        _, thresh = cv2.threshold(gray_img, threshold, 255, cv2.THRESH_BINARY)
        # dilation and erosion
        #            thresh = cv2.dilate(thresh, kernel, iterations=1)
        #            thresh = cv2.erode(thresh, kernel, iterations=1)

        #Finding contours in the binary image
        contours, _ = cv2.findContours(thresh, cv2.RETR_TREE,
                                       cv2.CHAIN_APPROX_SIMPLE)

        #list of contour polygon and bounding rectangle
        contours_poly = [None] * len(contours)
        boundRect = [None] * len(contours)

        #looping in the contour list
        for i, c in enumerate(contours):
            #making polygons of the contours
            contours_poly[i] = cv2.approxPolyDP(c, 3, True)
            #making rectangles from that polygon
            # returns x,y,h,w of rectangle in a list
            boundRect[i] = cv2.boundingRect(contours_poly[i])

        #sorting the rectangle boxes into ascending order of x
        boundRect = sorted(boundRect, key=lambda x: x[0])
        #list to crop character image from text
        char_crop = []

        #cropping the rectangles that are not in another rectangle
        for i in range(len(boundRect)):
            count = 1
            for j in range(len(boundRect)):
                if not i == j:
                    if not contains(boundRect[j], boundRect[i]):
                        count += 1
                        if count == len(boundRect):
                            char_crop.append(
                                unit[boundRect[i][1]:boundRect[i][1] +
                                     boundRect[i][3],
                                     boundRect[i][0]:boundRect[i][0] +
                                     boundRect[i][2]])

        #adding the cropped characters to the final list
        crop_img.append(char_crop)

    #importing keras files to load trained model
    from keras.models import model_from_json

    # load json and create model
    json_file = open('model.json', 'r')
    loaded_model_json = json_file.read()
    json_file.close()
    loaded_model = model_from_json(loaded_model_json)
    # load weights into new model
    loaded_model.load_weights("model.h5")

    # Compile model
    loaded_model.compile(optimizer='adam',
                         loss='categorical_crossentropy',
                         metrics=['accuracy'])

    # function to return the prediction index whose value equals 1
    def result(array):
        rt = 26
        for i in range(0, len(array[0])):
            if array[0][i] == 1:
                rt = i
        return rt

    #prediction list as the model was trained
    predict_list = [
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
        'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', ''
    ]
    global content
    content = ''

    #predicting every character in the text in the crop_img
    for text_img in crop_img:
        for image in text_img:
            image = cv2.resize(image, (32, 32))
            image = np.expand_dims(image, axis=0)
            rslt = result(loaded_model.predict(image))
            content += predict_list[rslt]
        content += ' '

    print(content)
    out = tk.Label(root, justify=tk.CENTER, padx=10, text=content).pack()
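
Note that the result() helper above only recognises outputs that are exactly 1, which assumes hard one-hot predictions; with softmax probabilities the usual pattern is an argmax lookup. A sketch of that alternative (not the original code):

import numpy as np

def result_argmax(prediction):
    # index of the highest-probability class; predict_list[26] is the blank entry
    return int(np.argmax(prediction[0]))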
def main():
    # read the video
    readVideo = cv2.VideoCapture("london_bus.mp4")

    cv2.namedWindow("Pedestrian Detection")

    detectedPedestrians = {}
    firstFrame = True
    frames = 0
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter('output.avi', fourcc, 240.0, (640, 480))
    pauseVideo = False
    while True:

        if pauseVideo == False:
            flagCaptured, frame = readVideo.read()
        #print('Frame=',frame)
        if (flagCaptured is False):
            print("could not get frame")
            break

        # initialize the HOG descriptor

        hog = cv2.HOGDescriptor()
        hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())

        # winStride and hitThreshold are used to adjust for maximizing detections and reducing
        # false positives
        (rects, weights) = hog.detectMultiScale(frame,
                                                winStride=(8, 8),
                                                padding=(8, 8),
                                                scale=1.05,
                                                hitThreshold=0.22)

        # get bigger than needed bounding boxes and then apply non-maxima suppression
        rectBoxes = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])

        # apply non-maxima suppression to the huge bounding boxes
        suppressedRectBoxes = non_max_suppression(rectBoxes,
                                                  probs=None,
                                                  overlapThresh=0.95)
        counter = 0
        # draw the final bounding boxes
        for (xA, yA, xB, yB) in suppressedRectBoxes:
            cv2.rectangle(frame, (xA, yA), (xB, yB), (0, 255, 0), 2)

            # identifying a new box is done only once. An already identified box is tracked
            # instead of detecting all over again
            if firstFrame is True:
                detectedPedestrians[counter] = pedestrianTracking(
                    counter, frame, (xA, yA, abs(xB - xA), abs(yB - yA)))
                #(id, frame, bounding_box)
                counter += 1

        for key, value in detectedPedestrians.items():
            value.update_predict(frame)

        firstFrame = False
        frames += 1
        #print(frames)

        cv2.imshow("Pedestrian Detection", frame)
        out.write(frame)

        # press ESC to close video window
        if (cv2.waitKey(10) & 0xFF) == 27:
            cv2.destroyWindow("Pedestrian Detection")
            break

        # press spacebar to pause video
        if (cv2.waitKey(10) & 0xFF) == 32:
            print('Video paused')
            pauseVideo = True

        # press enter to resume
        if (cv2.waitKey(10) & 0xFF) == 13:
            print('Video resumed')
            pauseVideo = False

    out.release()
    readVideo.release()
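
The loop above assumes a pedestrianTracking class with an update_predict() method that is not part of the snippet. A minimal stand-in, sketched here purely as an assumption in the spirit of the histogram back-projection and mean-shift tracking used by body_detection() later in this listing, could be:

import cv2
import numpy as np

class pedestrianTracking:
    # Hypothetical tracker matching the (id, frame, bounding_box) call above.
    def __init__(self, track_id, frame, bbox):
        self.track_id = track_id
        self.track_window = bbox  # (x, y, w, h)
        x, y, w, h = bbox
        hsv_roi = cv2.cvtColor(frame[y:y + h, x:x + w], cv2.COLOR_BGR2HSV)
        mask = cv2.inRange(hsv_roi, np.array((0., 60., 32.)),
                           np.array((180., 255., 255.)))
        self.roi_hist = cv2.calcHist([hsv_roi], [0], mask, [180], [0, 180])
        cv2.normalize(self.roi_hist, self.roi_hist, 0, 255, cv2.NORM_MINMAX)
        self.term_crit = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 1)

    def update_predict(self, frame):
        # back-project the stored histogram and shift the window to the new mode
        hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
        dst = cv2.calcBackProject([hsv], [0], self.roi_hist, [0, 180], 1)
        ret, self.track_window = cv2.meanShift(dst, self.track_window,
                                               self.term_crit)
        x, y, w, h = self.track_window
        cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
        return self.track_window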
Example #36
0
def get_caffe_detections(fname, img):

    detector = init_caffe()
    pretrained_model = "./models/bvlc_reference_rcnn_ilsvrc13.caffemodel"  #help="Trained model weights file."
    model_def = "./models/deploy.prototxt" #help="Model definition file."
    labels_file = './models/det_synset_words.txt'


    COORD_COLS = ['ymin', 'xmin', 'ymax', 'xmax']
    
    boxes = my_sliding_windows.get_windows(fname, img, 40, 106) #50, 256
    TESTDATA = io.StringIO(my_sliding_windows.get_str_to_csv(boxes))

    # Load input.
    t = time.time()
    print("Loading input...")

    f = TESTDATA

    # Detect.
    # 123 index is human

    inputs = pd.read_csv(f, sep=',', dtype={'filename': str})
    inputs.set_index('filename', inplace=True)

    # Unpack sequence of (image filename, windows).
    images_windows = [
        (ix, inputs.iloc[np.where(inputs.index == ix)][COORD_COLS].values)
        for ix in inputs.index.unique()
    ]
    detections = detector.detect_windows(images_windows)

    #using selective search
    # detections = detector.detect_selective_search(inputs)

    print("Processed {} windows in {:.3f} s.".format(len(detections),
                                                     time.time() - t))

    #loop through the output and filter humans

    #get labels for classes
    with open(labels_file) as f:
        labels_df = [
            {
                'synset_id': l.strip().split(' ')[0],
                'name': ' '.join(l.strip().split(' ')[1:]).split(',')[0]
            }
            for l in f.readlines()
        ]

    # print "detections object: ", detections[0]

    # imgcpy = cv2.imread(fname)
    totalrects = []

    for i in detections:

        prdt = i['prediction']
        maxp = max(prdt)
        prdt = list(prdt)  # convert to a list so .index() works in Python 3
        maxi = prdt.index(maxp)

        if maxi == 123 and maxp > 0.5:
            # print "human detected!"
            coord = i['window']
            totalrects.append([coord[1], coord[0], coord[3], coord[2]])

    print "Total humans detected: ", str(len(totalrects))
    #non max suppression
    totalrects = np.array(totalrects)
    processedboxes = non_max_suppression(totalrects, probs=None, overlapThresh=0.55) #overlapThresh default 0.65
    print "Total humans after NMS correction: ", str(len(processedboxes))

    return processedboxes
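
A short driver for get_caffe_detections(), with a placeholder file name (init_caffe and my_sliding_windows are project-specific helpers that are not shown here):

import cv2

fname = "street.jpg"                    # hypothetical input path
img = cv2.imread(fname)
people_boxes = get_caffe_detections(fname, img)
for (startX, startY, endX, endY) in people_boxes:
    cv2.rectangle(img, (startX, startY), (endX, endY), (0, 255, 0), 2)
cv2.imwrite("street_detections.jpg", img)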
Example #38
0
def do_for_all(original_image, index):
	# construct the argument parser and parse the arguments
	
	# load the input image and grab the image dimensions
	# image = cv2.imread(args["image"])
	# Reading Image
	if os.path.isfile(original_image): 
		image = cv2.imread(original_image)
	else:
		print('unable to read file in tesseract_text_detection.py')
		return
	# Resize Image to Standard Ratio
	image = resize_toStandard(image, "Adhar")

	temp = image
	orig = image.copy()
	(H, W) = image.shape[:2]
	
	# set the new width and height and then determine the ratio in change
	# for both the width and height
	(newW, newH) = (args["width"], args["height"])
	rW = W / float(newW)
	rH = H / float(newH)
	
	# resize the image and grab the new image dimensions
	image = cv2.resize(image, (newW, newH))
	(H, W) = image.shape[:2]
	
	# define the two output layer names for the EAST detector model that
	# we are interested in -- the first is the output probabilities and the
	# second can be used to derive the bounding box coordinates of text
	layerNames = [
		"feature_fusion/Conv_7/Sigmoid",
		"feature_fusion/concat_3"]
	
	# load the pre-trained EAST text detector
	print("[INFO] loading EAST text detector...")
	net = cv2.dnn.readNet("frozen_east_text_detection.pb")
	
	# construct a blob from the image and then perform a forward pass of
	# the model to obtain the two output layer sets
	blob = cv2.dnn.blobFromImage(image, 1.0, (W, H),
		(123.68, 116.78, 103.94), swapRB=True, crop=False)
	start = time.time()
	net.setInput(blob)
	(scores, geometry) = net.forward(layerNames)
	end = time.time()
	
	# show timing information on text prediction
	print("[INFO] text detection took {:.6f} seconds".format(end - start))
	
	# grab the number of rows and columns from the scores volume, then
	# initialize our set of bounding box rectangles and corresponding
	# confidence scores
	(numRows, numCols) = scores.shape[2:4]
	rects = []
	confidences = []
	
	# loop over the number of rows
	for y in range(0, numRows):
		# extract the scores (probabilities), followed by the geometrical
		# data used to derive potential bounding box coordinates that
		# surround text
		scoresData = scores[0, 0, y]
		xData0 = geometry[0, 0, y]
		xData1 = geometry[0, 1, y]
		xData2 = geometry[0, 2, y]
		xData3 = geometry[0, 3, y]
		anglesData = geometry[0, 4, y]
	
		# loop over the number of columns
		for x in range(0, numCols):
			# if our score does not have sufficient probability, ignore it
			if scoresData[x] < args["min_confidence"]:
				continue
	
			# compute the offset factor as our resulting feature maps will
			# be 4x smaller than the input image
			(offsetX, offsetY) = (x * 4.0, y * 4.0)
	
			# extract the rotation angle for the prediction and then
			# compute the sin and cosine
			angle = anglesData[x]
			cos = np.cos(angle)
			sin = np.sin(angle)
	
			# use the geometry volume to derive the width and height of
			# the bounding box
			h = xData0[x] + xData2[x]
			w = xData1[x] + xData3[x]
	
			# compute both the starting and ending (x, y)-coordinates for
			# the text prediction bounding box
			endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
			endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
			startX = int(endX - w)
			startY = int(endY - h)
	
			# add the bounding box coordinates and probability score to
			# our respective lists
			rects.append((startX, startY, endX, endY))
			confidences.append(scoresData[x])
	
	# apply non-maxima suppression to suppress weak, overlapping bounding
	# boxes
	boxes = non_max_suppression(np.array(rects), probs=confidences)
	
	
	
	
	i=0
	# if not exist(os.path("./output")):
	# 	os.mkdir('./output')
	# loop over the bounding boxes
	
	temp = resize_toStandard(temp, "Adhar")
	# cv2.imshow('hey', temp)
	# print(boxes)
	
	mod_boxes = []
	for a,b,c,d in boxes:
		if(b<=290 and d<=290 and b>=56 and d>=56 and (c-a)>=50):
			mod_boxes.append([a,b,c,d])
	# print(mod_boxes)




	for i in range(len(boxes)):
		boxes[i][0] = boxes[i][0] - 5 # startX
		boxes[i][1] = boxes[i][1] - 5 # startY
		boxes[i][2] = boxes[i][2] + 12 # endX
		boxes[i][3] = boxes[i][3] + 5 # endY

	for i in range(len(boxes)):
		for j in range(i):
			box1 = boxes[i]
			box2 = boxes[j]
			if check_merge(box1, box2, i, j):
				# print("cool")
				# print("boxes : ", boxes,"\n box1 : =" ,box1,"\n box2 : ", box2)
				if boxes[i][0] < boxes[j][0]:
					boxes[i] = boxes[j] = [boxes[i][0], boxes[i][1], boxes[j][2], boxes[j][3]]
				else:
					boxes[i] = boxes[j] = [boxes[j][0], boxes[j][1], boxes[i][2], boxes[i][3]]
	for i in range(len(boxes)):
		for j in range(i):
			box1 = boxes[i]
			box2 = boxes[j]
			if check_merge(box1, box2, i, j):
				# print("cool")
				# print("boxes : ", boxes,"\n box1 : =" ,box1,"\n box2 : ", box2)
				if boxes[i][0] < boxes[j][0]:
					boxes[i] = boxes[j] = [boxes[i][0], boxes[i][1], boxes[j][2], boxes[j][3]]
				else:
					boxes[i] = boxes[j] = [boxes[j][0], boxes[j][1], boxes[i][2], boxes[i][3]]
	
	# for box in boxes:

	newboxes=[]
	flag = True
	#newboxes1=[]
	#print(boxes)
	for (a,b,c,d) in boxes:
		if(b<=290 and d<=290 and b>=56 and d>=56 and (c-a)>=70):
			if not len(newboxes):
				newboxes.append([a,b,c,d])
			else:
				for i in range(len(newboxes)):
					( sx, sy, ex, ey) = newboxes[i]
					#print(i, a, b, c, d, " : " ,sx, sy, ex, ey, (sx == a and sy == b), (ex == c and ey == d), ((sx == a and sy == b) and (ex == c and ey == d)))
					if ((sx == a and sy == b) and (ex == c and ey == d)):
						flag = False
						# print(newboxes)
				if flag:
					newboxes.append([a, b, c, d])
				flag = True


	#print(boxes)
	#print(newboxes)
	newboxes.sort(key = conditional_sort)
	# print(newboxes)
	newboxes.pop(0)

	#if len(newboxes) > 2:
		#newboxes[1][0] = newboxes[1][0] + (newboxes[1][2] - newboxes[1][1])*0.58
		#newboxes[2][0] = newboxes[2][0] + (newboxes[2][2] - newboxes[2][1])*0.71

	ind = -1
	# print(boxes, len(boxes))
	# print(mod_boxes)
	text_recognized = []
	# text_recognized.append(original_image.split('./')[len(original_image.split('./'))-1])
	print("############", index, "############")
	for (startX, startY, endX, endY) in newboxes:
		# scale the bounding box coordinates based on the respective
		# ratios
		ind += 1
		if(ind==1):
			startX-= (startX-endX)*0.55
		elif(ind==2):
			startX-= (startX-endX)*0.45

		# print("i = ", ind , startX, startY, endX, endY)
		startX = int(startX * rW)
		startY = int(startY * rH)
		endX = int(endX * rW)
		endY = int(endY * rH)
		imgName = "./output/crop"+str(index)+"_"+str(ind)+".png"
	
		# print("actual i: " , ind, "dim : ", startX, startY, endX, endY)
	
		cv2.imwrite(imgName, temp[startY: endY, startX: endX])

		text = pytesseract.image_to_string(Image.open(imgName), lang='eng', \
			config='--psm 8 --oem 3 -c tessedit_char_whitelist=  0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/')
		
		imgName = imgName.split('./')[len(imgName.split('./'))-1]
		imgName = imgName.split('/')[len(imgName.split('/'))-1]
		text_recognized.append(imgName)
		text_recognized.append(text)
		
		print("-=======",imgName,"======--")
		print(text)
		print("------------------")

		label = str(ind)
		# draw the bounding box on the image
		# cv2.rectangle(orig, (startX-10, startY-5), (endX+10, endY+5), (0, 255, ind*10), 2)
		cv2.rectangle(orig, (startX, startY), (endX, endY), (0, 255, ind*10), 2)
		cv2.putText(orig,label,(startX-10, startY-5),cv2.FONT_HERSHEY_COMPLEX,0.5,(0,0,0),1)
	
	# show the output image
	print("===========================\n\n")
	
	if len(text_recognized) == 8:
		new_num = text_recognized.pop()
		n = ''
		for x in new_num:
			if (ord(x)<=57 and ord(x)>=48) or ord(x) == ord(' '):
				n+=x
		text_recognized.append(n) 
	p = "./fullCard/"+str(index)+".png"
	cv2.imwrite(p, orig)
	#pt.imshow(orig)
	#pt.show()
	# cv2.imshow("Text Detection", orig)
	# cv2.waitKey(0)
	
	return text_recognized
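
do_for_all() relies on check_merge() and conditional_sort() helpers that are not included in the snippet. The sketches below are assumptions about what they plausibly do (reading-order sorting, and merging boxes that sit on the same text line); the real project's logic and thresholds may differ:

def conditional_sort(box):
    # assumed reading order: top-to-bottom, then left-to-right
    startX, startY, endX, endY = box
    return (startY, startX)

def check_merge(box1, box2, i, j):
    # assumed rule: merge two distinct boxes when they overlap vertically
    # (same text line) and are close together horizontally
    if i == j:
        return False
    (ax1, ay1, ax2, ay2) = box1
    (bx1, by1, bx2, by2) = box2
    vertical_overlap = min(ay2, by2) - max(ay1, by1)
    horizontal_gap = max(ax1, bx1) - min(ax2, bx2)
    return vertical_overlap > 0 and horizontal_gap < 20   # 20 px is a guess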
Example #39
0
def body_detection():

    global frame_hist
    global first
    global tracker_count
    global t_x
    global t_y
    global t_w
    global t_h 
    global t_x_bar 
    global t_y_bar
    global output_file_path
    global finalLOG_file_path
    global last_server_resp
    global last_upload
    global save_body_path
    global t
    global prev_frame
    global frame2gray
    global frame1gray
    global result_frame


    while True: 

        # read each frame
        ret, frame = webcam.read()
        if ret is False:
            print('NO FRAME COMING, Return value is ',ret)


        # Uncomment the section below if you want to log system status every minute

        # ######### LOG THIS INFO IN FINAL LOG VERY MINUTE #################
        # if time.time()-t > 60:

        #     with open(finalLOG_file_path ,"a") as output:
        #         output.write("_________ WRITING LOG AT "+time.strftime("%H:%M:%S")+" DATE "+time.strftime("%Y-%m-%d")+"_________ \n\n"+"CAMERA DEVICE ID: cv2.VideoCapture(1)\n")
        #         #CHECK CAMERA
        #         if webcam.isOpened():
        #             output.write("CAMERA FEED STATUS AT "+time.strftime("%H:%M:%S")+"  AVAILABLE\n")
        #         else:
        #             output.write("CAMERA FEED STATUS AT " +time.strftime("%H:%M:%S")+ "  NOT AVAILABLE \n")

        #         #CHECK FRAME CAPTURE
        #         if ret==True:
        #             output.write("CURRENT FRAME STATUS AT " +time.strftime("%H:%M:%S")+" YES\n")       
        #         else:
        #             output.write("CURRENT FRAME STATUS AT "+time.strftime("%H:%M:%S")+" NO\n")
        #         # WRITE LAST UPLOAD AND SERVER RESPONSE TO FILE
        #         try:
        #             if not queue_LOG_LastUpload.empty():
        #                 last_upload = queue_LOG_LastUpload.get()
        #             if not queue_LOG_ServerResp.empty():
        #                 last_server_resp = queue_LOG_ServerResp.get()
        #         except:
        #             logging.basicConfig(filename='errorLogs.txt',level=logging.DEBUG)
        #             logging.info('problem getting values from diagonistcs queue')

        #         # ADD LAST UPLOAD AND SERVER RESPONSE TO FILE
        #         output.write("LAST UPLOADING FILE NAME: "+ last_upload + "\n"+"LAST SERVER RESPONSE: "+ last_server_resp +"\n\n")
        #         output.write("_______________END OF LOG SEGMENT___________________\n\n\n")
        #     output.close()
        #     t=time.time()
        # ##########################

        # Pre process every frame for motion analysis
        if frame_hist == 0:
            hsv_roi =  cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
            mask = cv2.inRange(hsv_roi, np.array((0., 60.,32.)), np.array((180.,255.,255.)))
            roi_hist = cv2.calcHist([hsv_roi],[0],mask,[180],[0,180])
            cv2.normalize(roi_hist,roi_hist,0,255,cv2.NORM_MINMAX)
            frame_hist = 1

        # resize every frame to reduce computational cost
        image = imutils.resize(frame, width=min(300, frame.shape[1]))
        orig = image.copy()

        # Process every frame for motion analysis
        # convert frame to gray
        frame2gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY )
        #Absdiff to get the difference between the frames
        result_frame = cv2.absdiff(frame1gray,frame2gray)
        # Pre process every result frame for motion analysis
        result_frame = cv2.blur(result_frame,(5,5))
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT,(5,5))
        result_frame = cv2.morphologyEx(result_frame, cv2.MORPH_OPEN, kernel)
        result_frame = cv2.morphologyEx(result_frame, cv2.MORPH_CLOSE, kernel)
        val , result_frame = cv2.threshold(result_frame, 13, 255, cv2.THRESH_BINARY_INV)

        # Check for motion. If there is any motion try to detect humans.
        if somethingHasMoved(result_frame):
    
            # detect people in the frame using our hog classifier
            (rects, weights) = hog.detectMultiScale(image, winStride=(4, 4),
                padding=(0, 0), scale=1.1)

            # If no detection reset these values to any random large values
            if len(rects) == 0:
                t_x = 10001
                t_y = 10001
                t_w = 10031
                t_h = 10031
                t_x_bar = 100021
                t_y_bar = 100011

            # Iterate through all of the detected human bodies
            for i in range(len(rects)):

                body_i = rects[i]
                (x, y, w, h) = [v * 1 for v in body_i]

                # draw the bounding boxes for every detection
                cv2.rectangle(orig, (x, y), (x + w, y + h), (0, 0, 255), 2)

                # apply non-maxima suppression to reduce overlapping
                rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
                pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)

                # Iterate through all of the detected human bodies after NON MAXIMA
                for i in range(len(pick)):

                    body_i = pick[i]
                    (xA, yA, xB, yB) = [int(v * 1) for v in body_i]

                    # draw the final bounding boxes after NON MAXIMA
                    #cv2.rectangle(image, (xA, yA), (xB, yB), (0, 255, 0), 2)

                    # Now draw bounding boxes on original full resolution frame
                    # First map the final bounding boxes to the original frame 
                    # Calculated multiplication factor varies with camera resolution
                    # (4.28 for 1280x720) (6.4 for 1920x1080)

                    (x1, y1, w1, h1) = [int(v * 4.28) for v in body_i]
                    cv2.rectangle(frame, (x1, y1), (w1, h1), (0, 255, 55), 2)

                    # for tracking , every time current rectangle is the new rectangle/bounding box
                    curr_rect = (x1, y1, w1, h1)

                    # for first run, set tracking window here
                    if first == 0:
                        track_window = curr_rect
                        first = 1

                    #calculate the centerpoint of NEW rectangle/bounding boxes
                    x_bar = x1 + 0.5 * w1
                    y_bar = y1 + 0.5 * h1

                    # CHECK IF THE CURRENT RECTANGLE LIES SOMEWHERE IN THE PREVIOUS RECTANGLES
                    if ((t_x <= x_bar <= (t_x + t_w)) and (t_y <= y_bar <= (t_y + t_h)) and (x1 <= t_x_bar <= (x1 + w1 )) and ( y1 <= t_y_bar <= (y1 + h1  ))):
                        
                        # If it lies somewhere in the previous rectangle do not reset the tracker, keep tracking the previous one
                        #print ('RECT MATCHED - KEEP TRACKING - DONT RESET')
                        hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
                        dst = cv2.calcBackProject([hsv],[0],roi_hist,[0,180],1)
                        # apply meanshift to get the new location
                        ret, track_window = cv2.meanShift(dst, track_window, term_crit)

                        x3,y3,w3,h3 = track_window

                        x3 = ((x1-x3)+x3)
                        y3 = ((y1-y3)+y3)
                        w3 = ((w1-w3)+w3)
                        h3 = ((h1-h3)+h3)

                        # draw tracking rectangles
                        cv2.rectangle(frame, (x3, y3),(w3, h3),rectangleColor ,2)
                        # copy current rects in tracking rects
                        (t_x , t_y , t_w , t_h) = curr_rect
                        #calculate the centerpoints
                        t_x_bar = t_x + 0.5 * t_w
                        t_y_bar = t_y + 0.5 * t_h
                       
                    else:
                        # If it does not lie in the previous rectangles, update the tracker and track the current/new one
                        #print('NO MATCHING RECTS - UPDATE TRACKER - UPDATE RECTS') 
                        track_window = curr_rect

                        hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
                        dst = cv2.calcBackProject([hsv],[0],roi_hist,[0,180],1)
                        # apply meanshift to get the new location
                        ret, track_window = cv2.meanShift(dst, track_window, term_crit)

                        x3,y3,w3,h3 = track_window

                        x3 = ((x1-x3)+x3)
                        y3 = ((y1-y3)+y3)
                        w3 = ((w1-w3)+w3)
                        h3 = ((h1-h3)+h3)

                        # draw tracking rectangles
                        cv2.rectangle(frame, (x3, y3),(w3, h3),rectangleColor ,2)
                        
                        # copy current rects in tracking rects
                        (t_x , t_y , t_w , t_h) = curr_rect
                        #calculate the centerpoints
                        t_x_bar = t_x + 0.5 * t_w
                        t_y_bar = t_y + 0.5 * t_h 

                        # Every time we have a new detection or bounding box, save it to upload to the server
                        # Crop body from Original full resolution frame
                        body_big = frame[y1:h1, x1:w1]

                        # Uncomment this if you want your every detection on same aspect ratio i-e 1:2

                        '''
                        ####################################

                        im_shape = body_big.shape
                        #print('ORIGINAl Width: ',im_shape[0])
                        #print('ORIGINAL Height: ',im_shape[1])
                        aspect_ratio = float(float(im_shape[0]) / float(im_shape[1]))
                        #print('ORIGINAL Aspect Ratio: ',aspect_ratio)
                        ratio_check = float(1 / 1.67)
                        #print ('Aspect Ratio Threshold: ', ratio_check)

                        if aspect_ratio < (ratio_check) or aspect_ratio > (ratio_check):
                            new_width = ratio_check * float(im_shape[1])
                            #print('NEW width: ', new_width)
                            aspect_ratio = float(float(new_width) / float(im_shape[1]))
                            #print('NEW aspect ratio: ', aspect_ratio)
                            body_big = imutils.resize(body_big, width=int(new_width))
                            
                        ####################################
                        '''
                        # before saving the detected image first get current date and time to append it with name
                        cur_date = (time.strftime("%Y-%m-%d"))
                        cur_time = (time.strftime("%H:%M:%S"))
                        # Append date and time
                        new_pin =cur_date+"-"+cur_time
                        # any hardcoded name you want
                        filename1 = 'UNKNOWN'
                        # Append new_pin and hardcoded name to make final name 
                        filename2 = str(filename1)+'-'+str(new_pin)
                        # this is your final image with the path to where it is located
                        sampleFile = ('%s/%s.png' % (save_body_path, filename2))

                        #Save image in a folder, save_body_path has the full path to the folder where we want to save images
                        cv2.imwrite('%s/%s.png' % (save_body_path, filename2), body_big)

                        # For Face Detection read each image from the location where we saved it
                        person = cv2.imread(sampleFile)
                        # Pass the image to face detector, we are using dlib's face detector 
                        dets = detector(person, 1)

                        print("Number of faces detected: {}".format(len(dets)))

                        # Iterate through all of detected bounding boxes/faces
                        for i, d in enumerate(dets):
                            print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format(
                                i, d.left(), d.top(), d.right(), d.bottom()))

                            # Crop the faces/bounding boxes and save in a separate folder for later use
                            crop = person[d.top():d.bottom(), d.left():d.right()]
                            # before saving the detected image first get current date and time to append it with name
                            cur_date = (time.strftime("%Y-%m-%d"))
                            cur_time = (time.strftime("%H:%M:%S"))
                            new_pin =cur_date+"-"+cur_time
                            facename1 = 'FACE'
                            facename2 = str(facename1)+'-'+str(new_pin)
                            sampleFace = ('%s/%s.png' % (save_faces_path, facename2))
                            #Save Image Here
                            cv2.imwrite('%s/%s.png' % (save_faces_path, facename2), crop)

                        # Put detected bodies in Queue... un comment below line if you have upload process running
                        # queue_BODIES.put(sampleFile)  
                    
        # show the output images
        cv2.imshow("Before NMS", orig)
        #cv2.imshow("After NMS", image)
        #cv2.imshow("ANZEN", frame)
        #cv2.imshow("thres", result_frame)

        # Always copy current frame to prev frame to consider it as a background for motion analysis
        frame1gray = frame2gray.copy()
            

        key = cv2.waitKey(10)
        if key == 27:
            break
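
body_detection() above depends on a somethingHasMoved() helper that is not shown. Because the frame difference is thresholded with THRESH_BINARY_INV, changed pixels come out black; a minimal motion test under that assumption (the 0.5% trigger ratio is an assumed value):

import cv2

def somethingHasMoved(result_frame, trigger_ratio=0.005):
    total_pixels = result_frame.shape[0] * result_frame.shape[1]
    # THRESH_BINARY_INV leaves changed pixels at value 0 (black)
    moved_pixels = total_pixels - cv2.countNonZero(result_frame)
    return moved_pixels > trigger_ratio * total_pixels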
def text_detect_and_recognition(img):
    # These two lines preserve the original image and its height and width
    org = img
    (H, W) = img.shape[:2]

    #Setting the image to the correct parameters to be turned later into a blob
    (newW, newH) = (640, 320)
    rW = W / float(newW)
    rH = H / float(newH)

    image = cv2.resize(img, (newW, newH))
    (H, W) = image.shape[:2]

    #Layers of the neural net for detecting where the text is in the image
    layerNames = [
        "feature_fusion/Conv_7/Sigmoid",
        "feature_fusion/concat_3"]

    # Modifying the resized image to the correct format in order to pass it through the neural net
    blob = cv2.dnn.blobFromImage(image, 1.0, (W, H), (123.68, 116.78, 103.94), swapRB=True,
                                 crop=False)
    net.setInput(blob)

    #These two variable represent the output of the neural net
    (scores, geometry) = net.forward(layerNames)
    (numRows, numCol) = scores.shape[2:4]
    rects = []
    confidences = []

    '''This for loop keeps only the cells whose score is confident enough that
    text lies inside the corresponding rectangle, and saves the parameters
    of each such rect.'''
    for y in range(0, numRows):
        scoresData = scores[0, 0, y]

        xData0 = geometry[0, 0, y]

        xData1 = geometry[0, 1, y]

        xData2 = geometry[0, 2, y]

        xData3 = geometry[0, 3, y]

        anglesData = geometry[0, 4, y]

        for x in range(0, numCol):
            if scoresData[x] < 0.5:
                continue
            (offsetX, offsetY) = (x * 4.0, y * 4.0)

            angle = anglesData[x]
            cos = np.cos(angle)
            sin = np.sin(angle)

            h = xData0[x] + xData2[x]
            w = xData1[x] + xData3[x]

            endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
            endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
            startX = int(endX - w)
            startY = int(endY - h)

            rects.append((startX, startY, endX, endY))
            confidences.append(scoresData[x])

    boxes = non_max_suppression(np.array(rects), probs=confidences)
    '''This for loop is for actually drawing the bounding box in the original image and
    passing that region of interest (roi) through the pytesseract functions.'''
    for (startX, startY, endX, endY) in boxes:
        print(startX, startY, endX, endY )
        newstartX = int(startX * rW)
        newstartY = int(startY * rH)
        newendX = int(endX * rW)
        newendY = int(endY * rH)
        boundary = 5

        roi = org[newstartY - boundary: newendY + boundary, newstartX - boundary: newendX + boundary]


        text = cv2.cvtColor(roi.astype(np.uint8), cv2.COLOR_BGR2GRAY)
        cong = r'--oem 2'
        textRecognized = pytesseract.image_to_string(text)

        textRecognized = textRecognized.replace("\n", "")
        textRecognized = textRecognized[:-1]
        # draw using the coordinates scaled back to the original image size
        cv2.rectangle(org, (newstartX, newstartY), (newendX, newendY), (0, 255, 0), 2)
        org = cv2.putText(org, textRecognized, (newendX, newendY + 5), cv2.FONT_ITALIC, fontScale=0.5, color=(0, 0, 0))

    return org
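A minimal sketch of how the function above might be wired up, assuming the module-level `net` it relies on is an EAST model loaded with OpenCV's DNN module; the model and image paths are placeholders, not part of the original snippet.

# Hypothetical setup and invocation for text_detect_and_recognition();
# the file paths are placeholders for illustration only.
import cv2
import numpy as np
import pytesseract
from imutils.object_detection import non_max_suppression  # used inside the function

# the function expects an EAST model loaded into a module-level `net`
net = cv2.dnn.readNet("frozen_east_text_detection.pb")

if __name__ == "__main__":
    img = cv2.imread("sample.jpg")
    annotated = text_detect_and_recognition(img)
    cv2.imshow("Text detection + OCR", annotated)
    cv2.waitKey(0)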
Example #41
0
def detect(args):
    # load the input image and grab the image dimensions
    image = args["image"]
    (H, W) = image.shape[:2]
    if image.ndim == 2:
        image = cv2.merge((image, image, image))
    orig = image.copy()

    # set the new width and height and then determine the ratio in change
    # for both the width and height
    (newW, newH) = (args["width"], args["height"])
    rW = W / float(newW)
    rH = H / float(newH)

    # resize the image and grab the new image dimensions
    image = cv2.resize(image, (newW, newH))
    (H, W) = image.shape[:2]

    # define the two output layer names for the EAST detector model that
    # we are interested -- the first is the output probabilities and the
    # second can be used to derive the bounding box coordinates of text
    layerNames = ["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"]

    # load the pre-trained EAST text detector
    # print("[INFO] loading EAST text detector...")
    net = cv2.dnn.readNet(args["east"])

    # construct a blob from the image and then perform a forward pass of
    # the model to obtain the two output layer sets
    blob = cv2.dnn.blobFromImage(image,
                                 1.0, (W, H), (123.68, 116.78, 103.94),
                                 swapRB=True,
                                 crop=False)
    start = time.time()
    net.setInput(blob)
    (scores, geometry) = net.forward(layerNames)
    end = time.time()

    # show timing information on text prediction
    # print("[INFO] text detection took {:.6f} seconds".format(end - start))

    # grab the number of rows and columns from the scores volume, then
    # initialize our set of bounding box rectangles and corresponding
    # confidence scores
    (numRows, numCols) = scores.shape[2:4]
    rects = []
    confidences = []

    # loop over the number of rows
    for y in range(0, numRows):
        # extract the scores (probabilities), followed by the geometrical
        # data used to derive potential bounding box coordinates that
        # surround text
        scoresData = scores[0, 0, y]
        xData0 = geometry[0, 0, y]
        xData1 = geometry[0, 1, y]
        xData2 = geometry[0, 2, y]
        xData3 = geometry[0, 3, y]
        anglesData = geometry[0, 4, y]

        # loop over the number of columns
        for x in range(0, numCols):
            # if our score does not have sufficient probability, ignore it
            if scoresData[x] < args["min_confidence"]:
                continue

            # compute the offset factor as our resulting feature maps will
            # be 4x smaller than the input image
            (offsetX, offsetY) = (x * 4.0, y * 4.0)

            # extract the rotation angle for the prediction and then
            # compute the sin and cosine
            angle = anglesData[x]
            cos = np.cos(angle)
            sin = np.sin(angle)

            # use the geometry volume to derive the width and height of
            # the bounding box
            h = xData0[x] + xData2[x]
            w = xData1[x] + xData3[x]

            # compute both the starting and ending (x, y)-coordinates for
            # the text prediction bounding box
            endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
            endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
            startX = int(endX - w)
            startY = int(endY - h)

            # add the bounding box coordinates and probability score to
            # our respective lists
            rects.append((startX, startY, endX, endY))
            confidences.append(scoresData[x])

    # apply non-maxima suppression to suppress weak, overlapping bounding
    # boxes
    boxes = non_max_suppression(np.array(rects), probs=confidences)
    imgs = []
    # loop over the bounding boxes
    for (startX, startY, endX, endY) in boxes:
        # scale the bounding box coordinates based on the respective
        # ratios
        startX = int(startX * rW)
        startY = int(startY * rH)
        endX = int(endX * rW)
        endY = int(endY * rH)
        # img = cv2.resize(orig[startY:endY,startX:endX], None, fx=1.5, fy=1.5, interpolation=cv2.INTER_CUBIC)
        imgs.append(orig[startY:endY, startX:endX])

        # draw the bounding box on the image
        # cv2.rectangle(orig, (startX, startY), (endX, endY), (0, 255, 0), 2)
    return imgs


# show the output image
# cv2.imshow("Text Detection", orig)
# cv2.waitKey(0)
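A hedged sketch of how `detect` might be invoked, assuming the example's own imports (numpy, time, imutils' non_max_suppression) are already in place; the dictionary keys mirror the ones the function reads, while the concrete values and paths are only illustrative assumptions.

# Hypothetical call to detect(); the keys match what the function reads above,
# but every concrete value is an assumption.
import cv2

args = {
    "image": cv2.imread("receipt.jpg"),        # numpy array, not a path
    "width": 320,                               # multiple of 32, as EAST requires
    "height": 320,                              # multiple of 32, as EAST requires
    "east": "frozen_east_text_detection.pb",    # path to the EAST model
    "min_confidence": 0.5,
}
crops = detect(args)
print("[INFO] {} text regions extracted".format(len(crops)))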
                            batch_locations,
                            labels,
                            min_prob=arguments['confidence'])

end = time.time()

print(f'[INFO] Detections took {end - start:.4f} seconds.')

for k in labels.keys():
    clone = resized.copy()

    for (box, prob) in labels[k]:
        (x_start, y_start, x_end, y_end) = box
        cv2.rectangle(clone, (x_start, y_start), (x_end, y_end), (0, 255, 0),
                      2)

    cv2.imshow('Without NMS', clone)
    clone = resized.copy()

    boxes = np.array([p[0] for p in labels[k]])
    proba = np.array([p[1] for p in labels[k]])
    boxes = non_max_suppression(boxes, proba)

    for (x_start, y_start, x_end, y_end) in boxes:
        cv2.rectangle(clone, (x_start, y_start), (x_end, y_end), (0, 0, 255),
                      2)

    print(f'[INFO] {k}: {len(boxes)}')
    cv2.imshow('With NMS', clone)
    cv2.waitKey(0)
Example #43
0
    orig = image.copy()

    # detect people in the image
    (rects, weights) = hog.detectMultiScale(image,
                                            winStride=(4, 4),
                                            padding=(8, 8),
                                            scale=1.05)

    # draw the original bounding boxes
    for (x, y, w, h) in rects:
        cv2.rectangle(orig, (x, y), (x + w, y + h), (0, 0, 255), 2)

    # apply non-maxima suppression to the bounding boxes using a
    # fairly large overlap threshold to try to maintain overlapping
    # boxes that are still people
    rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
    pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)

    # draw the final bounding boxes
    for (xA, yA, xB, yB) in pick:
        cv2.rectangle(image, (xA, yA), (xB, yB), (0, 255, 0), 2)

    # show some information on the number of bounding boxes
    filename = imagePath[imagePath.rfind("/") + 1:]
    print("[INFO] {}: {} original boxes, {} after suppression".format(
        filename, len(rects), len(pick)))

    # show the output images
    cv2.imshow("Before NMS", orig)
    cv2.imshow("After NMS", image)
    cv2.waitKey(0)
Example #44
0
def MotionDetection(inVideo, firstFrame, lastFrame):
    count = 0
    cap = cv2.VideoCapture(inVideo)
    cap.set(cv2.CAP_PROP_POS_FRAMES, firstFrame)

    frame_width = int(cap.get(3))
    frame_height = int(cap.get(4))
    fourcc = cv2.VideoWriter_fourcc(*'XVID')

    out = cv2.VideoWriter('../output/output.avi', fourcc, 20.0,
                          (frame_width, frame_height))

    ret, previous_frame = cap.read()
    frames = [previous_frame]
    median = np.median(frames, axis=0).astype(dtype=np.uint8)
    # Loop over all frames
    while cap.isOpened():
        # the read function gives two outputs. The check is a boolean function that returns if the video is being read
        ret, frame = cap.read()
        if not ret:
            break
        if count != lastFrame:
            if count % 3 == 0:
                if len(frames) == 3:
                    median = np.median(frames, axis=0).astype(dtype=np.uint8)
                elif len(frames) > 3:
                    frames = [frame]
                    median = np.median(frames, axis=0).astype(dtype=np.uint8)
            else:
                frames.append(frame)

        current_frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        previous_frame_gray = cv2.cvtColor(median, cv2.COLOR_BGR2GRAY)

        dframe = cv2.absdiff(current_frame_gray, previous_frame_gray)
        # Treshold to binarize
        th, dframe = cv2.threshold(dframe, 35, 255, cv2.THRESH_BINARY)
        # Morphological Operation
        dilated = cv2.dilate(dframe, None, iterations=4)
        opening = cv2.morphologyEx(dilated, cv2.MORPH_OPEN, kernel)
        closing = cv2.morphologyEx(opening, cv2.MORPH_CLOSE, kernel)

        (cnts, _) = cv2.findContours(closing, cv2.RETR_EXTERNAL,
                                     cv2.CHAIN_APPROX_SIMPLE)

        for contour in cnts:
            if cv2.contourArea(contour) < 100:
                # excluding too small contours. Set 10000 (100x100 pixels) for objects close to camera
                continue
            # obtain the corresponding bounding rectangle of our detected contour
            (x, y, w, h) = cv2.boundingRect(contour)

            offset = 30
            if x < offset:
                x = offset
            if y < offset:
                y = offset

            selection = current_frame_gray[y - offset:y + h + offset,
                                           x - offset:x + w + offset]

            cars = car_cascade.detectMultiScale(selection, 1.1, 1)
            people = people_cascade.detectMultiScale(selection, 1.1, 1)

            # apply non-maxima suppression to the bounding boxes using a
            # fairly large overlap threshold to try to maintain overlapping
            # boxes that are still people
            rects_cars = np.array([[x, y, x + w, y + h]
                                   for (x, y, w, h) in cars])
            rects_people = np.array([[x, y, x + w, y + h]
                                     for (x, y, w, h) in people])
            pick_cars = non_max_suppression(rects_cars,
                                            probs=None,
                                            overlapThresh=0.95)
            pick_people = non_max_suppression(rects_people,
                                              probs=None,
                                              overlapThresh=0.95)

            # TODO add object dection for other things.

            for i in range(len(pick_cars)):
                cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
                cv2.putText(frame, "C" + str(i + 1), (x, y),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 0))

            for i in range(len(pick_people)):
                cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
                cv2.putText(frame, "P" + str(i + 1), (x, y),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 0))

            # this sadly does not work that's why its commented out
            # if not rects_cars.__contains__(contour) and not rects_people.__contains__(contour):
            #     cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 0, 255), 2)
            #     cv2.putText(frame, "O", (x, y), cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 0))

        out.write(frame)
        count += 1

    # Release the capture and writer objects
    cap.release()
    out.release()
    return str(pathlib.Path('../output/output.avi').resolve())
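The function above leans on several module-level objects (`kernel`, `car_cascade`, `people_cascade`) and the imutils NMS import; a plausible setup is sketched below, with the cascade file names, kernel size, and input path being assumptions rather than part of the original code.

# Plausible module-level setup and invocation for MotionDetection();
# cascade files, kernel size, and the input video path are assumptions.
import pathlib
import cv2
import numpy as np
from imutils.object_detection import non_max_suppression

kernel = np.ones((5, 5), np.uint8)
car_cascade = cv2.CascadeClassifier("cars.xml")
people_cascade = cv2.CascadeClassifier("haarcascade_fullbody.xml")

output_path = MotionDetection("../input/traffic.avi", firstFrame=0, lastFrame=500)
print("[INFO] annotated video written to", output_path)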
def find_text_and_blur(frame, net, min_confidence):

    # load the input image and grab the image dimensions
    image = cv2.cvtColor(np.array(frame), cv2.COLOR_RGB2BGR)
    orig = image.copy()
    (H, W) = image.shape[:2]

    # set the new width and height and then determine the ratio in change
    # Round dimensions to the nearest multiple of 32, as required by EAST
    (newW, newH) = (round(W / 32) * 32, round(H / 32) * 32)
    rW = W / float(newW)
    rH = H / float(newH)

    # resize the image and grab the new image dimensions
    image = cv2.resize(image, (newW, newH))
    (H, W) = image.shape[:2]

    # define the two output layer names for the EAST detector model that
    # we are interested -- the first is the output probabilities and the
    # second can be used to derive the bounding box coordinates of text
    layerNames = ["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"]

    #net = cv2.dnn.readNet(eastPath)
    # construct a blob from the image and then perform a forward pass of
    # the model to obtain the two output layer sets
    blob = cv2.dnn.blobFromImage(image,
                                 1.0, (W, H), (123.68, 116.78, 103.94),
                                 swapRB=True,
                                 crop=False)
    net.setInput(blob)
    (scores, geometry) = net.forward(layerNames)

    # grab the number of rows and columns from the scores volume, then
    # initialize our set of bounding box rectangles and corresponding
    # confidence scores
    (numRows, numCols) = scores.shape[2:4]
    rects = []
    confidences = []
    # loop over the number of rows
    for y in range(0, numRows):
        # extract the scores (probabilities), followed by the geometrical
        # data used to derive potential bounding box coordinates that
        # surround text
        scoresData = scores[0, 0, y]
        xData0 = geometry[0, 0, y]
        xData1 = geometry[0, 1, y]
        xData2 = geometry[0, 2, y]
        xData3 = geometry[0, 3, y]
        anglesData = geometry[0, 4, y]
        # loop over the number of columns
        for x in range(0, numCols):
            # if our score does not have sufficient probability, ignore it
            if scoresData[x] < min_confidence:
                continue
            # compute the offset factor as our resulting feature maps will
            # be 4x smaller than the input image
            (offsetX, offsetY) = (x * 4.0, y * 4.0)
            # extract the rotation angle for the prediction and then
            # compute the sin and cosine
            angle = anglesData[x]
            cos = np.cos(angle)
            sin = np.sin(angle)
            # use the geometry volume to derive the width and height of
            # the bounding box
            h = xData0[x] + xData2[x]
            w = xData1[x] + xData3[x]
            # compute both the starting and ending (x, y)-coordinates for
            # the text prediction bounding box
            endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
            endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
            startX = int(endX - w)
            startY = int(endY - h)
            # add the bounding box coordinates and probability score to
            # our respective lists
            rects.append((startX, startY, endX, endY))
            confidences.append(scoresData[x])

    # apply non-maxima suppression to suppress weak, overlapping bounding
    # boxes
    boxes = non_max_suppression(np.array(rects), probs=confidences)
    # loop over the bounding boxes
    for (startX, startY, endX, endY) in boxes:
        # scale the bounding box coordinates based on the respective
        # ratios
        startX = int(startX * rW)
        startY = int(startY * rH)
        endX = int(endX * rW)
        endY = int(endY * rH)

        # add blurring based on boxes
        box_dim_img = orig[startY:endY, startX:endX]
        blur = cv2.GaussianBlur(box_dim_img, (101, 101), 0)
        orig[startY:endY, startX:endX] = blur

    return orig
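A sketch of how `find_text_and_blur` could be driven from a webcam, assuming the example's numpy and imutils imports are in place and the EAST model is loaded once up front; the model path and the frame loop are assumptions about the intended caller.

# Hypothetical driver for find_text_and_blur(); the model path is a placeholder.
import cv2

net = cv2.dnn.readNet("frozen_east_text_detection.pb")  # load EAST once
cap = cv2.VideoCapture(0)

while True:
    ok, frame = cap.read()
    if not ok:
        break
    # the function converts RGB -> BGR internally, so hand it an RGB frame
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    blurred = find_text_and_blur(rgb, net, min_confidence=0.5)
    cv2.imshow("Blurred text", blurred)
    if cv2.waitKey(1) & 0xFF == 27:  # ESC quits
        break

cap.release()
cv2.destroyAllWindows()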
Example #46
0
def detector(video_capture, ww, hh, M, ppl_size, ROI_1, ROI_2, video_input = False):
    global display, sampling, NMS    
    
    if (sampling == True):
        global image
        
        
    if (video_input == True):
        global frame_skip 
        for i in range (frame_skip):
            video_capture.read()
        
    ret, image = video_capture.read()
    
    #rotate
    if M is not None:
        image = cv2.warpAffine(image, M, (ww, hh))
     
    #resize
    image = imutils.resize(image, width=min(400, image.shape[1]))
    
    ##mask after resize    
    image = image[ROI_1[1]:ROI_2[1],ROI_1[0]:ROI_2[0]] 
    
    #Display - origin 
    orig = image.copy()
    
    #detect
    (rects, weights) = hog.detectMultiScale(image, winStride=(8, 8),
     padding=(8, 8), scale=1.05)
    
    ##discard detections whose width falls outside the expected person size range
    i = 0
    while (i < len(rects)):
        if (rects[i][2] > ppl_size[0] or rects[i][2] < ppl_size[1]):
            if (display == True):
                [x,y,w,h] = rects[i]
                cv2.rectangle(orig, (x, y), (x + w, y + h), (255, 0, 0), 2)
            rects = np.delete(rects,i,0)
        else:
            if (display == True):
                [x,y,w,h] = rects[i]
                cv2.rectangle(orig, (x, y), (x + w, y + h), (0, 0, 255), 2)
            i += 1
     
    #Display - origin 
#    for (x, y, w, h) in rects:
#        #box size validation
#        print ('w = ' + str(w))
#        if (w < 100):
#            cv2.rectangle(orig, (x, y), (x + w, y + h), (0, 0, 255), 2)
    
    #Display - origin 
    if (display == True): cv2.imshow('HOG', orig)
    
    #combine rectangle
    if (NMS == True):
        rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
        pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)
        
        # draw the final bounding boxes
        for (xA, yA, xB, yB) in pick:
            cv2.rectangle(image, (xA, yA), (xB, yB), (0, 255, 0), 2)
        
        #Display - result
        cv2.imshow("HOG + NMS", image)
    
    people = len(rects)

    if (sampling == True):
        if(random.random() > 0.9):
                #print ("save")
                fname = "./save/all_" + time.strftime("%m_%d_%H_%M_%S")+ ".jpg"
                cv2.imwrite(fname,image)
                
    if people > 0:
        return True
    else:
        return False
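The `detector` above reads several globals (`display`, `sampling`, `NMS`, `frame_skip`) plus the `hog` descriptor; the sketch below shows one plausible way to define them and call the function, with every concrete value, ROI corner, and size limit being an illustrative assumption.

# Plausible globals and invocation for detector(); all concrete values here
# are assumptions made for illustration.
import cv2
import imutils
import numpy as np                                      # used inside detector()
from imutils.object_detection import non_max_suppression  # used inside detector()

hog = cv2.HOGDescriptor()
hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())

display = True      # show the intermediate HOG / NMS windows
sampling = False    # occasionally save frames under ./save/
NMS = True          # merge overlapping detections
frame_skip = 5      # frames dropped per call when reading from a file

cap = cv2.VideoCapture(0)
occupied = detector(cap, ww=640, hh=480, M=None, ppl_size=(200, 40),
                    ROI_1=(0, 0), ROI_2=(400, 300), video_input=False)
print("[INFO] person detected:", occupied)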
Example #47
0
def getEnhanced(img):
    height = img.shape[0]
    width = img.shape[1]
    ratio = height / width
    newWidth = 960
    newHeight = int(((newWidth * ratio) // 32) * 32)
    print(newHeight)
    img = cv2.resize(img, (newWidth, newHeight))
    layerNames = ["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"]
    net = cv2.dnn.readNet("frozen_east_text_detection.pb")
    blob = cv2.dnn.blobFromImage(img,
                                 1.0, (newWidth, newHeight),
                                 (123.68, 116.78, 103.94),
                                 swapRB=True,
                                 crop=False)
    net.setInput(blob)
    (scores, geometry) = net.forward(layerNames)
    (numRows, numCols) = scores.shape[2:4]
    rects = []
    confidences = []

    # loop over the number of rows
    for y in range(0, numRows):
        # extract the scores (probabilities), followed by the geometrical
        # data used to derive potential bounding box coordinates that
        # surround text
        scoresData = scores[0, 0, y]
        xData0 = geometry[0, 0, y]
        xData1 = geometry[0, 1, y]
        xData2 = geometry[0, 2, y]
        xData3 = geometry[0, 3, y]
        anglesData = geometry[0, 4, y]

        # loop over the number of columns
        for x in range(0, numCols):
            # if our score does not have sufficient probability, ignore it
            if scoresData[x] < 0.5:
                continue

            # compute the offset factor as our resulting feature maps will
            # be 4x smaller than the input image
            (offsetX, offsetY) = (x * 4.0, y * 4.0)

            # extract the rotation angle for the prediction and then
            # compute the sin and cosine
            angle = anglesData[x]
            cos = np.cos(angle)
            sin = np.sin(angle)

            # use the geometry volume to derive the width and height of
            # the bounding box
            h = xData0[x] + xData2[x]
            w = xData1[x] + xData3[x]

            # compute both the starting and ending (x, y)-coordinates for
            # the text prediction bounding box
            endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
            endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
            startX = int(endX - w)
            startY = int(endY - h)

            # add the bounding box coordinates and probability score to
            # our respective lists
            rects.append((startX, startY, endX, endY))
            confidences.append(scoresData[x])

    # apply non-maxima suppression to suppress weak, overlapping bounding
    # boxes once all rows have been processed
    boxes = non_max_suppression(np.array(rects), probs=confidences)

    # loop over the bounding boxes
    for (startX, startY, endX, endY) in boxes:
        # the image was resized before detection, so no rescaling is needed here
        startX = int(startX)
        startY = int(startY)
        endX = int(endX)
        endY = int(endY)
        print("text")
        # draw the bounding box on the image
        cv2.rectangle(img, (startX, startY), (endX, endY), (0, 255, 0), 2)

    return img
def main():
	# initialize the HOG descriptor/person detector
	camera = cv2.VideoCapture(0); 
	time.sleep(0.25)
	hog = cv2.HOGDescriptor()
	hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())

	Threshold = 0
	features_number = 0

	while True: # main loop

		tracked_features = None

		while True: # detection loop, loop over the images

			unchangedPointsMap = dict()

			# load the image and resize it to (1) reduce detection time
			# and (2) improve detection accuracy
			(grabbed, current_frame) = camera.read()
			current_frame = imutils.resize(current_frame, width = 300)
			current_frame_copy = current_frame.copy()
			current_frame = cv2.cvtColor(current_frame, cv2.COLOR_BGR2GRAY)
		 
			# detect people in the image
			(rects, weights) = hog.detectMultiScale(current_frame, winStride=(4, 4),
				padding=(8, 8), scale=1.5)
		 
			# draw the original bounding boxes
			
			for i in range(len(rects)):
				x, y, w, h = rects[i]
				rects[i][0] = x + 15
				rects[i][1] = y + 40
				rects[i][2] = w - 30
				rects[i][3] = h - 20

			for (x, y, w, h) in rects:
				cv2.rectangle(current_frame_copy, (x, y), (x + w, y + h), (0, 0, 255), 2)
		 
			# apply non-maxima suppression to the bounding boxes using a
			# fairly large overlap threshold to try to maintain overlapping
			# boxes that are still people
			rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
			pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)
		 
			# draw the final bounding boxes
			for (xA, yA, xB, yB) in pick:
				cv2.rectangle(current_frame, (xA, yA), (xB, yB), (0, 255, 0), 2)
			
			print("{} original boxes, {} after suppression".format(len(rects), len(pick)))
			
		# 	if len(rects) > 0:
		# 		features = find_features(current_frame, rects[0], 0)
		# 		print("NUM" + str(features_number))
		# 		break

		# 	# cv2.imshow("HOG", current_frame_copy)
		# 	# key = cv2.waitKey(1) & 0xFF
			
		# 	# if key == ord("w"):
		# 	# 	break

		# features_number = len(features)
		# Threshold = features_number * threshold_percent
			
		# while True: # Tracking loop

		# 	#print ("Threshold" + str(Threshold))
		# 	if features_number < Threshold:
		# 		print ("Features less than threshold")
		# 		break
		# 	else:
		# 		(grabbed, next_frame) = camera.read()
		# 		next_frame = imutils.resize(next_frame, width = 300)
		# 		if not grabbed:
		# 			print ("Camera read failed")
		# 			return
		# 		current_frame_copy = next_frame.copy()
		# 		next_frame = cv2.cvtColor(next_frame, cv2.COLOR_BGR2GRAY)

	 # 			#-----------Tracking using LK ---------------------------

	 # 			try:
	 # 				features = np.array(features)
	 # 				#print("Features" + str(features))
	 # 				(tracked_features, status, feature_errors) = cv2.calcOpticalFlowPyrLK(current_frame, next_frame, features, None, **lk_params)
		# 			#print("TEST")
		# 			# print("KEYS" + str(unchangedPointsMap.keys()))
		# 			# for i in range(len(tracked_features[0])):
		# 			# 	f = tracked_features[0][i]
		# 			# 	x = round(f[0])
		# 			# 	y = round(f[1])
		# 			# 	print("x and y" + str((x,y)))
		# 			# 	if (x,y) in unchangedPointsMap.keys():
		# 			# 		unchangedPointsMap[(x,y)] += 1
		# 			# 		print("ADDED" + str(unchangedPointsMap[(x,y)]))
		# 			# 		if unchangedPointsMap[(x,y)] == 30:
		# 			# 			print ("BEFORE" + str(tracked_features[0]))
		# 			# 			tracked_features = np.delete(tracked_features,i,0)
		# 			# 			unchangedPointsMap.pop((x,y))
		# 			# 			print ("AFTER" + str(tracked_features[0]))
		# 			# 	else:
		# 			# 		unchangedPointsMap[(x,y)] = 0

		# 			# print("BEFORE" + str(tracked_features))
		# 			# tracked_features[tracked_features[:,0].argsort()]
		# 			# print("AFTER" + str(tracked_features))

		# 			arr_x = []
		# 			arr_y = []

		# 			for i in range(len(tracked_features)):
		# 				f = tracked_features[i]
		# 				x = f[0][0]
		# 				y = f[0][1]

		# 				arr_x.append(x)
		# 				arr_y.append(y)

		# 			print("X_arr" + str(arr_x))
		# 			print("Y_arr" + str(arr_y))
		# 			print ("X SORTED " + str(sorted(arr_x)))
		# 			print ("Y SORTED " + str(sorted(arr_y)))

		# 			arr_x = sorted(arr_x)
		# 			arr_y = sorted(arr_y)

		# 			mid = len(arr_x)/2
		# 			X = arr_x[mid]
		# 			mid = len(arr_y)/2
		# 			Y = arr_y[mid]

		# 			new_feature_number = 0
		# 			temp_set_number = []
		# 			temp_distance = []
		# 			j = 0
		# 			for i in range(features_number):
		# 				if status[i] == 1:
		# 					new_feature_number += 1
		# 					#temp_set_number.append()
		# 					#temp_distance.append(height_from_floor[i])
		# 					j += 1
					
		# 			#height_from_floor = temp_distance
		# 			features_number = new_feature_number
		# 			#print("Features_num" + str(features_number))
		# 			features = []

		# 			for i in range(features_number):
		# 				features.append(tracked_features[i])

		# 			features = np.array(features)
		# 			tracked_features = []
		# 			current_frame = next_frame.copy()
	 # 			except Exception, e:
	 # 				raise e

	 # 			#-------Showing Points ------------------------
	 # 			for i in range(features_number):
	 # 				# print ("features " + str(features)) 
	 # 				# print ("features0 " + str(features[0]))
	 # 				# print ("features00 " + str(features[0][0]))
	 # 				# print ("features000 " + str(features[0][0][0]))

	 # 				#print ("features " + str(features[i]))
	 # 				cv2.circle(current_frame_copy,
	 # 						   tuple(features[i][0]),
	 # 						   3,
	 # 						   255,
	 # 						   -1)

	 # 			cv2.circle(current_frame_copy,
	 # 						(X,Y),
	 # 						5,
	 # 						(0,0,255),
	 # 						-1)

			# show the output images
			cv2.imshow("HOG", current_frame_copy)
			key = cv2.waitKey(1) & 0xFF
			
			if key == ord("w"):
				break
			
	camera.release()
	cv2.destroyAllWindows()
def text_detector(image):
    # resize for EAST usage
    image = cv2.resize(image, (640, 320), interpolation=cv2.INTER_AREA)
    orig = image
    
    # could be optimized later, leave out extra copy
    # orig_for_crop = image.copy()

    (H, W) = image.shape[:2]

    (newW, newH) = (640, 320)
    rW = W / float(newW)
    rH = H / float(newH)

    image = cv2.resize(image, (newW, newH))
    (H, W) = image.shape[:2]

    layerNames = [
        "feature_fusion/Conv_7/Sigmoid",
        "feature_fusion/concat_3"]

    blob = cv2.dnn.blobFromImage(image, 1.0, (W, H),
                                 (123.68, 116.78, 103.94), swapRB=True, crop=False)

    net.setInput(blob)
    (scores, geometry) = net.forward(layerNames)
    (numRows, numCols) = scores.shape[2:4]
    rects = []
    confidences = []

    for y in range(0, numRows):

        scoresData = scores[0, 0, y]
        xData0 = geometry[0, 0, y]
        xData1 = geometry[0, 1, y]
        xData2 = geometry[0, 2, y]
        xData3 = geometry[0, 3, y]
        anglesData = geometry[0, 4, y]

        # loop over the number of columns
        for x in range(0, numCols):
            # if our score does not have sufficient probability, ignore it
            if scoresData[x] < 0.5:
                continue

            # compute the offset factor as our resulting feature maps will
            # be 4x smaller than the input image
            (offsetX, offsetY) = (x * 4.0, y * 4.0)

            # extract the rotation angle for the prediction and then
            # compute the sin and cosine
            angle = anglesData[x]
            cos = np.cos(angle)
            sin = np.sin(angle)

            # use the geometry volume to derive the width and height of
            # the bounding box
            h = xData0[x] + xData2[x]
            w = xData1[x] + xData3[x]

            # compute both the starting and ending (x, y)-coordinates for
            # the text prediction bounding box
            padding = 5
            endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))+padding
            endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))+padding
            startX = int(endX - w)-padding
            startY = int(endY - h)-padding

            # add the bounding box coordinates and probability score to
            # our respective lists
            rects.append((startX, startY, endX, endY))
            confidences.append(scoresData[x])

    boxes = non_max_suppression(np.array(rects), probs=confidences)
    # count = 0
    cropped_imgs = []
    for (startX, startY, endX, endY) in boxes:
        startX = int(startX * rW)
        startY = int(startY * rH)
        endX = int(endX * rW)
        endY = int(endY * rH)
        # crop_object(orig, startX, startY, endX, endY, count)
        # count += 1
        
        cropped_img = orig[int(startY):int(endY), int(startX):int(endX)]
        cropped_imgs.append(cropped_img)
        # draw the bounding box on the image
        # cv2.rectangle(orig, (startX, startY), (endX, endY), (0, 255, 0), 3)

    return cropped_imgs
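Since `text_detector` only returns the cropped regions, a typical follow-up step would be to hand each crop to an OCR engine; the sketch below assumes pytesseract, as in the surrounding examples, and uses a placeholder input image and a locally loaded EAST model for the module-level `net` the function expects.

# Possible follow-up: OCR the crops returned by text_detector(). The use of
# pytesseract, the model path, and the input path are assumptions.
import cv2
import pytesseract

net = cv2.dnn.readNet("frozen_east_text_detection.pb")  # EAST model used by text_detector()

frame = cv2.imread("frame.jpg")
for crop in text_detector(frame):
    if crop.size == 0:          # skip empty crops from clipped boxes
        continue
    gray = cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY)
    print(pytesseract.image_to_string(gray, config="--psm 7").strip())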
Example #50
0
def image_processing(image, orig):
    # resize the original image to new dimensions
    (origH, origW) = image.shape[:2]
    height = 320
    width = 320
    min_confidence = 0.5
    # set the new height and width to the default of 320
    (newW, newH) = width, height

    # Calculate the ratio between the original and new image for both height and width.
    # This ratio will be used to translate bounding box locations back onto the original image.
    rW = origW / float(newW)
    rH = origH / float(newH)
    image = cv2.resize(image, (newW, newH))
    (H, W) = image.shape[:2]

    # construct a blob from the image to forward pass it to EAST model
    blob = cv2.dnn.blobFromImage(image,
                                 1.0, (W, H), (123.68, 116.78, 103.94),
                                 swapRB=True,
                                 crop=False)
    east = "models/frozen_east_text_detection.pb"
    # load the pre-trained EAST model for text detection
    net = cv2.dnn.readNet(east)

    # The following two layers need to be pulled from the EAST model to achieve this.
    layerNames = ["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"]

    # Forward pass the blob from the image to get the desired output layers
    net.setInput(blob)
    (scores, geometry) = net.forward(layerNames)

    # Returns a bounding box and probability score if it is more than minimum confidence
    # Find predictions and  apply non-maxima suppression
    (boxes, confidence_val) = predictions(scores, geometry)
    boxes = non_max_suppression(np.array(boxes), probs=confidence_val)

    ##Text Detection and Recognition

    # initialize the list of results
    results = []

    # loop over the bounding boxes to find the coordinate of bounding boxes
    for (startX, startY, endX, endY) in boxes:
        # scale the coordinates based on the respective ratios in order to reflect bounding box on the original image
        startX = int(startX * rW)
        startY = int(startY * rH)
        endX = int(endX * rW)
        endY = int(endY * rH)

        # extract the region of interest
        r = orig[startY:endY, startX:endX]

        # configuration setting to convert image to string.
        configuration = "-l eng --oem 1 --psm 8"
        ##This will recognize the text from the image of bounding box
        text = pytesseract.image_to_string(r, config=configuration)

        # append bbox coordinate and associated text to the list of results
        results.append(((startX, startY, endX, endY), text))

    # Display the image with bounding box and recognized text
    orig_image = orig.copy()

    # Moving over the results and display on the image
    for ((start_X, start_Y, end_X, end_Y), text) in results:
        # display the text detected by Tesseract
        print("{}\n".format(text))

        # Displaying text
        text = "".join([x if ord(x) < 128 else "" for x in text]).strip()
        cv2.rectangle(orig_image, (start_X, start_Y), (end_X, end_Y),
                      (0, 0, 255), 2)
        cv2.putText(
            orig_image,
            text,
            (start_X, start_Y - 30),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.7,
            (0, 0, 255),
            2,
        )
    # print(f"boxes are{boxes}")
    plt.imshow(orig_image)
    plt.title("Output")
    plt.show()
    cv2.imshow("Original", image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    return boxes
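`image_processing` calls a `predictions(scores, geometry)` helper that is not shown in this snippet; the sketch below reconstructs what it likely looks like from the inline EAST decoding loops used in the other examples (the 0.5 confidence cut-off is an assumption).

# Sketch of the missing predictions() helper, modeled on the inline EAST
# decoding loops elsewhere in these examples; the default threshold is assumed.
import numpy as np

def predictions(scores, geometry, min_confidence=0.5):
    (numRows, numCols) = scores.shape[2:4]
    boxes = []
    confidence_val = []

    for y in range(0, numRows):
        scoresData = scores[0, 0, y]
        xData0 = geometry[0, 0, y]
        xData1 = geometry[0, 1, y]
        xData2 = geometry[0, 2, y]
        xData3 = geometry[0, 3, y]
        anglesData = geometry[0, 4, y]

        for x in range(0, numCols):
            if scoresData[x] < min_confidence:
                continue

            # feature maps are 4x smaller than the resized input
            (offsetX, offsetY) = (x * 4.0, y * 4.0)

            angle = anglesData[x]
            cos = np.cos(angle)
            sin = np.sin(angle)

            h = xData0[x] + xData2[x]
            w = xData1[x] + xData3[x]

            endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
            endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
            startX = int(endX - w)
            startY = int(endY - h)

            boxes.append((startX, startY, endX, endY))
            confidence_val.append(scoresData[x])

    return (boxes, confidence_val)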
car_cascade = cv2.CascadeClassifier(cascPath)

# Read the image
image = cv2.imread(imagePath)

# Resize the image so it fits in the screen
image1 = imutils.resize(image, height=500)
gray = cv2.cvtColor(image1, cv2.COLOR_BGR2GRAY)

# Detect faces in the image
faces = car_cascade.detectMultiScale(
    gray,
    scaleFactor=1.1,
    minNeighbors=5,
    minSize=(30, 30),
    # flags = cv2.cv.CV_HAAR_SCALE_IMAGE
    flags=0
)
# non_max_suppression expects corner coordinates, so convert (x, y, w, h) first
rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in faces])
face = non_max_suppression(rects, probs=None, overlapThresh=0.3)
if len(faces) == 1:
    print("Found {0} face!".format(len(faces)))
else:
    print("Found {0} faces!".format(len(faces)))

# Draw a rectangle around the faces
for (xA, yA, xB, yB) in face:
    cv2.rectangle(image1, (xA, yA), (xB, yB), (0, 255, 0), 2)

cv2.imshow("Faces found", image1)
cv2.waitKey(0)
Example #52
0
def det_txt_ocr(img_path):
	try:
		# load the input image and grab the image dimensions
		image = cv2.imread(img_path)

		#angle for rotation
		# fix------
		ag = 357
		# fix------

		#rotation
		num_rows, num_cols = image.shape[:2]
		rotation_matrix = cv2.getRotationMatrix2D((num_cols / 2, num_rows / 2), ag, 1)
		image = cv2.warpAffine(image, rotation_matrix, (num_cols, num_rows))

		orig = image.copy()
		(origH, origW) = image.shape[:2]

		# set the new width and height and then determine the ratio in change
		# for both the width and height
		# fix ---------------------------
		inW = 320
		inH = 160
		# -------------------------------

		(newW, newH) = (inW,inH)
		rW = origW / float(newW)
		rH = origH / float(newH)

		# resize the image and grab the new image dimensions
		image = cv2.resize(image, (newW, newH))
		(H, W) = image.shape[:2]

		# define the two output layer names for the EAST detector model that
		# we are interested -- the first is the output probabilities and the
		# second can be used to derive the bounding box coordinates of text


		# construct a blob from the image and then perform a forward pass of
		# the model to obtain the two output layer sets
		# fix ---------------------------
		blobsize = 0.5
		# -------------------------------

		blob = cv2.dnn.blobFromImage(image,blobsize, (W, H),
			(123.68, 116.78, 103.94), swapRB=True, crop=False)
		netdet.setInput(blob)
		(scores, geometry) = netdet.forward(layerNames)

		# decode the predictions, then  apply non-maxima suppression to
		# suppress weak, overlapping bounding boxes
		(rects, confidences) = decode_predictions(scores, geometry)
		boxes = non_max_suppression(np.array(rects),probs=confidences,)

		results = []

		# 1 round
		for (startX, startY, endX, endY) in boxes:
			# scale the bounding box coordinates based on the respective
			# ratios
			startX = int(startX * rW)
			startY = int(startY * rH)
			endX = int(endX * rW)
			endY = int(endY * rH)

			# in order to obtain a better OCR of the text we can potentially
			# apply a bit of padding surrounding the bounding box -- here we
			# are computing the deltas in both the x and y directions
			# fix ---------------------------
			pX = 0.0
			pY = 0.2
			# -------------------------------

			dX = int((endX - startX) * pX)
			dY = int((endY - startY) * pY)

			# widen the box to the full image width and pad it vertically
			startX = 0
			startY = max(0, startY - dY)
			endX = origW
			endY = min(origH, endY + (dY * 2))

			# extract the actual padded ROI
			roi = orig[startY:endY, startX:endX]
			pd = orig[endY:origH, startX:endX]

			# in order to apply Tesseract to OCR the text we must supply the path to the
			# tesseract binary, a language, an OEM mode and a PSM mode
			pytesseract.pytesseract.tesseract_cmd = r'Tesseract-OCR\tesseract'
			config = ("-l thafast --oem 1 --psm 7")
			text = pytesseract.image_to_string(roi, config=config)

			# add the bounding box coordinates and OCR'd text to the list
			# of results
			results.append(((startX, startY, endX, endY), text))
			# just 1r
			break

		# sort the results bounding box coordinates from top to bottom
		results = sorted(results, key=lambda r:r[0][1])

		# loop over the results
		for ((startX, startY, endX, endY), text) in results:
			# display the text OCR by Tesseract
			x = list(text)
			for i, item in enumerate(x):
				if ord(item) == 46 or ord(item) == 91 or ord(item) == 93:
					x[i] = " "
				if ord(item) == 124:
					x[i] = " "
				if ord(item) > 3630:
					x[i] = " "
			# text out put
			text_out = "".join([c if ord(c) > 44 else "" for c in x]).strip()
			# print("--OCR--")
			#print(text_out)

			output = orig.copy()
			cv2.rectangle(output, (startX, startY), (endX, endY),(127, 255, 0), 1)
			# sv pd
			idx = 1
			write_name = r'pd\pd_' + str(idx) + '.png'
			cv2.imwrite(write_name, pd)
			idx += 1
			# show the output image
			# cv2.imshow("Text Detection", output)
			# cv2.moveWindow("Text Detection", 600, 300)
			# cv2.imshow("Text ROI", roi)
			# cv2.moveWindow("Text ROI", 600, 400)
			# cv2.imshow("PV", pv)
			# cv2.moveWindow("PV", 600, 500)
			# (origH2, origW2) = pv.shape[:2]
			# print(origH2)
			# print(origW2)
			cv2.waitKey(0)

			return text_out
	except Exception as e:
		text_out = "type error: " + str(e)
		# print("can't detect text")
		return text_out
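`det_txt_ocr` depends on a module-level `netdet`, a `layerNames` list, and a `decode_predictions` helper that are not shown in this snippet; the lines below sketch the assumed setup. The model path is a placeholder, and `decode_predictions(scores, geometry)` must return the same `(rects, confidences)` pair produced by the decoding loops in the other examples.

# Assumed module-level objects for det_txt_ocr(); the model path is a
# placeholder, and a decode_predictions(scores, geometry) helper returning
# (rects, confidences) must also be defined, as in the other examples.
import cv2

layerNames = ["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"]
netdet = cv2.dnn.readNet("frozen_east_text_detection.pb")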
imagePath = "/home/pi/Desktop/Canteen/canteen145.jpe"

image = cv2.imread(imagePath)
image = imutils.resize(image, width=min(800, image.shape[1]))
orig = image.copy()

(rects, weights) = hog.detectMultiScale(image,
                                        winStride=(4, 4),
                                        padding=(4, 4),
                                        scale=1.05)  #padding(8,8) , scale 1.01

for (x, y, w, h) in rects:
    cv2.rectangle(orig, (x, y), (x + w, y + h), (0, 0, 255), 2)

rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
pick = non_max_suppression(rects, probs=None,
                           overlapThresh=0.65)  # a larger overlapThresh suppresses fewer overlapping boxes

for (xA, yA, xB, yB) in pick:
    cv2.rectangle(image, (xA, yA), (xB, yB), (0, 255, 0), 2)

filename = imagePath  #[imagePath.rfind("/")+1:]
print("[INFO] {}: {} original boxes, {} after suppression".format(
    filename, len(rects), len(pick)))

#cv2.imshow("Before NMS" , orig)
#cv2.imshow("After NMS" , image)
#cv2.waitKey(0)
#cv2.destroyAllWindows()
cv2.imwrite('tuning_canteenpic.jpeg', image)
cv2.destroyAllWindows()
Example #54
0
def OCR():
    
    cv2.destroyAllWindows()
    engine.say("A4 mode press 13 and for medicine mode press 11")
    engine.runAndWait()
    engine.stop()
    #///////////////////////////////////////////////////////
    #ocr_mode = int(input('''Enter the mode of the OCR operation: A4 Papers: 1 Medicine: 2 '''))
    ocr_run = True
    while ocr_run:
        sleep(.25)
        if GPIO.input(11)==GPIO.HIGH:
            print("Medicine Mode activated")
            ###################################################################################
            image = cv2.imread('1.jpg', cv2.IMREAD_COLOR)
            print("Image loaded ")
            # ///////////////////////////////////////////////////////
            orig = image.copy()
            (origH, origW) = image.shape[:2]

            # set the new width and height and then determine the ratio in change
            # for both the width and height
            (newW, newH) = (320, 320)
            rW = origW / float(newW)
            rH = origH / float(newH)

            # resize the image and grab the new image dimension2s
            image = cv2.resize(image, (newW, newH))
            (H, W) = image.shape[:2]

            # define the two output layer names for the EAST detector model that
            # we are interested -- the first is the output probabilities and the
            # second can be used to derive the bounding box coordinates of text
            layerNames = [
                "feature_fusion/Conv_7/Sigmoid",
                "feature_fusion/concat_3"]

            # load the pre-trained EAST text detector
            print("[INFO] loading EAST text detector...")
            net = cv2.dnn.readNet("/home/pi/Desktop/OCR_TTS-master/frozen_east_text_detection.pb")

            # construct a blob from the image and then perform a forward pass of
            # the model to obtain the two output layer sets
            blob = cv2.dnn.blobFromImage(image, 1.0, (W, H),
                                         (123.68, 116.78, 103.94), swapRB=True, crop=False)
            net.setInput(blob)

            (scores, geometry) = net.forward(layerNames)

            # decode the predictions, then  apply non-maxima suppression to
            # suppress weak, overlapping bounding boxes
            (rects, confidences) = decode_predictions(scores, geometry)
            boxes = non_max_suppression(np.array(rects), probs=confidences)

            final_list = []
            text_empty = ''
            # loop over the bounding boxes
            for (startX, startY, endX, endY) in boxes:
                # scale the bounding box coordinates based on the respective
                # ratios
                startX = int(startX * rW)
                startY = int(startY * rH)
                endX = int(endX * rW)
                endY = int(endY * rH)
                dX = int((endX - startX) * float(0))
                dY = int((endY - startY) * float(0))
                startX = max(0, startX - dX)
                startY = max(0, startY - dY)
                endX = min(origW, endX + (dX * 2))
                endY = min(origH, endY + (dY * 2))
                roi = orig[startY:endY, startX:endX]
                ########################################################################

                text = pytesseract.image_to_string(
                    roi, config="-l eng --oem 1 --psm 11")
                print("for:" + text)

                text_empty = text_empty +text + " "
            print(text_empty)
            engine.say(text_empty)
            engine.runAndWait()
            engine.stop()
            exit_loop = True
            sleep(1)
            engine.say("repeat press the same button ")
            engine.runAndWait()
            engine.stop()

            while exit_loop:
               
                sleep(.25)
                if GPIO.input(11)==GPIO.HIGH:

                    engine.say(text_empty)
                    engine.runAndWait()
                    engine.stop()
                elif GPIO.input(13)==GPIO.HIGH:
                    ocr_run = False
                    exit_loop = False
                    engine.stop()

                else:
                    pass


            #############################################################################
        elif GPIO.input(13) == GPIO.HIGH:

            print("A4 Mode")
            # ---------------------------Load Imagge---------------------------#
            img = cv2.imread('1.png', cv2.IMREAD_COLOR)
            # ---------------------------GreyScale Imagge---------------------------#
            # convert to grey to reduce detials
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            # /////////////////////////////////////////////////////////////////
            # ---------------------------Filter1 Imagge---------------------------#
            gray = cv2.bilateralFilter(gray, 11, 17, 17)  # Blur to reduce noise
            # /////////////////////////////////////////////////////////////////
            # ---------------------------Thresholding Imagge---------------------------#
            gray = cv2.adaptiveThreshold(
                gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
            # /////////////////////////////////////////////////////////////////
            # ---------------------------Result---------------------------#
            original = pytesseract.image_to_string(gray, config=' -l eng --oem 1 ')
            print(original)

            engine.say("words detected are "+original)
            engine.runAndWait()
            engine.stop()
            exit_loop = True
            
            sleep(1)
            engine.say("repeat press the same button")
            engine.runAndWait()
            engine.stop()
            
            while exit_loop:
                sleep(.25)
                if GPIO.input(13)==GPIO.HIGH:

                    engine.say(original)
                    engine.runAndWait()
                    engine.stop()
                elif GPIO.input(11)==GPIO.HIGH:
                    ocr_run = False
                    exit_loop = False
                    engine.stop()
                    
                else:
                    pass
                     
        elif (GPIO.input(15)==GPIO.HIGH):
            break
        
        else:
            pass
Example #55
0
    def recognize(self, image):
        # grab the image dimensions
        image = imutils.resize(image, width=320)
        orig = image.copy()
        (origH, origW) = image.shape[:2]

        # set the new width and height and then determine the ratio in change
        # for both the width and height
        (newW, newH) = (self.config_width, self.config_height)
        rW = origW / float(newW)
        rH = origH / float(newH)

        # resize the image and grab the new image dimensions
        image = cv2.resize(image, (newW, newH))
        (H, W) = image.shape[:2]

        # define the two output layer names for the EAST detector model that
        # we are interested -- the first is the output probabilities and the
        # second can be used to derive the bounding box coordinates of text
        layer_names = [
            "feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"
        ]

        # construct a blob from the image and then perform a forward pass of
        # the model to obtain the two output layer sets
        blob = cv2.dnn.blobFromImage(image,
                                     1.0, (W, H), (123.68, 116.78, 103.94),
                                     swapRB=True,
                                     crop=False)
        self.net.setInput(blob)
        (scores, geometry) = self.net.forward(layer_names)

        # decode the predictions, then  apply non-maxima suppression to
        # suppress weak, overlapping bounding boxes
        (rects, confidences) = self.decode_predictions(scores, geometry)
        boxes = non_max_suppression(np.array(rects), probs=confidences)

        # initialize the list of results
        results = []

        # loop over the bounding boxes
        for (startX, startY, endX, endY) in boxes:
            # scale the bounding box coordinates based on the respective
            # ratios
            startX = int(startX * rW)
            startY = int(startY * rH)
            endX = int(endX * rW)
            endY = int(endY * rH)

            # in order to obtain a better OCR of the text we can potentially
            # apply a bit of padding surrounding the bounding box -- here we
            # are computing the deltas in both the x and y directions
            padding = self.pyconfig.getfloat('text_recognition', 'padding')
            dX = int((endX - startX) * padding)
            dY = int((endY - startY) * padding)

            # apply padding to each side of the bounding box, respectively
            startX = max(0, startX - dX)
            startY = max(0, startY - dY)
            endX = min(origW, endX + (dX * 2))
            endY = min(origH, endY + (dY * 2))

            # extract the actual padded ROI
            roi = orig[startY:endY, startX:endX]

            # in order to apply Tesseract to OCR text we supply (1) a language,
            # (2) an OEM flag of 0, selecting the legacy engine, (3) a character
            # whitelist, and (4) a PSM value of 8, which treats the ROI as a
            # single word
            config = "-l eng --oem 0 -c tessedit_char_whitelist=123ABCDEIGNR --psm 8"
            text = pytesseract.image_to_string(roi, config=config)

            # add the bounding box coordinates and OCR'd text to the list
            # of results
            results.append(((startX, startY, endX, endY), text))

        # sort the results bounding box coordinates from top to bottom
        results = sorted(results, key=lambda r: r[0][1])

        output_data = []
        # loop over the results
        for ((startX, startY, endX, endY), text) in results:
            # strip out non-ASCII text so we can draw the text on the image
            # using OpenCV, then draw the text and a bounding box surrounding
            # the text region of the input image
            text = "".join([c if ord(c) < 128 else "" for c in text]).strip()
            output_data.append(text)

        return output_data
]

# load the pre-trained EAST text detector
print("[INFO] loading EAST text detector...")
net = cv2.dnn.readNet(args["east"])

# construct the blob from the image and then perform a forward pass of
# the model to obtain the two output layer sets
blob = cv2.dnn.blobFromImage(image, 1.0, (W, H), (123.68, 116.78, 103.94), swapRB = True, crop = False)
net.setInput(blob)
(scores, geometry) = net.forward(layerNames)

# decode the predictions, then apply NMS to suppress weak,
# overlapping bounding boxes
(rects, confidences) = decode_predictions(scores, geometry)
boxes = non_max_suppression(np.array(rects), probs = confidences)

# initialise the list of results
results = []

# loop over the bounding boxes
for (startX, startY, endX, endY) in boxes:
    # scale the bounding box coordinates based on the respective ratios
    startX = int(startX * rW)
    startY = int(startY * rH)
    endX = int(endX * rW)
    endY = int(endY * rH)

    # applying padding surrounding to bounding box
    dX = int((endX - startX) * args["padding"])
    dY = int((endY - startY) * args["padding"])
Example #57
0
def Bemoji (imagesrc):
    image = np.array(imagesrc)

    # set B emoji scaling
    scalefactor = 2
    scalevar = (scalefactor - 1)/2

    # load the input image and grab the image dimensions
    min_conf = 0.1
    eastpath = "frozen_east_text_detection.pb"
    orig = image.copy()
    (H, W) = image.shape[:2]
    (newW, newH) = (640, 640)
    rW = W / float(newW)
    rH = H / float(newH)

    image = cv2.resize(image, (newW, newH))
    (H, W) = image.shape[:2]

    # define the two output layer names for the EAST detector model that
    # we are interested -- the first is the output probabilities and the
    # second can be used to derive the bounding box coordinates of text
    layerNames = [
        "feature_fusion/Conv_7/Sigmoid",
        "feature_fusion/concat_3"]

    # load the pre-trained EAST text detector
    print("[INFO] loading EAST text detector...")
    net = cv2.dnn.readNet(eastpath)

    # construct a blob from the image and then perform a forward pass of
    # the model to obtain the two output layer sets
    blob = cv2.dnn.blobFromImage(image, 1.0, (W, H),
                                 (123.68, 116.78, 103.94), swapRB=True, crop=False)
    start = time.time()
    net.setInput(blob)
    (scores, geometry) = net.forward(layerNames)
    end = time.time()

    # show timing information on text prediction
    print("[INFO] text detection took {:.6f} seconds".format(end - start))

    # grab the number of rows and columns from the scores volume, then
    # initialize our set of bounding box rectangles and corresponding
    # confidence scores
    (numRows, numCols) = scores.shape[2:4]
    rects = []
    confidences = []

    # loop over the number of rows
    for y in range(0, numRows):
        # extract the scores (probabilities), followed by the geometrical
        # data used to derive potential bounding box coordinates that
        # surround text
        scoresData = scores[0, 0, y]
        xData0 = geometry[0, 0, y]
        xData1 = geometry[0, 1, y]
        xData2 = geometry[0, 2, y]
        xData3 = geometry[0, 3, y]
        anglesData = geometry[0, 4, y]

        # loop over the number of columns
        for x in range(0, numCols):
            # if our score does not have sufficient probability, ignore it
            if scoresData[x] < min_conf:
                continue

            # compute the offset factor as our resulting feature maps will
            # be 4x smaller than the input image
            (offsetX, offsetY) = (x * 4.0, y * 4.0)

            # extract the rotation angle for the prediction and then
            # compute the sin and cosine
            angle = anglesData[x]
            cos = np.cos(angle)
            sin = np.sin(angle)

            # use the geometry volume to derive the width and height of
            # the bounding box
            h = xData0[x] + xData2[x]
            w = xData1[x] + xData3[x]

            # compute both the starting and ending (x, y)-coordinates for
            # the text prediction bounding box
            endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
            endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
            startX = int(endX - w)
            startY = int(endY - h)

            # add the bounding box coordinates and probability score to
            # our respective lists
            rects.append((startX, startY, endX, endY))
            confidences.append(scoresData[x])

    # apply non-maxima suppression to suppress weak, overlapping bounding
    # boxes
    boxes = non_max_suppression(np.array(rects), probs=confidences)

    Bimage = cv2.imread("B.png", -1)
    b, g, r, a = cv2.split(Bimage)
    Bimage = cv2.merge((r, g, b, a))
    print("[INFO] loading Tesseract...")
    start = time.time()
    for (startX, startY, endX, endY) in boxes:
        roi = image[startY:endY, startX:endX]
        gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)

        preprocess = "thresh"
        # check to see if we should apply thresholding to preprocess the
        # image
        if preprocess == "thresh":
            gray = cv2.threshold(gray, 0, 255,
                                 cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]

        # make a check to see if median blurring should be done to remove
        # noise
        elif preprocess == "blur":
            gray = cv2.medianBlur(gray, 3)

        # write the grayscale image to disk as a temporary file so we can
        # apply OCR to it
        filename = "{}.png".format(os.getpid())

        if gray is not None:
            cv2.imwrite(filename, gray)
            text = pytesseract.image_to_boxes(Image.open(filename))
            os.remove(filename)
        else:
            text = ""

        print(text)
        text2 = str.split(text)

        if text2:
            dX = [int(text2[x*6 + 1]) for x in range (0, (int(len(text2)/6)))]
            dY = [int(text2[x * 6 + 2]) for x in range (0, (int(len(text2)/6)))]
            dW = [int(text2[x * 6 + 3])-int(text2[x*6 + 1]) for x in range (0, (int(len(text2)/6)))]
            dH = [int(text2[x * 6 + 4])-int(text2[x * 6 + 2]) for x in range (0, (int(len(text2)/6)))]
            #print(str(dW))
            startX = [int((startX + dX[x]) * rW) for x in range (0, (int(len(text2)/6)))]
            startY = [int((startY + dY[x]) * rH) for x in range (0, (int(len(text2)/6)))]

            letter = [text2[x * 6] for x in range (0, (int(len(text2)/6)))]

            for x in range (0, (int(len(text2)/6))):
                if (letter[x] == "G") | (letter[x] == "g") | (letter[x] == "B") | (letter[x] == "b") | (letter[x] == "P") | (letter[x] == "p"):
                    exception = 0
                    try:
                        placeimage(orig, Bimage, startX[x]-int(scalevar*(dW[x]*rH)),
                                   startY[x]-int(scalevar*(dH[x]*rW)), int(dW[x]*rH)*scalefactor,
                                   int(dH[x]*rW)*scalefactor)
                    except ValueError:
                        exception = 1
                    if exception:
                        scalecount = scalefactor - 0.1
                        while scalecount > 0:
                            try:
                                placeimage(orig, Bimage, startX[x] - int(((scalecount - 1) / 2) * (dW[x] * rH)),
                                           startY[x] - int(((scalecount - 1) / 2) * (dH[x] * rW)),
                                           int(dW[x] * rH * scalecount),
                                           int(dH[x] * rW * scalecount))
                            except ValueError:
                                scalecount = scalecount - 0.1
                                print(str(letter[x])+" " +str(scalecount))
                            else:
                                break


    end = time.time()
    print("[INFO] letter detection took {:.6f} seconds".format(end - start))
    imagesrc = Image.fromarray(orig.astype('uint8'))
    return imagesrc
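# Note: placeimage is not defined anywhere in this example. The following is a
# minimal sketch of what such a helper might look like, assuming it resizes the
# RGBA overlay to (w, h) and alpha-blends it onto the target image at (x, y),
# raising ValueError when the overlay would not fit inside the frame, which is
# the condition the retry loop above catches. The name and behavior are
# assumptions, not part of the original snippet.
def placeimage(target, overlay, x, y, w, h):
    # coerce the position and size to integers (callers may pass floats)
    x, y, w, h = int(x), int(y), int(round(w)), int(round(h))
    if w <= 0 or h <= 0:
        raise ValueError("overlay has no area")
    if x < 0 or y < 0 or y + h > target.shape[0] or x + w > target.shape[1]:
        raise ValueError("overlay does not fit inside the target image")

    # resize the 4-channel overlay and blend it into the target using its alpha
    overlay = cv2.resize(overlay, (w, h))
    alpha = overlay[:, :, 3:4].astype("float32") / 255.0
    region = target[y:y + h, x:x + w].astype("float32")
    blended = alpha * overlay[:, :, :3].astype("float32") + (1.0 - alpha) * region
    target[y:y + h, x:x + w] = blended.astype("uint8")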
Example #58
0
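# Note: the loop below assumes a video capture source, a HOG people detector,
# and a running counter were initialized earlier; none of that appears in this
# snippet, so the following is a minimal sketch of that setup (the names cap,
# hog, and count simply match what the loop uses and are assumptions here):
import cv2
import imutils
import numpy as np
from imutils.object_detection import non_max_suppression

cap = cv2.VideoCapture(0)
hog = cv2.HOGDescriptor()
hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
count = 0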
while True:

    ret, frame = cap.read()
    if not ret:
        break

    frame = imutils.resize(frame, width=min(400, frame.shape[1]))

    orig = frame.copy()

    (rects, weights) = hog.detectMultiScale(frame, winStride=(4, 4), padding=(8, 8), scale=1.05)

    for (x, y, w, h) in rects:
        cv2.rectangle(orig, (x, y), (x + w, y + h), (0, 0, 255), 2)

    rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
    pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)

    for (xA, yA, xB, yB) in pick:
        cv2.rectangle(frame, (xA, yA), (xB, yB), (0, 255, 0), 2)



    cv2.imshow('frame', frame)


    # keep a running total of detections across frames
    count = count + len(pick)

    print(count)

    ch = 0xFF & cv2.waitKey(1)
    if ch == 27:
        break
Example #59
0
def main():

    hog = cv2.HOGDescriptor()
    hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())

    video = cv2.VideoCapture(0)
    is_ok, bgr_image_input = video.read()

    if not is_ok:
        print("Cannot read video source")
        sys.exit()

    height = bgr_image_input.shape[0]
    width = bgr_image_input.shape[1]

    fname = "OUTPUT.avi"
    try:
        fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
        fps = 30.0
        videoWriter = cv2.VideoWriter(fname, fourcc, fps, (width, height))
    except Exception:
        print("Error: can't create output video: %s" % fname)
        sys.exit()

    fps = video.get(cv2.CAP_PROP_FPS)

    start = time.time()

    frame = 0
    while True:
        is_ok, bgr_image_input = video.read()
        if not is_ok:
            break

        frame = frame + 1
        # load the image and resize it to (1) reduce detection time
        # and (2) improve detection accuracy
        bgr_image_input = imutils.resize(bgr_image_input,
                                         width=min(400,
                                                   bgr_image_input.shape[1]))
        orig = bgr_image_input.copy()
        # detect people in the image
        (rects, weights) = hog.detectMultiScale(bgr_image_input,
                                                winStride=(4, 4),
                                                padding=(8, 8),
                                                scale=1.05)
        # draw the original bounding boxes
        for (x, y, w, h) in rects:
            cv2.rectangle(orig, (x, y), (x + w, y + h), (0, 0, 255), 2)
        # apply non-maxima suppression to the bounding boxes using a
        # fairly large overlap threshold to try to maintain overlapping
        # boxes that are still people
        rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
        pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)
        # draw the final bounding boxes
        for (xA, yA, xB, yB) in pick:
            cv2.rectangle(bgr_image_input, (xA, yA), (xB, yB), (0, 255, 0), 2)
        # show some information on the number of bounding boxes
        '''filename = imagePath[imagePath.rfind("/") + 1:]
        print("[INFO] {}: {} original boxes, {} after suppression".format(
            filename, len(rects), len(pick)))'''
        # show the output images
        #cv2.imshow("Before NMS", orig)

        now = time.time()
        fps = frame / (now - start)
        fps = np.round(fps, 2)
        cv2.putText(bgr_image_input, "fps: " + str(fps), (50, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1)

        cv2.imshow("After NMS", bgr_image_input)

        videoWriter.write(bgr_image_input)
        key_pressed = cv2.waitKey(1) & 0xFF
        if key_pressed == 27 or key_pressed == ord('q'):
            break