Ejemplo n.º 1
0
def worker(input_q, output_q, cap_params, frame_processed):
    """Consume frames from `input_q`, draw hand boxes on them, and forward them.

    Runs forever: each frame taken from `input_q` is passed through
    `detector_utils.detect_objects`, annotated in place with
    `detector_utils.draw_box_on_image`, and put on `output_q`. A `None`
    frame is forwarded unchanged.

    Args:
        input_q: queue-like object providing frames via `.get()`.
        output_q: queue-like object receiving frames via `.put()`.
        cap_params: mapping with the keys 'num_hands_detect', 'score_thresh',
            'im_width' and 'im_height' that are handed to the drawing helper.
        frame_processed: starting value of the processed-frame counter.
            NOTE(review): `+=` below only rebinds this local name, so the
            caller never observes the updated count — confirm whether a
            shared counter was intended.
    """
    print(">> Loading frozen model for worker.")
    # NOTE(review): the session returned by load_inference_graph() is
    # immediately replaced by a fresh one on the same graph; kept as in the
    # original, but the first session is never closed here.
    detection_graph, sess = detector_utils.load_inference_graph()
    sess = tf.compat.v1.Session(graph=detection_graph)
    try:
        while True:
            frame = input_q.get()
            if frame is None:
                # Nothing to analyse; pass the marker straight through.
                output_q.put(frame)
                continue

            # boxes: coordinates of the detected hands
            # scores: confidence level of each detection
            # The score threshold can be tuned through cap_params.
            boxes, scores = detector_utils.detect_objects(
                frame, detection_graph, sess)

            # Draw the bounding boxes onto the frame.
            detector_utils.draw_box_on_image(cap_params['num_hands_detect'],
                                             cap_params["score_thresh"],
                                             scores, boxes,
                                             cap_params['im_width'],
                                             cap_params['im_height'], frame)
            # Hand the annotated frame to the consumer.
            output_q.put(frame)
            frame_processed += 1
    finally:
        # The loop has no normal exit, so this is the only place where the
        # session can be released (e.g. on an exception or KeyboardInterrupt).
        sess.close()
Ejemplo n.º 2
0
def grab_hands(filename):
    """Detect hands in the image at `filename` and save a crop of the first one.

    The image is shown in the 'Single-Threaded Detection' window with the
    detection boxes drawn on it. The first box reported by
    `detector_utils.draw_box_on_image` is widened by the module-level
    `ADJUSTMENT` margin on every side and written to
    ``converted/<basename of filename>``.

    Relies on the module-level `detection_graph`, `sess` and `ADJUSTMENT`.

    Args:
        filename: path of the image to process ('/'-separated).

    Returns:
        The path of the written crop, or None when the image could not be
        read, no hand was detected, or the crop turned out empty.
    """
    os.makedirs('converted', exist_ok=True)
    cv2.namedWindow('Single-Threaded Detection', cv2.WINDOW_NORMAL)

    img = cv2.imread(filename)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None;
        # without this guard cvtColor fails with an opaque OpenCV error.
        print(f'could not read image: {filename}')
        return None

    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    boxes, scores = detector_utils.detect_objects(img, detection_graph, sess)

    num_hands_detect = 2
    # NOTE(review): the box scaling uses a fixed 640x360 size rather than
    # the actual image size — confirm all inputs have these dimensions.
    im_width, im_height = (640, 360)
    points = detector_utils.draw_box_on_image(num_hands_detect, 0.27, scores,
                                              boxes, im_width, im_height, img)
    cv2.imshow('Single-Threaded Detection',
               cv2.cvtColor(img, cv2.COLOR_RGB2BGR))

    filename = filename.split("/")[-1]
    print(filename)
    print(len(points))
    if len(points) == 0:
        return None

    # Only the first detection is saved: every crop would be written to the
    # same output path, and the original loop returned on its first pass.
    p1, p2 = points[0]
    bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

    # Clamp the start of each slice at 0: a negative start would be read by
    # NumPy as "count from the end" and yield a wrong or empty crop.
    top = max(0, p1[1] - ADJUSTMENT)
    left = max(0, p1[0] - ADJUSTMENT)
    cropped = bgr[top:p2[1] + ADJUSTMENT, left:p2[0] + ADJUSTMENT]
    if cropped.size == 0:
        # cv2.imwrite raises on an empty image.
        return None

    fname = f'converted/{filename}'
    print(fname)
    cv2.imwrite(fname, cropped)
    return fname
Ejemplo n.º 3
0
def worker_hands(input_q, output_q):
    """Run hand detection on frames from `input_q` and publish the raw results.

    Runs forever: for every frame taken from `input_q`, the `(boxes, scores)`
    pair returned by `detector_utils.detect_objects` is put on `output_q`.
    For a `None` frame the most recent result is re-sent; if no frame has
    been processed yet, `(None, None)` is sent.

    Args:
        input_q: queue-like object providing frames via `.get()`.
        output_q: queue-like object receiving `(boxes, scores)` via `.put()`.
    """
    detection_graph, sess = detector_utils.load_inference_graph()
    sess = tf.Session(graph=detection_graph)

    # Defined up front so that a None frame arriving before any real frame
    # does not hit unbound names.
    boxes, scores = None, None
    try:
        while True:
            frame = input_q.get()
            if frame is not None:
                boxes, scores = detector_utils.detect_objects(
                    frame, detection_graph, sess)
            output_q.put((boxes, scores))
    finally:
        # The loop never exits normally; release the session on errors.
        sess.close()
Ejemplo n.º 4
0
    def detect(self, rgb_image):
        """Return the detections in `rgb_image` as integer (x, y, w, h) tuples.

        Boxes from `detector_utils.detect_objects` are indexed as
        (top [0], left [1], bottom [2], right [3]); rows are scaled by the
        image height and columns by the image width. Detections whose score
        is below ``detector_params['detection_th']`` (0.2 when the key is
        absent) are dropped.
        """
        boxes, confidences = detector_utils.detect_objects(
            rgb_image, self.detection_graph, self.sess)

        rows, cols = rgb_image.shape[:2]
        min_score = self.detector_params.get('detection_th', 0.2)

        rects = []
        for box, score in zip(boxes, confidences):
            if not (score >= min_score):
                continue
            top = box[0] * rows
            left = box[1] * cols
            bottom = box[2] * rows
            right = box[3] * cols
            # Convert the corner form into origin + extent.
            rects.append((int(left), int(top),
                          int(right - left), int(bottom - top)))
        return rects
Ejemplo n.º 5
0
def main(argv=None):
    """Replay the video at ``FLAGS.test_data_path`` and check the pill-intake steps.

    Every `gap`-th frame is resized to a width of 450 and analysed:

    * exactly one face must be visible; it is matched against
      `known_faces_encoding` / `known_names` and compared with `FLAGS.name`;
    * the mouth region is located and searched for a white pill;
    * hands are detected with `detector_utils`.

    From these per-frame observations the function walks through the steps
    "show the pill", "put it on the tongue", "keep the mouth closed for the
    countdown" and "open the mouth again", drawing the matching instruction
    on the frame that is displayed in the "im" window. Pressing 'q' stops
    the replay. A summary is printed to stdout at the end.

    Module-level names used: `FLAGS`, `known_faces_encoding`, `known_names`,
    `detector`, `predictor`, `detection_graph`, `sess1`, `num_hands_detect`,
    `score_thresh`. The globals `im` (current frame view) and `finished`
    are written; `finished` is also read before it may be assigned here, so
    it is expected to be initialised at module level (not visible here).

    Args:
        argv: unused.
    """
    global im, finished

    process_start = time.time()

    # Location and look of the instructions.
    font_location1 = 0
    font_location2 = 25
    font_size = 0.6
    font_thickness = 2
    font_color = (255, 255, 255)

    def put_line(canvas, text, y_offset, color=font_color):
        """Draw one text line `y_offset` pixels below the first text row."""
        cv2.putText(canvas, text, (font_location1, font_location2 + y_offset),
                    cv2.FONT_HERSHEY_SIMPLEX, font_size, color, font_thickness)

    # Number of processed frames in which the mouth stayed closed during the
    # countdown.
    timing = 0

    # 1 while the countdown phase is active.
    timer2 = 0

    # Whether the ten seconds countdown finished.
    time_up = 0

    # Count how many frames have been processed.
    t = 1

    # Count in how many frames we detect the right person.
    nb_fr = 1

    vs = cv2.VideoCapture(FLAGS.test_data_path)

    length = int(vs.get(cv2.CAP_PROP_FRAME_COUNT))
    print("frames of the video:", length)

    video_fps = vs.get(cv2.CAP_PROP_FPS)
    print("FPS of the video:   ", video_fps)

    # A capture that could not be opened reports 0 FPS; avoid dividing by it.
    video_duration = round(length / video_fps, 2) if video_fps else 0.0
    print("duration of the video:", video_duration)

    # Process only every `gap`-th frame.
    gap = 7

    # Ten seconds countdown, expressed in processed frames.
    threshold = video_fps / gap * 10

    # Kept under its own name; the original reused `fps` for this counter.
    fps_counter = FPS().start()

    frame_count = 0

    # 1 once the verification has ended (successfully or not).
    over = 0

    # Detect if there's any suspicious action, e.g. the pill disappearing.
    suspicious = 0

    # Whether the ten seconds countdown started.
    start_counting = 0

    pill_removed = 0

    # The patient needs to show the pill number for this many processed
    # frames.
    shown_time_threshold = threshold / 2
    shown_time = 0

    tolerance = 0

    start_tracking = 0

    while True:
        ret, frame = vs.read()
        if not ret:
            break

        frame_count += 1
        if frame_count % gap != 0:
            continue

        if over == 0:
            t += 1

        frame = imutils.resize(frame, width=450)
        im = frame[:, :, ::-1]
        # All drawing goes to this view, as in the original per-call
        # `im[:, :, ::-1]` expressions.
        canvas = im[:, :, ::-1]
        orig = frame.copy()
        image_np = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        pill_inside = 0
        mouth_close = 0

        # Find all the faces once per frame; there must be exactly one person.
        face_locations = face_recognition.face_locations(orig)
        if len(face_locations) > 1:
            print("WARNING: two person appear!")
        elif len(face_locations) == 1:
            unknown_face_encoding = face_recognition.face_encodings(
                orig, face_locations)[0]
            index = utils.recognize_face(unknown_face_encoding,
                                         known_faces_encoding)
            name = known_names[index]
            if name == FLAGS.name and over == 0:
                nb_fr += 1
            put_line(canvas, name, 0)
            top, right, bottom, left = face_locations[0]

            face_height = bottom - top
            # Draw a box around the face.
            cv2.rectangle(canvas, (left, top), (right, bottom), (0, 0, 255))

            try:
                (x, y, w, h) = mouth_detection.mouth_detection_video(
                    orig, detector, predictor)
                cv2.rectangle(canvas, (x, y), (x + w, y + h), (0, 0, 255))
                d = int(0.35 * h)
                # Mouth area without its upper 35 %.
                roi = orig[y + d:y + h, x:x + w]
                # Look for a white pill inside the mouth area.
                (px, py, pw, ph) = utils.color_detection_white(roi)

                if pw != 0:
                    # Pill detected: draw a box around it.
                    cv2.rectangle(canvas, (x + px, y + py + d),
                                  (x + px + pw, y + py + ph + d),
                                  (0, 255, 0), font_thickness)
                    pill_inside = 1
                    start_tracking = 1
                else:
                    pill_inside = 0

                # The mouth counts as closed when it is flat compared with
                # the face.
                if h < 0.2 * face_height:
                    mouth_close = 1
                else:
                    mouth_close = 0
                    # The original `pill_inside==0 & start_tracking==1`
                    # parsed as a chained comparison that is always False,
                    # so this flag could never be raised.
                    if pill_inside == 0 and start_tracking == 1:
                        suspicious = 1
            except Exception:
                # Best effort: a frame without a usable mouth detection is
                # treated as "no pill seen, mouth not closed".
                pass

        # Detect hands and get the scores of the detected hands.
        boxes1, scores1 = detector_utils.detect_objects(image_np,
                                                        detection_graph, sess1)

        h, w = im.shape[:2]

        # Draw a box around the hands whose score exceeds score_thresh.
        hands_detected = detector_utils.draw_box_on_image(
            num_hands_detect, score_thresh, scores1, boxes1, w, h, canvas)

        if over == 0:
            # Steps one & two: the countdown neither started nor finished.
            # (The original `timer2 == 0 & time_up== 0` only tested timer2.)
            if timer2 == 0 and time_up == 0:
                if shown_time < shown_time_threshold:
                    # Step one: show the number for a number of frames.
                    put_line(canvas,
                             "Please put the pill in front of your mouth,", 25)
                    put_line(canvas,
                             "with the number clearly visible to the camera.",
                             50)
                    # Count frames in which the pill is held at the mouth.
                    if pill_inside == 1 and hands_detected == 1:
                        shown_time += 1
                else:
                    # Step two: the number was shown long enough.
                    put_line(canvas, "Please put the pill on your tongue,", 75)
                    put_line(canvas, "then remove your hands.", 100)

            # Step three: pill inside the mouth and no hands detected.
            if pill_inside == 1 and hands_detected == 0 and time_up == 0:
                put_line(canvas,
                         "Please keep the pill on your tongue for 10 seconds",
                         125)
                put_line(canvas, "with your mouth closed.", 150)
                # The countdown may run once the mouth is closed.
                timer2 = 1

            if timer2 == 1:
                if hands_detected == 1 and start_counting == 1:
                    # A hand during the countdown is taken as the patient
                    # removing the pill: reset the countdown.
                    put_line(canvas, "Please don't remove the pill!", 175,
                             color=(0, 0, 0))
                    timer2 = 0
                    timing = 0
                    pill_removed = 1
                elif mouth_close == 1:
                    put_line(canvas, "Starting the 10 seconds countdown...",
                             200)
                    timing += 1
                    start_counting = 1

            # Step four: after the countdown the patient opens the mouth and
            # shows that the pill is still on the tongue.
            if timing > threshold:
                put_line(canvas, "Please open your mouth and show", 225)
                put_line(canvas, "the pill is still on your tongue.", 250)
                time_up = 1
            else:
                time_up = 0

            if time_up == 1:
                if mouth_close == 0 and pill_inside == 1:
                    # Pill seen in the open mouth: all steps were followed.
                    put_line(canvas,
                             "Thank you. You accomplished all the steps.", 275)
                    # The original first line already ended in
                    # "was correct.", duplicating the line below it.
                    put_line(canvas,
                             "In two minutes we will verify that the number",
                             300)
                    put_line(canvas, "was correct.", 325)
                    finished = 1
                    over = 1

                # The pill may be hidden while the mouth is opening; allow a
                # few such frames.
                if mouth_close == 0 and pill_inside == 0:
                    tolerance += 1

                if finished == 0 and hands_detected == 1:
                    # Hands showed up before the pill was seen again: the
                    # patient is assumed to have taken it out. `finished`
                    # stays 0.
                    over = 1
                elif tolerance > 3:
                    over = 1
        else:
            # NOTE(review): this text is shown whenever `over` is set, also
            # when `finished` is still 0 — confirm that is intended.
            put_line(canvas, "Thank you. You accomplished all the steps.", 275)
            put_line(canvas, "In two minutes we will verify that the number",
                     300)
            put_line(canvas, "was correct.", 325)

        cv2.imshow("im", canvas)

        if cv2.waitKey(25) & 0xFF == ord('q'):
            cv2.destroyAllWindows()
            break

    cv2.waitKey(0)

    face_detection_result = nb_fr / t

    # Calculate the processing time.
    process_time = time.time() - process_start

    if face_detection_result > 0.6:
        print("It's the right person")
    else:
        print("It's not the right person")
    sys.stdout.flush()

    if finished == 1:
        print("Detection finished")
    else:
        print("Detection is not finished")
    sys.stdout.flush()

    if pill_removed == 1:
        print("pill has been removed")
        sys.stdout.flush()

    print("process time:", process_time)
    print("suspicious", suspicious)

    fps_counter.stop()
    vs.release()
Ejemplo n.º 6
0
import tensorflow as tf
import detector_utils
import cv2

# Demo: run the hand detector on a single image and display the result at
# one third of its size.
detection_graph, sess = detector_utils.load_inference_graph()

img = cv2.imread('hands3.jpeg')
if img is None:
    # cv2.imread returns None instead of raising when the file is missing or
    # unreadable; fail here with a clear message rather than in the detector.
    raise FileNotFoundError("could not read image 'hands3.jpeg'")

# NOTE(review): the image is passed on in OpenCV's BGR channel order —
# confirm that detect_objects does not expect RGB.
boxes, scores = detector_utils.detect_objects(img, detection_graph, sess)

img_height, img_width = img.shape[0], img.shape[1]
x, y = detector_utils.get_center(2, 0.05, scores, boxes, img_width, img_height, img)
detector_utils.draw_box_on_image(2, 0.05, scores, boxes, img_width, img_height, img)
img = cv2.resize(img, (int(img_width / 3), int(img_height / 3)))
cv2.imshow('', img)
cv2.waitKey(0)
Ejemplo n.º 7
0
def main(argv=None):
    """Read the text on the pill in the video at ``FLAGS.test_data_path``.

    Restores the text-detection model from `checkpoint_path`, then processes
    the video frame by frame. Frames are kept small (height 200) until a
    white pill is found in the mouth area of the single visible face; from
    then on frames are enlarged (height 1500) and the face area is searched
    for text, which is compared with `FLAGS.expected`. Processing stops when
    the expected text is read, the video ends, 'q' is pressed, or more than
    180 seconds have passed. The outcome is printed to stdout.

    Module-level names used: `FLAGS`, `gpu_list`, `checkpoint_path`,
    `detector`, `predictor`, `detection_graph`, `sess1`, `num_hands_detect`,
    `score_thresh`, `font_thickness`. The globals `px`, `py`, `pw`, `ph`
    (last pill box) are written.

    Args:
        argv: unused.
    """
    global px, py, pw, ph

    os.environ['CUDA_VISIBLE_DEVICES'] = gpu_list

    # Whether we have detected the same number as the input.
    number_correct = 0
    # The text that matched FLAGS.expected, and the last text read at all.
    # Defined up front so the final report cannot hit unbound names.
    matched_text = None
    read_text = None
    # Defined up front: the loop may end before the first measurement.
    time2 = 0.0

    with tf.get_default_graph().as_default():
        input_images = tf.placeholder(tf.float32, shape=[None, None, None, 3],
                                      name='input_images')
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        f_score, f_geometry = model.model(input_images, is_training=False)
        # Restore the exponential-moving-average versions of the variables.
        variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            # Load the scene text detection model from the latest checkpoint.
            ckpt_state = tf.train.get_checkpoint_state(checkpoint_path)
            model_path = os.path.join(
                checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            saver.restore(sess, model_path)

            print("processing starts")

            duration_start = time.time()
            vs = cv2.VideoCapture(FLAGS.test_data_path)

            fps = FPS().start()

            # 1 once a pill was seen in the mouth; switches to large frames.
            large = 0

            while True:
                # NOTE(review): reset on every frame, so the final report
                # only reflects the last processed frame — confirm whether a
                # whole-video flag was intended.
                not_verified = 1
                processing_time = time.time() - duration_start
                print(processing_time)

                frame = vs.read()[1]
                if frame is None:
                    break

                # Number recognition starts once the pill is in front of the
                # mouth; before that the frame is downsized for efficiency.
                if large == 1:
                    frame = imutils.resize(frame, height=1500)
                else:
                    frame = imutils.resize(frame, height=200)
                im = frame[:, :, ::-1]
                # All drawing goes to this view, as in the original per-call
                # `im[:, :, ::-1]` expressions.
                canvas = im[:, :, ::-1]
                orig = frame.copy()
                image_np = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

                # Detect hands and get the scores of the detected hands.
                boxes1, scores1 = detector_utils.detect_objects(
                    image_np, detection_graph, sess1)

                h, w = im.shape[:2]

                # Draw a box around the hands whose score exceeds
                # score_thresh.
                # NOTE(review): the original comment says frames without a
                # hand are not processed, but its check was a no-op; the
                # result is therefore not used for control flow here either.
                hands_detected = detector_utils.draw_box_on_image(
                    num_hands_detect, score_thresh, scores1, boxes1, w, h,
                    canvas)

                # Continue only with exactly one visible face; the locations
                # are computed once per frame.
                face_locations = face_recognition.face_locations(orig)
                if len(face_locations) == 1:
                    top, right, bottom, left = face_locations[0]

                    cv2.rectangle(canvas, (left, top), (right, bottom),
                                  (0, 0, 255))

                    # Number and letter detection is done inside the face
                    # area.
                    roi_text = orig[top:bottom, left:right]

                    try:
                        # Mouth detection.
                        (x, y, w, h) = mouth_detection.mouth_detection_video(
                            orig, detector, predictor)
                        cv2.rectangle(canvas, (x, y), (x + w, y + h),
                                      (0, 0, 255))
                        d = int(0.35 * h)
                        roi = orig[y + d:y + h, x:x + w]
                        # Pill detection inside the mouth.
                        (px, py, pw, ph) = utils.color_detection_white(roi)

                        if pw != 0:
                            # Pill detected.
                            cv2.rectangle(canvas, (x + px, y + py + d),
                                          (x + px + pw, y + py + ph + d),
                                          (0, 255, 0), font_thickness)
                            large = 1
                    except Exception:
                        # Best effort: skip frames without a usable mouth
                        # detection.
                        pass

                if number_correct == 0 and large == 1:
                    try:
                        # Resize the area of number and letter detection.
                        im_resized, (ratio_h, ratio_w) = utils.resize_image(
                            roi_text)

                        timer = {'net': 0, 'restore': 0, 'nms': 0}
                        start = time.time()
                        score, geometry = sess.run(
                            [f_score, f_geometry],
                            feed_dict={input_images: [im_resized]})
                        timer['net'] = time.time() - start

                        # Boxes with the location of each number or letter.
                        boxes, timer = utils.detect(score_map=score,
                                                    geo_map=geometry,
                                                    timer=timer)

                        if boxes is not None:
                            # Map the boxes back to the unresized ROI.
                            boxes = boxes[:, :8].reshape((-1, 4, 2))
                            boxes[:, :, 0] /= ratio_w
                            boxes[:, :, 1] /= ratio_h

                            for box in boxes:
                                # Recognition by morphology processing and
                                # tesseract (see utils.recognize_to_text).
                                text = utils.recognize_to_text(
                                    roi_text[:, :, ::-1], box)
                                if text is None:
                                    # Nothing read in this box. Skipping it
                                    # keeps cv2.putText from raising and
                                    # aborting the remaining boxes.
                                    continue
                                not_verified = 0
                                read_text = text

                                box = utils.sort_poly(box.astype(np.int32))
                                if (np.linalg.norm(box[0] - box[1]) < 5
                                        or np.linalg.norm(box[3] - box[0]) < 5):
                                    # Strip small boxes.
                                    continue

                                cv2.putText(canvas, text, (50, 250),
                                            cv2.FONT_HERSHEY_SIMPLEX, 0.7,
                                            (0, 0, 255), 2)

                                if text == FLAGS.expected:
                                    number_correct = 1
                                    # Remember the match itself; `text` may
                                    # be overwritten by a later box.
                                    matched_text = text
                    except Exception:
                        # Best effort: a failed recognition pass only skips
                        # this frame.
                        pass

                time2 = time.time() - duration_start

                # Stop when recognition has been running for too long.
                if time2 > 180:
                    break
                # Stop once the expected number was read.
                if number_correct == 1:
                    break

                if cv2.waitKey(25) & 0xFF == ord('q'):
                    cv2.destroyAllWindows()
                    # The original destroyed the windows but kept looping.
                    break

    fps.stop()
    vs.release()

    if number_correct == 1:
        # The same number or letter as the input was detected.
        print("Number read is", matched_text, ", which corresponds correctly to the pill that was dispensed.")
    elif not_verified == 1:
        # No number or letter was detected.
        print("The number is not verified")
    else:
        # A number or letter was detected, but not the expected one.
        print("Number read is", read_text, ", which is different from the number on the pill. We will check this manually")
    sys.stdout.flush()

    print("video processing time", time2)