class FacialLandmarksDetectionModel:
    '''
    Wrapper around an OpenVINO facial-landmarks network.

    The class loads the IR files through ``IECore``, runs inference on a
    single image and converts the first two landmarks of the network output
    into square boxes around each eye.
    '''

    # Half the side length, in pixels, of the square drawn around each eye.
    EYE_BOX_HALF_SIZE = 20

    def __init__(self, model_name, threshold, device='CPU', extensions=None,
                 async_mode=True, plugin=None):
        '''
        Store the configuration; nothing is loaded until ``load_model``.

        :param model_name: path to the model ``.xml`` file
        :param threshold: stored on the instance; not read by this class
        :param device: target device name passed to the Inference Engine
        :param extensions: optional extension library path
        :param async_mode: run ``async_infer`` when true, ``infer`` otherwise
        :param plugin: accepted for interface compatibility; not used,
            ``load_model`` always creates its own ``IECore``
        '''
        self.plugin = None
        self.network = None
        self.input_blob = None
        self.output_blob = None
        self.out_shape = None
        self.exec_network = None
        self.threshold = threshold
        self.device = device
        self.async_mode = async_mode
        self.infer_request = None
        self.net = None
        self.net_plugin = None
        self.model_xml = model_name
        self.extensions = extensions

    def load_model(self, model_xml, cpu_extension=None):
        '''
        Read the IR, verify layer support and load it onto ``self.device``.

        :param model_xml: path to the ``.xml`` file; the ``.bin`` file is
            expected next to it with the same base name
        :param cpu_extension: optional extension library; falls back to the
            ``extensions`` value given to the constructor
        :raises SystemExit: when the device reports unsupported layers
        '''
        self.model_xml = model_xml
        model_bin = os.path.splitext(model_xml)[0] + ".bin"
        device = self.device
        if cpu_extension is None:
            cpu_extension = self.extensions
        else:
            self.extensions = cpu_extension

        # Initialize the plugin
        self.plugin = IECore()

        # Add any necessary extension
        if cpu_extension and "CPU" in device:
            self.plugin.add_extension(cpu_extension, "CPU")

        # Read the IR as an IENetwork
        self.network = self.plugin.read_network(model=model_xml,
                                                weights=model_bin)

        # query_network maps every layer the device can run to that device,
        # so any layer missing from the result is unsupported.
        supported_layers = self.plugin.query_network(network=self.network,
                                                     device_name=device)
        unsupported_layers = [
            layer for layer in self.network.layers.keys()
            if layer not in supported_layers
        ]
        if unsupported_layers:
            print("Unsupported layers found: {}".format(unsupported_layers))
            print("Please check for supported extensions.")
            raise SystemExit(1)

        # Loading the Intermediate Representation (IR) into the plugin
        self.exec_network = self.plugin.load_network(self.network, device)

        # Cache the names of the first input and output blobs
        self.input_blob = next(iter(self.network.inputs))
        self.output_blob = next(iter(self.network.outputs))
        self.out_shape = self.network.outputs[self.output_blob].shape
        logging.info(
            "Model Facial landmark Detection output shape printed : %s",
            self.out_shape)

    def predict(self, image, width, height, threshold):
        '''
        Run inference on ``image`` and return the eye boxes.

        :param image: image array indexed as (height, width, channels)
        :param width: ignored; the size is taken from ``image``
        :param height: ignored; the size is taken from ``image``
        :param threshold: ignored by this model
        :return: ``(values, img_frame)``. On success ``values`` is the list
            of two eye boxes and ``img_frame`` is ``image`` with the boxes
            drawn on it. If the request does not complete with status 0,
            ``values`` is ``None`` and ``img_frame`` is the preprocessed
            input tensor.
        '''
        values = None
        img_frame = self.preprocess_input(image)
        request = self.exec_network.requests[0]
        feed = {self.input_blob: img_frame}
        if self.async_mode:
            request.async_infer(inputs=feed)
        else:
            request.infer(inputs=feed)

        if request.wait(-1) == 0:
            outputs = request.outputs[self.output_blob]
            img_frame, values = self.preprocess_output(image, outputs)
        return values, img_frame

    def get_input_shape(self):
        ### Return the shape of the input layer ###
        return self.network.inputs[self.input_blob].shape

    def preprocess_input(self, image):
        '''
        Resize ``image`` to the network input size and reorder it from
        (height, width, channels) to (batch, channels, height, width).
        '''
        (n, c, h, w) = self.network.inputs[self.input_blob].shape
        img_frame = cv2.resize(image, (w, h))
        img_frame = img_frame.transpose((2, 0, 1))
        img_frame = img_frame.reshape((n, c, h, w))
        return img_frame

    @staticmethod
    def _eye_boxes(outputs, width, height, half_size=20):
        '''
        Convert normalised landmark output into two integer eye boxes.

        :param outputs: array whose first four flattened values are
            ``x_left, y_left, x_right, y_right`` as fractions of the frame
        :param width: frame width in pixels
        :param height: frame height in pixels
        :param half_size: half the side of each square box, in pixels
        :return: ``[[xmin, ymin, xmax, ymax], [xmin, ymin, xmax, ymax]]``
            for the left and right eye
        '''
        # Flatten so the result does not depend on trailing unit dimensions.
        coords = outputs.reshape(-1)
        centres = (
            (float(coords[0]) * width, float(coords[1]) * height),
            (float(coords[2]) * width, float(coords[3]) * height),
        )
        return [
            [int(cx - half_size), int(cy - half_size),
             int(cx + half_size), int(cy + half_size)]
            for cx, cy in centres
        ]

    def preprocess_output(self, frame, outputs):
        '''
        Draw a box around each eye on ``frame`` and return the boxes.

        The reference source code:
        https://knowledge.udacity.com/questions/285095

        :param frame: image the landmarks refer to; it is drawn on in place
        :param outputs: raw landmark output of the network
        :return: ``(frame, values)`` where ``values`` holds the left and
            right eye boxes as ``[xmin, ymin, xmax, ymax]`` integers
        '''
        height, width = frame.shape[0], frame.shape[1]
        values = self._eye_boxes(outputs, width, height,
                                 self.EYE_BOX_HALF_SIZE)
        # cv2.rectangle needs integer corner points.
        for x_min, y_min, x_max, y_max in values:
            cv2.rectangle(frame, (x_min, y_min), (x_max, y_max),
                          (0, 0, 255), 2)
        return frame, values

    def clean(self):
        """
        delete all the open instances

        Safe to call more than once: attributes already removed are skipped.
        :return: None
        """
        for name in ("plugin", "network", "exec_network", "net", "device"):
            if hasattr(self, name):
                delattr(self, name)
class Network:
    """
    Load and configure inference plugins for the specified target devices
    and performs synchronous and asynchronous modes for the specified infer
    requests.

    ``load_model`` uses the legacy ``IEPlugin``/``IENetwork`` API while
    ``load_model_2`` uses ``IECore``; both leave the same attributes set.
    """

    def __init__(self):
        """
        Initialize the attributes filled in by ``load_model``/``load_model_2``.
        """
        self.net = None
        self.plugin = None
        self.input_blob = None
        self.out_blob = None
        self.net_plugin = None
        self.infer_request_handle = None

    def _exit_if_unsupported(self, supported_layers, device):
        """
        Log and exit when ``self.net`` has layers missing from
        ``supported_layers``.

        :param supported_layers: container of layer names the device supports
        :param device: device name used in the error message
        :raises SystemExit: when at least one layer is unsupported
        """
        not_supported_layers = [
            layer for layer in self.net.layers.keys()
            if layer not in supported_layers
        ]
        if not_supported_layers:
            log.error("Following layers are not supported by "
                      "the plugin for specified device {}:\n {}".format(
                          device, ', '.join(not_supported_layers)))
            log.error("Please try to specify cpu extensions library path"
                      " in command line parameters using -l "
                      "or --cpu_extension command line argument")
            sys.exit(1)

    def _validate_io_counts(self, input_size, output_size):
        """
        Check that ``self.net`` has the expected number of inputs/outputs.

        Raised explicitly (rather than with ``assert``) so the check still
        runs under ``python -O``; the exception type is unchanged.

        :raises AssertionError: when a count differs from the expected one
        """
        if len(self.net.inputs.keys()) != input_size:
            raise AssertionError(
                "Supports only {} input topologies".format(input_size))
        if len(self.net.outputs) != output_size:
            raise AssertionError(
                "Supports only {} output topologies".format(output_size))

    def _bind_blobs(self, input_size, output_size):
        """
        Remember the first input/output blob names and validate the counts.
        """
        self.input_blob = next(iter(self.net.inputs))
        self.out_blob = next(iter(self.net.outputs))
        self._validate_io_counts(input_size, output_size)

    def load_model(self, model, device, input_size, output_size,
                   num_requests, cpu_extension=None, plugin=None):
        """
        Loads a network and an image to the Inference Engine plugin.

        :param model: .xml file of pre trained model
        :param cpu_extension: extension for the CPU device
        :param device: Target device
        :param input_size: Number of input layers
        :param output_size: Number of output layers
        :param num_requests: Index of Infer request value. Limited to device
            capabilities. ``0`` loads with the plugin's default.
        :param plugin: Plugin for specified device
        :return: ``(plugin, input_shape)``
        :raises SystemExit: when the CPU plugin reports unsupported layers
        :raises AssertionError: when the input/output counts do not match
        """
        model_xml = model
        model_bin = os.path.splitext(model_xml)[0] + ".bin"

        # Plugin initialization for specified device
        # and load extensions library if specified
        if not plugin:
            log.info("Initializing plugin for {} device...".format(device))
            self.plugin = IEPlugin(device=device)
        else:
            self.plugin = plugin

        if cpu_extension and 'CPU' in device:
            self.plugin.add_cpu_extension(cpu_extension)

        # Read IR
        log.info("Reading IR...")
        self.net = IENetwork(model=model_xml, weights=model_bin)
        log.info("Loading IR to the plugin...")

        if self.plugin.device == "CPU":
            self._exit_if_unsupported(
                self.plugin.get_supported_layers(self.net),
                self.plugin.device)

        if num_requests == 0:
            # Loads network read from IR to the plugin
            self.net_plugin = self.plugin.load(network=self.net)
        else:
            self.net_plugin = self.plugin.load(network=self.net,
                                               num_requests=num_requests)

        self._bind_blobs(input_size, output_size)
        return self.plugin, self.get_input_shape()

    def load_model_2(self, model, device, input_size, output_size,
                     num_requests, cpu_extension=None, plugin=None):
        """
        Load the model with the ``IECore`` API.

        Check for supported layers
        Add any necessary extensions
        Return the loaded inference plugin

        :param model: .xml file of pre trained model
        :param device: Target device
        :param input_size: Number of input layers
        :param output_size: Number of output layers
        :param num_requests: number of infer requests; ``0`` uses the default
        :param cpu_extension: extension for the CPU device
        :param plugin: existing ``IECore``-compatible object to reuse
        :return: ``(plugin, input_shape)``
        :raises SystemExit: when the CPU device reports unsupported layers
        :raises AssertionError: when the input/output counts do not match
        """
        model_xml = model
        model_bin = os.path.splitext(model_xml)[0] + ".bin"

        # Plugin initialization for specified device
        # and load extensions library if specified
        if not plugin:
            log.info("Initializing plugin for {} device...".format(device))
            self.plugin = IECore()
        else:
            self.plugin = plugin

        if cpu_extension and 'CPU' in device:
            self.plugin.add_extension(cpu_extension, "CPU")

        # Read IR. The IR is parsed by read_network on the core instance;
        # load_network only accepts an already-read network.
        log.info("Reading IR...")
        self.net = self.plugin.read_network(model=model_xml,
                                            weights=model_bin)
        log.info("Loading IR to the plugin...")

        if "CPU" in device:
            self._exit_if_unsupported(
                self.plugin.query_network(self.net, "CPU"), device)

        if num_requests == 0:
            # Loads network read from IR to the plugin
            self.net_plugin = self.plugin.load_network(network=self.net,
                                                       device_name=device)
        else:
            self.net_plugin = self.plugin.load_network(
                network=self.net, num_requests=num_requests,
                device_name=device)

        self._bind_blobs(input_size, output_size)
        return self.plugin, self.get_input_shape()

    def get_input_shape(self):
        """
        Return the shape of the input layer
        """
        return self.net.inputs[self.input_blob].shape

    def exec_net(self, request_id, frame):
        """
        Start an asynchronous request on ``frame``.

        :param request_id: index of the infer request to start
        :param frame: data fed to the first input blob
        :return: the loaded executable network
        """
        self.infer_request_handle = self.net_plugin.start_async(
            request_id=request_id, inputs={self.input_blob: frame})
        return self.net_plugin

    def wait(self, request_id):
        """
        Wait for the request to be complete.

        :param request_id: index of the infer request to wait for
        :return: the status returned by the request's ``wait(-1)``
        """
        request_waiting = self.net_plugin.requests[request_id].wait(-1)
        return request_waiting

    def get_output(self, request_id, output=None):
        """
        Extract and return the output results.

        :param request_id: index of the infer request to read
        :param output: optional output blob name; when given, the blob is
            read from the most recently started request instead
        :return: the requested output blob
        """
        if output:
            return self.infer_request_handle.outputs[output]
        return self.net_plugin.requests[request_id].outputs[self.out_blob]
class HeadPoseEstimationModel:
    '''
    Wrapper around an OpenVINO head-pose estimation network.

    The class loads the IR files through ``IECore``, runs inference on a
    single image, reports the yaw/pitch/roll angles and can draw the pose
    axes on a frame.
    '''

    # Yaw and pitch must both lie strictly inside (-limit, +limit) degrees
    # for preprocess_output to report the angles.
    ANGLE_LIMIT_DEG = 22.5

    def __init__(self, model_name, threshold, device='CPU', extensions=None,
                 async_mode=True, plugin=None):
        '''
        Store the configuration; nothing is loaded until ``load_model``.

        :param model_name: path to the model ``.xml`` file
        :param threshold: stored on the instance; not read by this class
        :param device: target device name passed to the Inference Engine
        :param extensions: optional extension library path
        :param async_mode: run ``async_infer`` when true, ``infer`` otherwise
        :param plugin: accepted for interface compatibility; not used,
            ``load_model`` always creates its own ``IECore``
        '''
        self.plugin = None
        self.network = None
        self.input_blob = None
        self.output_blob = None
        self.out_shape = None
        self.exec_network = None
        self.threshold = threshold
        self.device = device
        self.async_mode = async_mode
        self.infer_request = None
        self.net_plugin = None
        self.net = None
        self.extensions = extensions
        self.model_xml = model_name

    def load_model(self, model_xml, cpu_extension=None):
        '''
        Read the IR, verify layer support and load it onto ``self.device``.

        :param model_xml: path to the ``.xml`` file; the ``.bin`` file is
            expected next to it with the same base name
        :param cpu_extension: optional extension library; falls back to the
            ``extensions`` value given to the constructor
        :raises SystemExit: when the device reports unsupported layers
        '''
        self.model_xml = model_xml
        model_bin = os.path.splitext(model_xml)[0] + ".bin"
        device = self.device
        if cpu_extension is None:
            cpu_extension = self.extensions
        else:
            self.extensions = cpu_extension

        # Initializing the plugins
        self.plugin = IECore()

        # Add a CPU extension when one is supplied
        if cpu_extension and "CPU" in device:
            self.plugin.add_extension(cpu_extension, "CPU")

        # Reading the Intermediate Representation (IR) model
        self.network = self.plugin.read_network(model=model_xml,
                                                weights=model_bin)

        # query_network maps every layer the device can run to that device,
        # so any layer missing from the result is unsupported.
        supported_layers = self.plugin.query_network(network=self.network,
                                                     device_name=device)
        unsupported_layers = [
            layer for layer in self.network.layers.keys()
            if layer not in supported_layers
        ]
        if unsupported_layers:
            print("Unsupported layers found: {}".format(unsupported_layers))
            print("Please check for supported extensions.")
            raise SystemExit(1)

        # Loading the IENetwork into the plugin
        self.exec_network = self.plugin.load_network(self.network, device)

        # Cache the names of the first input and output blobs
        self.input_blob = next(iter(self.network.inputs))
        self.output_blob = next(iter(self.network.outputs))
        self.out_shape = self.network.outputs[self.output_blob].shape
        logging.info("Model Head Pose Detection output shape printed : %s",
                     self.out_shape)

    def predict(self, image, width, height, threshold):
        '''
        Run inference on ``image`` and return the head-pose result.

        :param image: image array indexed as (height, width, channels)
        :param width: ignored; the size is taken from ``image``
        :param height: ignored; the size is taken from ``image``
        :param threshold: ignored by this model
        :return: ``(person_in_frame, target_gaze)`` as produced by
            ``preprocess_output``; ``(False, [[0, 0, 0]])`` when the request
            does not complete with status 0
        '''
        # Defaults used when the request does not finish successfully.
        person_in_frame, target_gaze = False, [[0, 0, 0]]
        width = image.shape[1]
        height = image.shape[0]
        img_frame = self.preprocess_input(image)
        request = self.exec_network.requests[0]
        feed = {self.input_blob: img_frame}
        if self.async_mode:
            request.async_infer(inputs=feed)
        else:
            request.infer(inputs=feed)

        if request.wait(-1) == 0:
            # All output blobs are needed: one per angle.
            outputs = request.outputs
            person_in_frame, target_gaze = self.preprocess_output(
                image, outputs, width, height)
        return person_in_frame, target_gaze

    def get_input_shape(self):
        ### Return the shape of the input layer ###
        return self.network.inputs[self.input_blob].shape

    def preprocess_input(self, image):
        '''
        Resize ``image`` to the network input size and reorder it from
        (height, width, channels) to (batch, channels, height, width).
        '''
        (n, c, h, w) = self.network.inputs[self.input_blob].shape
        img_frame = cv2.resize(image, (w, h))
        img_frame = img_frame.transpose((2, 0, 1))
        img_frame = img_frame.reshape((n, c, h, w))
        return img_frame

    @staticmethod
    def _as_scalar(value):
        '''Return the first element of ``value`` as a Python float.'''
        return float(np.asarray(value).reshape(-1)[0])

    def preprocess_output(self, image, outputs, width=None, height=None):
        '''
        Parse the head-pose angles out of the network outputs.

        Output layer names in Inference Engine format:
        name: "angle_y_fc", shape: [1, 1] - Estimated yaw (in degrees).
        name: "angle_p_fc", shape: [1, 1] - Estimated pitch (in degrees).
        name: "angle_r_fc", shape: [1, 1] - Estimated roll (in degrees).
        Each output contains one float value (yaw, pitch, roll).

        ref: source code: https://knowledge.udacity.com/questions/242566

        :param image: unused
        :param outputs: mapping from output layer name to its blob
        :param width: unused; optional
        :param height: unused; optional
        :return: ``(True, [[yaw, pitch, roll]])`` with Python floats when
            yaw and pitch are both strictly within ``ANGLE_LIMIT_DEG``
            degrees of zero, otherwise ``(False, [[0, 0, 0]])``
        '''
        pitch = self._as_scalar(outputs["angle_p_fc"])
        yaw = self._as_scalar(outputs["angle_y_fc"])
        roll = self._as_scalar(outputs["angle_r_fc"])

        limit = self.ANGLE_LIMIT_DEG
        if -limit < yaw < limit and -limit < pitch < limit:
            return True, [[yaw, pitch, roll]]
        return False, [[0, 0, 0]]

    @staticmethod
    def _project(point, camera_matrix, cx, cy):
        '''
        Project a 3x1 camera-space point to integer pixel coordinates.

        :param point: array with three values (x, y, z)
        :param camera_matrix: 3x3 matrix from ``build_camera_matrix``
        :param cx: x offset added after projection
        :param cy: y offset added after projection
        :return: ``(x, y)`` tuple of ints
        '''
        x, y, z = (float(v) for v in point.reshape(-1))
        fx = float(camera_matrix[0][0])
        fy = float(camera_matrix[1][1])
        return (int(x / z * fx + cx), int(y / z * fy + cy))

    # code source: https://knowledge.udacity.com/questions/171017
    def draw_axes(self, img_frame, center_of_face, yaw, pitch, roll):
        '''
        Draw the three head-pose axes on ``img_frame``.

        :param img_frame: image drawn on in place
        :param center_of_face: ``(x, y)`` pixel position the axes start from
        :param yaw: yaw angle in degrees
        :param pitch: pitch angle in degrees
        :param roll: roll angle in degrees
        :return: ``img_frame``
        '''
        focal_length = 950.0
        scale = 50

        # Convert to radians on local copies so that array arguments owned
        # by the caller are never modified in place.
        deg_to_rad = np.pi / 180.0
        yaw = self._as_scalar(yaw) * deg_to_rad
        pitch = self._as_scalar(pitch) * deg_to_rad
        roll = self._as_scalar(roll) * deg_to_rad

        cx = int(center_of_face[0])
        cy = int(center_of_face[1])

        Rx = np.array([[1, 0, 0],
                       [0, math.cos(pitch), -math.sin(pitch)],
                       [0, math.sin(pitch), math.cos(pitch)]])
        Ry = np.array([[math.cos(yaw), 0, -math.sin(yaw)],
                       [0, 1, 0],
                       [math.sin(yaw), 0, math.cos(yaw)]])
        Rz = np.array([[math.cos(roll), -math.sin(roll), 0],
                       [math.sin(roll), math.cos(roll), 0],
                       [0, 0, 1]])
        # ref: https://www.learnopencv.com/rotation-matrix-to-euler-angles/
        R = Rz @ Ry @ Rx

        camera_matrix = self.build_camera_matrix(center_of_face,
                                                 focal_length)

        def column(x, y, z):
            return np.array([x, y, z], dtype='float32').reshape(3, 1)

        # The axes are placed focal_length away along z before projection.
        origin = column(0, 0, 0)
        origin[2] = camera_matrix[0][0]

        xaxis = np.dot(R, column(1 * scale, 0, 0)) + origin
        yaxis = np.dot(R, column(0, -1 * scale, 0)) + origin
        zaxis = np.dot(R, column(0, 0, -1 * scale)) + origin
        zaxis1 = np.dot(R, column(0, 0, 1 * scale)) + origin

        # X axis
        p2 = self._project(xaxis, camera_matrix, cx, cy)
        cv2.line(img_frame, (cx, cy), p2, (0, 0, 255), 2)

        # Y axis
        p2 = self._project(yaxis, camera_matrix, cx, cy)
        cv2.line(img_frame, (cx, cy), p2, (0, 255, 0), 2)

        # Z axis, drawn between its two ends with a circle at the tip
        p1 = self._project(zaxis1, camera_matrix, cx, cy)
        p2 = self._project(zaxis, camera_matrix, cx, cy)
        cv2.line(img_frame, p1, p2, (255, 0, 0), 2)
        cv2.circle(img_frame, p2, 3, (255, 0, 0), 2)
        return img_frame

    # code source: https://knowledge.udacity.com/questions/171017
    def build_camera_matrix(self, center_of_face, focal_length):
        '''
        Build a 3x3 float32 camera matrix.

        :param center_of_face: ``(x, y)`` used as the principal point
        :param focal_length: value placed on both focal-length entries
        :return: the camera matrix
        '''
        cx = int(center_of_face[0])
        cy = int(center_of_face[1])
        camera_matrix = np.zeros((3, 3), dtype='float32')
        camera_matrix[0][0] = focal_length
        camera_matrix[0][2] = cx
        camera_matrix[1][1] = focal_length
        camera_matrix[1][2] = cy
        camera_matrix[2][2] = 1
        return camera_matrix

    def clean(self):
        """
        deletes all the open instances

        Safe to call more than once: attributes already removed are skipped.
        :return: None
        """
        for name in ("plugin", "network", "exec_network", "net", "device"):
            if hasattr(self, name):
                delattr(self, name)
class Network:
    """
    Load and configure inference plugins for the specified target devices
    and performs synchronous and asynchronous modes for the specified infer
    requests.
    """

    def __init__(self):
        """
        Initialize the attributes filled in by ``load_model``.
        """
        self.net = None
        self.plugin = None
        self.input_blob = None
        self.output_blob = None
        self.net_plugin = None
        self.infer_request = None

    def load_model(self, model, device, num_requests, cpu_extension=None,
                   plugin=None, input_size=None, output_size=None):
        """
        Read the IR, check layer support and load it onto ``device``.

        :param model: path to the ``.xml`` file; the ``.bin`` file is
            expected next to it with the same base name
        :param device: target device name
        :param num_requests: number of infer requests; ``0`` uses the default
        :param cpu_extension: optional extension library for the CPU device
        :param plugin: existing ``IECore``-compatible object to reuse; a new
            ``IECore`` is created when omitted
        :param input_size: expected number of network inputs; not checked
            when ``None``
        :param output_size: expected number of network outputs; not checked
            when ``None``
        :return: ``(plugin, input_shape)``
        :raises SystemExit: on unsupported layers or an input/output count
            that differs from the expected one
        """
        load_model_xml = model
        load_model_bin = os.path.splitext(load_model_xml)[0] + ".bin"

        if not plugin:
            log.info("Please wait. Starting plugin for {} device... ".format(
                device))
            self.plugin = IECore()
        else:
            self.plugin = plugin

        if cpu_extension and 'CPU' in device:
            self.plugin.add_extension(cpu_extension, "CPU")

        log.info('Reading IR, Please wait.')
        self.net = self.plugin.read_network(model=load_model_xml,
                                            weights=load_model_bin)
        log.info(
            'Completed. Loading IR to the plugin. This may take some time')

        # Check for supported layers
        if "CPU" in device:
            supported_layers = self.plugin.query_network(self.net, "CPU")
            unsupported_layers = [
                layer for layer in self.net.layers.keys()
                if layer not in supported_layers
            ]
            if unsupported_layers:
                log.error(
                    'There are a number of unsupported layers found: {}'
                    .format(unsupported_layers))
                sys.exit(1)

        if num_requests == 0:
            self.net_plugin = self.plugin.load_network(network=self.net,
                                                       device_name=device)
        else:
            self.net_plugin = self.plugin.load_network(
                network=self.net, num_requests=num_requests,
                device_name=device)

        self.input_blob = next(iter(self.net.inputs))
        self.output_blob = next(iter(self.net.outputs))

        if input_size is not None and len(self.net.inputs) != input_size:
            log.error(
                'Sorry, this app supports {} input topologies. Please make the necessary changes and try again'
                .format(input_size))
            sys.exit(1)

        if output_size is not None and len(self.net.outputs) != output_size:
            log.error(
                'Sorry, this app supports {} output topologies. Please make the necessary changes and try again'
                .format(output_size))
            sys.exit(1)

        return self.plugin, self.get_input_shape()

    def get_input_shape(self):
        """
        Return the shape of the input layer
        """
        return self.net.inputs[self.input_blob].shape

    def exec_net(self, request_id, frame):
        """
        Start an asynchronous request on ``frame``.

        :param request_id: index of the infer request to start
        :param frame: data fed to the first input blob
        :return: the loaded executable network
        """
        self.infer_request = self.net_plugin.start_async(
            request_id=request_id, inputs={self.input_blob: frame})
        return self.net_plugin

    def wait(self, request_id):
        """
        Wait for the request to be complete.

        :param request_id: index of the infer request to wait for
        :return: the status returned by the request's ``wait(-1)``
        """
        wait_status = self.net_plugin.requests[request_id].wait(-1)
        return wait_status

    def get_output(self, request_id=0, output=None):
        """
        Extract and return the output results.

        :param request_id: index of the infer request to read; defaults to 0
        :param output: optional output blob name; when given, the blob is
            read from the most recently started request instead
        :return: the requested output blob
        """
        if output:
            result = self.infer_request.outputs[output]
        else:
            result = self.net_plugin.requests[request_id].outputs[
                self.output_blob]
        return result

    def delete_instances(self):
        """
        Delete the executable network, the plugin and the network.
        """
        del self.net_plugin
        del self.plugin
        del self.net
class GazeEstimationModel:
    '''
    Wrapper around an OpenVINO gaze-estimation network.

    The class loads the IR files through ``IECore``, feeds the two eye crops
    and the head-pose angles to the network and post-processes the returned
    gaze vector.
    '''

    # Input blob names of the network (see the predict docstring).
    HEAD_POSE_INPUT = "head_pose_angles"
    LEFT_EYE_INPUT = "left_eye_image"
    RIGHT_EYE_INPUT = "right_eye_image"

    # Height/width each eye crop is resized to; inputs are [1x3x60x60].
    EYE_INPUT_SIZE = 60

    def __init__(self, model_name, threshold, device='CPU', extensions=None,
                 async_mode=True, plugin=None):
        '''
        Store the configuration; nothing is loaded until ``load_model``.

        :param model_name: path to the model ``.xml`` file
        :param threshold: stored on the instance; not read by this class
        :param device: target device name passed to the Inference Engine
        :param extensions: optional extension library path
        :param async_mode: run ``async_infer`` when true, ``infer`` otherwise
        :param plugin: accepted for interface compatibility; not used,
            ``load_model`` always creates its own ``IECore``
        '''
        self.plugin = None
        self.network = None
        self.input_blob = None
        self.output_blob = None
        self.out_shape = None
        self.exec_network = None
        self.threshold = threshold
        self.device = device
        self.async_mode = async_mode
        self.infer_request = None
        self.net_plugin = None
        self.net = None
        self.model_xml = model_name
        self.extensions = extensions

    def load_model(self, model_xml, gaze_angles, input_gaze_angles,
                   cpu_extension=None):
        '''
        Read the IR, verify layer support and load it onto ``self.device``.

        :param model_xml: path to the ``.xml`` file; the ``.bin`` file is
            expected next to it with the same base name
        :param gaze_angles: unused
        :param input_gaze_angles: unused
        :param cpu_extension: optional extension library; falls back to the
            ``extensions`` value given to the constructor
        :raises SystemExit: when the device reports unsupported layers
        '''
        self.model_xml = model_xml
        model_bin = os.path.splitext(model_xml)[0] + ".bin"
        device = self.device
        if cpu_extension is None:
            cpu_extension = self.extensions
        else:
            self.extensions = cpu_extension

        # Initializing the plugins
        self.plugin = IECore()

        # Add any necessary extensions
        if cpu_extension and "CPU" in device:
            self.plugin.add_extension(cpu_extension, "CPU")

        # Reading the Intermediate Representation (IR) model
        self.network = self.plugin.read_network(model=model_xml,
                                                weights=model_bin)

        # query_network maps every layer the device can run to that device,
        # so any layer missing from the result is unsupported.
        supported_layers = self.plugin.query_network(network=self.network,
                                                     device_name=device)
        unsupported_layers = [
            layer for layer in self.network.layers.keys()
            if layer not in supported_layers
        ]
        if unsupported_layers:
            print("Unsupported layers found: {}".format(unsupported_layers))
            print("Please check for supported extensions.")
            raise SystemExit(1)

        # Loading the IENetwork into the plugin
        self.exec_network = self.plugin.load_network(self.network, device)

        # Keep a handle on the head-pose input and the output blob
        self.input_gaze_angles = self.network.inputs[self.HEAD_POSE_INPUT]
        self.output_blob = next(iter(self.network.outputs))
        self.out_shape = self.network.outputs[self.output_blob].shape
        logging.info("Model Gaze Estimation output shape printed : %s",
                     self.out_shape)

    def predict(self, l_eye_img, r_eye_img, target_gaze, img_frame, width,
                height):
        '''
        Run gaze estimation on a pair of eye crops.

        Input_blob
        Blob in the format [BxCxHxW] where B = batch size, C = number of
        channels, H = image height, W = image width, with the name
        right_eye_image and the shape [1x3x60x60] (and likewise for the
        left eye).
        Blob in the format [BxC] with the name head_pose_angles and the
        shape [1x3].

        outputs_blob
        The net outputs a blob with the shape [1x3], containing cartesian
        coordinates of the gaze direction vector. The output vector is not
        normalized and has non-unit length. Output layer name in Inference
        Engine format: gaze_vector

        Ref: https://docs.openvinotoolkit.org/latest/omz_models_intel_gaze_estimation_adas_0002_description_gaze_estimation_adas_0002.html

        :param l_eye_img: left eye crop, indexed (height, width, channels)
        :param r_eye_img: right eye crop, indexed (height, width, channels)
        :param target_gaze: head-pose angles ``[[yaw, pitch, roll]]``
        :param img_frame: unused
        :param width: unused
        :param height: unused
        :return: the result of ``preprocess_output``, or ``None`` when the
            request does not complete with status 0
        '''
        vout = None
        l_eye_img, r_eye_img = self.preprocess_input(l_eye_img, r_eye_img)

        # ref: https://github.com/gauravshelangia/computer-pointer-controller/blob/master/src/facial_landmark_detection.py
        feed = {
            self.HEAD_POSE_INPUT: target_gaze,
            self.LEFT_EYE_INPUT: l_eye_img,
            self.RIGHT_EYE_INPUT: r_eye_img,
        }
        request = self.exec_network.requests[0]
        if self.async_mode:
            request.async_infer(inputs=feed)
        else:
            request.infer(inputs=feed)

        if request.wait(-1) == 0:
            outputs = request.outputs[self.output_blob]
            # Keywords keep outputs/target_gaze from being swapped.
            vout = self.preprocess_output(l_eye_img, r_eye_img,
                                          outputs=outputs,
                                          target_gaze=target_gaze)
        return vout

    def _prepare_eye(self, eye_img):
        '''Resize one eye crop and reorder it to [1, 3, size, size].'''
        size = self.EYE_INPUT_SIZE
        eye_img = cv2.resize(eye_img, (size, size))
        eye_img = eye_img.transpose((2, 0, 1))
        return eye_img.reshape((1, 3, size, size))

    def preprocess_input(self, l_eye_img, r_eye_img):
        '''
        Preprocess both eye crops before feeding them to the model.

        :param l_eye_img: left eye crop, indexed (height, width, channels)
        :param r_eye_img: right eye crop, indexed (height, width, channels)
        :return: ``(l_eye_img, r_eye_img)``, each shaped [1, 3, 60, 60]
        '''
        return self._prepare_eye(l_eye_img), self._prepare_eye(r_eye_img)

    def preprocess_output(self, l_eye_img, r_eye_img, outputs, target_gaze):
        '''
        Normalise the gaze vector and rotate its x/y part by the head roll.

        Ref: https://knowledge.udacity.com/questions/254779

        :param l_eye_img: unused
        :param r_eye_img: unused
        :param outputs: network output; ``outputs[0]`` is the gaze vector
        :param target_gaze: head-pose angles holding at least three values
            in the order yaw, pitch, roll (degrees), e.g.
            ``[[yaw, pitch, roll]]``
        :return: ``((x, y), gaze_vector)`` where ``gaze_vector`` is the
            normalised vector and ``(x, y)`` its roll-compensated x/y part
        :raises ValueError: when ``target_gaze`` has fewer than three values
        '''
        gaze_vector = np.asarray(outputs[0])

        # Roll comes from the head pose, not from the gaze vector itself.
        angles = np.asarray(target_gaze, dtype=float).reshape(-1)
        if angles.size < 3:
            raise ValueError(
                "target_gaze must contain yaw, pitch and roll angles")
        roll = float(angles[2])

        # A zero vector cannot be normalised; leave it untouched.
        norm = np.linalg.norm(gaze_vector)
        if norm > 0:
            gaze_vector = gaze_vector / norm

        cs = math.cos(roll * math.pi / 180.0)
        sn = math.sin(roll * math.pi / 180.0)
        tmpX = gaze_vector[0] * cs + gaze_vector[1] * sn
        tmpY = -gaze_vector[0] * sn + gaze_vector[1] * cs
        return (tmpX, tmpY), (gaze_vector)

    def clean(self):
        """
        This function deletes all the open instances

        Safe to call more than once: attributes already removed are skipped.
        :return: None
        """
        for name in ("plugin", "network", "exec_network", "net", "device"):
            if hasattr(self, name):
                delattr(self, name)