def infer_on_video(args):
    # Initialize the Inference Engine and load the model
    plugin = Network()
    plugin.load_model(args.m, args.d, None)
    net_input_shape = plugin.get_input_shape()

    # Get and open video capture
    cap = cv2.VideoCapture(args.i)
    cap.open(args.i)

    counter = 0
    incidence_flag = False
    while cap.isOpened():
        flag, frame = cap.read()
        if not flag:
            break
        counter += 1

        # Pre-process the frame: resize to the network input, HWC -> 1 x C x H x W
        r_frame = cv2.resize(frame, (net_input_shape[3], net_input_shape[2]))
        r_frame = r_frame.transpose((2, 0, 1))
        r_frame = r_frame.reshape(1, *r_frame.shape)

        # Perform inference and check the result for an incident
        plugin.async_inference(r_frame)
        if plugin.wait() == 0:
            result = plugin.extract_output()
            incidence_flag = incidence(result, counter, incidence_flag)

    cap.release()
    cv2.destroyAllWindows()
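# None of the snippets in this collection define the `Network` wrapper they
# rely on. The sketch below is a hypothetical minimal version, assuming the
# pre-2022 OpenVINO Inference Engine Python API (the `exec_net.inputs` /
# `exec_net.requests` usage in the ASL snippet further down points at that
# generation); it is not the authors' actual class.
from openvino.inference_engine import IECore


class Network:
    def __init__(self):
        self.ie = IECore()
        self.net = None
        self.exec_net = None
        self.input_blob = None
        self.output_blob = None

    def load_model(self, model, device="CPU", cpu_extension=None):
        # `model` is the path to the .xml file; the weights sit next to it
        weights = model.rsplit(".", 1)[0] + ".bin"
        if cpu_extension and device == "CPU":
            self.ie.add_extension(cpu_extension, "CPU")
        self.net = self.ie.read_network(model=model, weights=weights)
        self.exec_net = self.ie.load_network(self.net, device)
        self.input_blob = next(iter(self.net.inputs))
        self.output_blob = next(iter(self.net.outputs))

    def get_input_shape(self):
        # [N, C, H, W] for a single image input
        return self.net.inputs[self.input_blob].shape

    def async_inference(self, image, request_id=0):
        self.exec_net.start_async(request_id, inputs={self.input_blob: image})

    def wait(self, request_id=0):
        # Returns 0 once the request has completed successfully
        return self.exec_net.requests[request_id].wait(-1)

    def extract_output(self, request_id=0):
        return self.exec_net.requests[request_id].outputs[self.output_blob]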
def infer_on_video(args):
    ### Initialize the Inference Engine
    plugin = Network()

    ### Load the network model into the IE
    plugin.load_model(args.m, args.d)
    input_shape = plugin.get_input_shape()

    # Get and open video capture
    cap = cv2.VideoCapture(args.i)
    cap.open(args.i)

    # Grab the shape of the input
    width = int(cap.get(3))
    height = int(cap.get(4))

    # Create a video writer for the output video
    # The second argument should be `cv2.VideoWriter_fourcc('M','J','P','G')`
    # on Mac, and `0x00000021` on the Udacity IDE and on Linux
    out = cv2.VideoWriter('out.mp4', cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
                          30, (width, height))

    # Process frames until the video ends, or process is exited
    while cap.isOpened():
        # Read the next frame
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)

        ### Pre-process the frame
        image = cv2.resize(frame, (input_shape[3], input_shape[2]))
        image = image.transpose((2, 0, 1))
        image = image.reshape(1, 3, input_shape[2], input_shape[3])

        ### Perform inference on the frame
        plugin.async_inference(image)

        ### Get the output of inference
        if plugin.wait() == 0:
            result = plugin.extract_output()
            ### Update the frame to include detected bounding boxes
            for box in result[0][0]:
                if box[2] >= float(args.conf):
                    x_min = int(box[3] * width)
                    y_min = int(box[4] * height)
                    x_max = int(box[5] * width)
                    y_max = int(box[6] * height)
                    cv2.rectangle(frame, (x_min, y_min), (x_max, y_max),
                                  get_color(args.color), 1)

        # Write out the frame
        out.write(frame)

        # Break if escape key pressed
        if key_pressed == 27:
            break

    # Release the out writer, capture, and destroy any OpenCV windows
    out.release()
    cap.release()
    cv2.destroyAllWindows()
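# The fourcc comment above recurs throughout these snippets; a small sketch
# of picking the codec at runtime rather than hard-coding one per platform:
import sys
import cv2

if sys.platform == "darwin":
    # Mac, per the comment above
    fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
else:
    # Linux / Udacity workspace
    fourcc = 0x00000021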
def infer_on_image(args):
    print('INFER ON IMAGE')
    # Convert the args for confidence
    args.ct = float(args.ct)

    ### Initialize the Inference Engine
    plugin = Network()

    ### Load the network model into the IE
    plugin.load_model(args.m, args.d)
    net_input_shape = plugin.get_input_shape()

    # Read the input image
    image = cv2.imread(args.i)
    h, w = net_input_shape[2], net_input_shape[3]

    ### Preprocess the input image
    preprocessed_image = preprocessing(image, h, w)

    ### Perform inference on the frame
    plugin.async_inference(preprocessed_image)

    ### Get the output of inference
    if plugin.wait() == 0:
        output = plugin.extract_output()
        image = draw_boxes(image, output, args, w, h)
        cv2.imwrite(args.o, image)
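# `preprocessing` is another helper these snippets share but never define.
# Judging by the inline resize/transpose/reshape blocks elsewhere in this
# collection, it plausibly looks like the sketch below; note that the
# `pred_at_edge` snippet that follows calls np.expand_dims on its result,
# so that variant presumably omits the final batch-dimension reshape.
import cv2


def preprocessing(image, h, w):
    # Resize to the network's expected H x W, move channels first,
    # and add the batch dimension: HWC -> 1 x C x H x W
    image = cv2.resize(image, (w, h))
    image = image.transpose((2, 0, 1))
    return image.reshape(1, *image.shape)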
def pred_at_edge(input_img):
    # Initialize the Inference Engine
    plugin = Network()

    # Load the network model into the IE
    plugin.load_model(MODEL, "CPU", CPU_EXTENSION)
    net_input_shape = plugin.get_input_shape()

    # Reading input image
    img = cv2.imread(input_img, cv2.IMREAD_COLOR)

    # Pre-process the image
    expand_img = preprocessing(img, net_input_shape[2], net_input_shape[3])
    final_img = np.expand_dims(expand_img, axis=0)

    # Perform inference on the image
    plugin.async_inference(final_img)

    # Get the output of inference
    if plugin.wait() == 0:
        results = plugin.extract_output()
        pred = np.argmax(results)
        disease = SKIN_CLASSES[pred]
        accuracy = results[0][pred]
        print(disease, accuracy)
        return disease, accuracy
def infer_on_camera(args):
    # Convert the args for color and confidence
    args.c = convert_color(args.c)
    args.ct = float(args.ct)

    # Initialize the Inference Engine
    plugin = Network()

    # Load the network model into the IE
    plugin.load_model(args.m, args.d, CPU_EXTENSION)

    # Get input shape
    net_input_shape = plugin.get_input_shape()

    # Get and open video capture (0 for the default camera)
    cap = cv2.VideoCapture(0)
    cap.open(0)

    # Grab the shape of the input
    width = int(cap.get(3))
    height = int(cap.get(4))

    # Process frames until video end or process is exited
    while cap.isOpened():
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)

        # Preprocess the frame to the network's input height and width
        p_frame = preprocessing(frame, net_input_shape[2], net_input_shape[3])

        # Perform inference on the frame
        plugin.async_inference(p_frame)

        # Get the output of the inference
        if plugin.wait() == 0:
            result = plugin.extract_output()
            # Update the frame to include detected bounding boxes
            frame = draw_boxes(frame, result, args, width, height)
            cv2.imshow("frame", frame)

        if key_pressed == 27:
            break

    # Release the capture and destroy any OpenCV windows
    cap.release()
    cv2.destroyAllWindows()
def infer_on_video(args):
    ### TODO: Initialize the Inference Engine
    net = Network()

    ### TODO: Load the network model into the IE
    net.load_model(model=args.m, device=args.d, cpu_extension=CPU_EXTENSION)
    net_input_shape = net.get_input_shape()

    # Get and open video capture
    cap = cv2.VideoCapture(args.i)
    cap.open(args.i)

    # Grab the shape of the input
    width = int(cap.get(3))
    height = int(cap.get(4))

    # Create a video writer for the output video
    # The second argument should be `cv2.VideoWriter_fourcc('M','J','P','G')`
    # on Mac, and `0x00000021` on Linux
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    out = cv2.VideoWriter("out.mp4", fourcc, 30, (width, height))

    # Process frames until the video ends, or process is exited
    while cap.isOpened():
        # Read the next frame
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)

        ### TODO: Pre-process the frame
        # Taken from lesson 2: preprocess_input.py
        p_frame = cv2.resize(frame, (net_input_shape[3], net_input_shape[2]))
        p_frame = p_frame.transpose((2, 0, 1))
        p_frame = p_frame.reshape(1, *p_frame.shape)

        ### TODO: Perform inference on the frame
        net.async_inference(image=p_frame)

        ### TODO: Get the output of inference
        if net.wait() == 0:
            result = net.extract_output()
            ### TODO: Update the frame to include detected bounding boxes
            try:
                frame = draw_boxes(frame, result, args, width, height)
            except Exception as e:
                print(str(e))
            else:
                # Write out the frame
                out.write(frame)

        # Break if escape key pressed
        if key_pressed == 27:
            break

    # Release the out writer, capture, and destroy any OpenCV windows
    out.release()
    cap.release()
    cv2.destroyAllWindows()
def infer_on_video(args):
    # Convert the args for color and confidence
    args.c = convert_color(args.c)
    args.ct = float(args.ct)

    ### TODO: Initialize the Inference Engine
    plugin = Network()

    ### TODO: Load the network model into the IE
    plugin.load_model(args.m, args.d, CPU_EXTENSION)
    net_input_shape = plugin.get_input_shape()

    # Get and open video capture
    cap = cv2.VideoCapture(args.i)
    cap.open(args.i)

    # Grab the shape of the input
    width = int(cap.get(3))
    height = int(cap.get(4))

    # Create a video writer for the output video
    # The second argument should be `cv2.VideoWriter_fourcc('M','J','P','G')`
    # on Mac, and `0x00000021` on Linux
    out = cv2.VideoWriter(FILE_OUTPUT, cv2.VideoWriter_fourcc('m', 'p', '4', 'v'),
                          30, (width, height))

    # Process frames until the video ends, or process is exited
    while cap.isOpened():
        # Read the next frame
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)

        ### TODO: Pre-process the frame
        p_frame = cv2.resize(frame, (net_input_shape[3], net_input_shape[2]))
        p_frame = p_frame.transpose((2, 0, 1))
        p_frame = p_frame.reshape(1, *p_frame.shape)

        ### TODO: Perform inference on the frame
        plugin.async_inference(p_frame)

        ### TODO: Get the output of inference
        if plugin.wait() == 0:
            result = plugin.extract_output()
            ### TODO: Update the frame to include detected bounding boxes
            frame = draw_boxes(frame, result, args, width, height)

        # Write out the frame
        out.write(frame)

        # Break if escape key pressed
        if key_pressed == 27:
            break

    # Release the out writer, capture, and destroy any OpenCV windows
    out.release()
    cap.release()
    cv2.destroyAllWindows()
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # Initialise the class
    infer_network = Network()
    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold

    ### TODO: Load the model through `infer_network` ###
    infer_network.load_model(args.m, args.d, CPU_EXTENSION)
    net_input_shape = infer_network.get_input_shape()

    ### TODO: Handle the input stream ###
    cap = cv2.VideoCapture(args.i)
    cap.open(args.i)
    width = int(cap.get(3))
    height = int(cap.get(4))

    ### TODO: Loop until stream is over ###
    while cap.isOpened():
        ### TODO: Read from the video capture ###
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)

        ### TODO: Pre-process the image as needed ###
        p_frame = cv2.resize(frame, (net_input_shape[3], net_input_shape[2]))
        p_frame = p_frame.transpose((2, 0, 1))
        p_frame = p_frame.reshape(1, *p_frame.shape)

        ### TODO: Start asynchronous inference for specified request ###
        infer_network.async_inference(p_frame)

        ### TODO: Wait for the result ###
        if infer_network.wait() == 0:
            ### TODO: Get the results of the inference request ###
            result = infer_network.extract_output()

            ### TODO: Extract any desired stats from the results ###
            print("result:", result)

            ### TODO: Calculate and send relevant information on ###
            ### current_count, total_count and duration to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###
            client.publish("person", json.dumps({"count": 0, "total": 0}))
            ### Topic "person/duration": key of "duration" ###
            client.publish("person/duration", json.dumps({"duration": 0}))

        ### TODO: Send the frame to the FFMPEG server ###
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()

    cap.release()
    cv2.destroyAllWindows()
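# The hard-coded zeros above leave the stats TODO unfinished. A hypothetical
# per-frame count, assuming the SSD-style output layout used elsewhere in this
# collection (box = [image_id, label, conf, xmin, ymin, xmax, ymax]); "total"
# and "duration" would additionally need frame-to-frame tracking that this
# skeleton does not implement:
def count_people(result, prob_threshold):
    # Count boxes whose confidence clears the threshold in this frame
    return sum(1 for box in result[0][0] if box[2] >= prob_threshold)

# Inside the loop above, this could replace the hard-coded count:
# client.publish("person",
#                json.dumps({"count": count_people(result, prob_threshold),
#                            "total": 0}))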
def emotion_detection(emotions_model, frame, result, args, width, height):
    """
    Detect the emotion of the faces of a frame.
    """
    # Initialize the Inference Engine
    plugin_emotions_detection = Network()

    # Load the network models into the IE
    plugin_emotions_detection.load_model(emotions_model, args.d, CPU_EXTENSION)
    net_input_shape_ed = plugin_emotions_detection.get_input_shape()

    for box in result[0][0]:
        conf = box[2]
        if conf >= args.ct:
            # Calculate the rectangle box margins, clamped to the frame
            x_min = max(int(box[3] * width), 0)
            y_min = max(int(box[4] * height), 0)
            x_max = min(int(box[5] * width), width)
            y_max = min(int(box[6] * height), height)

            # Crop the image for emotion detection
            cropped_frame = frame[y_min:y_max, x_min:x_max]
            if cropped_frame.shape[0] and cropped_frame.shape[1]:
                # Draw rectangle box on the input
                cv2.rectangle(frame, (x_min, y_min), (x_max, y_max),
                              (0, 0, 255), 1)
                print('cropped frame: ', cropped_frame.shape)

                # Preprocess the cropped image
                p_frame_ed = preprocessing(cropped_frame,
                                           net_input_shape_ed[2],
                                           net_input_shape_ed[3])

                # Perform inference on the frame to detect emotion
                plugin_emotions_detection.async_inference(p_frame_ed)
                if plugin_emotions_detection.wait() == 0:
                    result_ed = plugin_emotions_detection.extract_output()

                    # Get the emotions class
                    emotion_class_id = np.argmax(result_ed)
                    emotion_class = EMOTIONS[emotion_class_id]
                    print('emotion detected:', emotion_class)

                    # # Create a rectangle box to display emotion text
                    # sub_img = frame[y_min:y_min+20, x_min:x_max]
                    # white_rect = np.ones(sub_img.shape, dtype=np.uint8) * 255
                    # res = cv2.addWeighted(sub_img, 0.5, white_rect, 0.5, 1.0)
                    # # Putting the image back to its position
                    # frame[y_min:y_min+20, x_min:x_max] = res

                    # Create a rectangle to display the predicted emotion
                    cv2.rectangle(frame, (x_min, y_min), (x_max, y_min + 20),
                                  (51, 255, 196), cv2.FILLED)
                    cv2.putText(frame, emotion_class, (x_min + 5, y_min + 15),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)
    return frame
def infer_on_video(args, model):
    ### TODO: Connect to the MQTT server

    # Initialize the Inference Engine
    plugin = Network()

    # Load the network model into the IE
    plugin.load_model(model, args.d, CPU_EXTENSION)
    net_input_shape = plugin.get_input_shape()

    # Get and open video capture
    cap = cv2.VideoCapture(args.i)
    cap.open(args.i)

    # Grab the shape of the input
    width = int(cap.get(3))
    height = int(cap.get(4))

    # Process frames until the video ends, or process is exited
    while cap.isOpened():
        # Read the next frame
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)

        # Pre-process the frame
        p_frame = cv2.resize(frame, (net_input_shape[3], net_input_shape[2]))
        p_frame = p_frame.transpose((2, 0, 1))
        p_frame = p_frame.reshape(1, *p_frame.shape)

        # Perform inference on the frame
        plugin.async_inference(p_frame)

        # Get the output of inference
        if plugin.wait() == 0:
            result = plugin.extract_output()
            # Draw the output mask onto the input
            out_frame, classes = draw_masks(result, width, height)
            class_names = get_class_names(classes)
            speed = randint(50, 70)

            ### TODO: Send the class names and speed to the MQTT server
            ### Hint: The UI web server will check for a "class" and
            ### "speedometer" topic. Additionally, it expects "class_names"
            ### and "speed" as the json keys of the data, respectively.

            ### TODO: Send frame to the ffmpeg server

        # Break if escape key pressed
        if key_pressed == 27:
            break

    # Release the capture and destroy any OpenCV windows
    cap.release()
    cv2.destroyAllWindows()
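# The MQTT TODOs above are completed by the variant near the end of this
# collection; a minimal sketch matching that variant and the hint's topic/key
# names (MQTT_HOST, MQTT_PORT, and MQTT_KEEPALIVE_INTERVAL are assumed
# module-level constants):
import json
import sys
import paho.mqtt.client as mqtt


def connect_mqtt():
    client = mqtt.Client()
    client.connect(MQTT_HOST, MQTT_PORT, MQTT_KEEPALIVE_INTERVAL)
    return client


def publish_stats(client, class_names, speed):
    # Topic and key names taken from the hint in the snippet above
    client.publish("class", json.dumps({"class_names": class_names}))
    client.publish("speedometer", json.dumps({"speed": speed}))

# The frame itself goes to the ffmpeg server over stdout:
# sys.stdout.buffer.write(out_frame); sys.stdout.flush()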
def infer_on_video(args):
    ### TODO: Initialize the Inference Engine
    plugin = Network()

    ### TODO: Load the network model into the IE
    plugin.load_model(args.m, args.d, CPU_EXTENSION)
    net_input_shape = plugin.get_input_shape()
    # The input shape depends on the model in use: [B, C, H, W]
    print('net input shape:\n', net_input_shape)

    # Get and open video capture
    cap = cv2.VideoCapture(args.i)
    cap.open(args.i)
    print("capture 3 (video width):\n", cap.get(3))
    print("capture 4 (video height):\n", cap.get(4))

    # Grab the shape of the input
    width = int(cap.get(3))
    height = int(cap.get(4))

    # Create a video writer for the output video
    # The second argument should be `cv2.VideoWriter_fourcc('M','J','P','G')`
    # on Mac, and `0x00000021` on Linux
    out = cv2.VideoWriter('out.mp4', 0x00000021, 30, (width, height))

    # Process frames until the video ends, or process is exited
    while cap.isOpened():
        # Read the next frame
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)

        ### TODO: Pre-process the frame (each frame is an image)
        preproced_frame = cv2.resize(frame, (net_input_shape[3],
                                             net_input_shape[2]))
        preproced_frame = preproced_frame.transpose((2, 0, 1))
        preproced_frame = preproced_frame.reshape(1, *preproced_frame.shape)

        ### TODO: Perform inference on the frame
        plugin.async_inference(preproced_frame)

        ### TODO: Get the output of inference
        if plugin.wait() == 0:
            result = plugin.extract_output()
            # print('results:\n', result)
            ### TODO: Update the frame to include detected bounding boxes
            frame = draw_boxes(frame, result, args, width, height)

        # Write out the frame
        out.write(frame)

        # Break if escape key pressed
        if key_pressed == 27:
            break

    # Release the out writer, capture, and destroy any OpenCV windows
    out.release()
    cap.release()
    cv2.destroyAllWindows()
def infer_on_video():
    speak('Hello! I am Alicia. Please wait while I boot up the system.')

    # Initialize the Inference Engine
    plugin = Network()
    class_names = []

    # Load the network model into the IE
    plugin.load_model(MODEL, DEVICE)
    net_input_shape = plugin.get_input_shape()

    camera = PiCamera()
    camera.resolution = (640, 480)
    camera.framerate = 32
    rawCapture = PiRGBArray(camera, size=(640, 480))
    speak('System booted.')

    # Allow the camera to warm up
    time.sleep(0.1)

    for frame in camera.capture_continuous(rawCapture, format="bgr",
                                           use_video_port=True):
        # Grab the raw image
        image = frame.array
        key_pressed = cv2.waitKey(60)

        # Pre-process the frame
        p_frame = cv2.resize(image, (net_input_shape[3], net_input_shape[2]))
        p_frame = p_frame.transpose((2, 0, 1))
        p_frame = p_frame.reshape(1, *p_frame.shape)

        # Perform inference on the frame
        plugin.async_inference(p_frame)

        # Get the output of inference
        if plugin.wait() == 0:
            result = plugin.extract_output()
            classes = np.transpose(result[0])[1]
            # classes = np.unique(np.transpose(result[0])[1])
            old_class_names = class_names
            class_names = get_class_names(classes)

            # Announce the new environment only if something changes
            if class_names and class_names != old_class_names:
                speak_string = ''.join(class_names)
                speak(speak_string)
                print(class_names)
                print("----------------------")

        # Clear the stream in preparation for the next frame
        rawCapture.truncate(0)

        # Break if escape key pressed
        if key_pressed == 27:
            break
def infer_on_video(args):
    # Convert the args for color and confidence
    args.c = convert_color(args.c)
    args.ct = float(args.ct)

    ### Initialize the Inference Engine
    plugin = Network()

    ### Load the network model into the IE
    n, c, h, w = plugin.load_model(args.m, args.d)
    net_input_shape = plugin.get_input_shape()

    # Get and open video capture (0 for the default camera)
    cap = cv2.VideoCapture(0)

    # Grab the shape of the input
    width = int(cap.get(3))
    height = int(cap.get(4))

    # Process frames until the video ends, or process is exited
    while cap.isOpened():
        # Read the next frame
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)
        cv2.imshow('Input', frame)

        ### Pre-process the frame
        p_frame = preprocessing(frame, h, w)

        ### Perform inference on the frame
        plugin.async_inference(p_frame)

        ### Get the output of inference
        if plugin.wait() == 0:
            result = plugin.extract_output()
            processed_output = handle_pose(result, frame.shape)
            frame2 = create_output_image(frame, processed_output)
            ### Show the output with the detected pose drawn on it
            cv2.imshow('Output', frame2)

        # Break if escape key pressed
        if key_pressed == 27:
            break

    # Release the capture and destroy any OpenCV windows
    # (no video writer is created here, so there is nothing else to release)
    cap.release()
    cv2.destroyAllWindows()
def perform_facerecognition(face, model):
    plugin = Network()
    plugin.load_model(model=model)
    b, c, h, w = plugin.get_input_shape()

    # Pre-process the face crop and run inference
    p_image = preprocessing(face, h, w)
    plugin.async_inference(p_image)
    status = plugin.wait()
    if status == 0:
        result = plugin.extract_output()
        # The first row of the output is the face embedding vector
        candidate_embedding = result[0]
        return candidate_embedding
def detect_this(model, image, height, width):
    plugin = Network()
    plugin.load_model(model=model)
    b, c, h, w = plugin.get_input_shape()

    # Pre-process the image and run inference
    p_image = preprocessing(image, h, w)
    plugin.async_inference(p_image)
    status = plugin.wait()
    if status == 0:
        result = plugin.extract_output()
        # Extract the detected face from the original image
        face = output_handler(image, result, width, height)
        return face
def get_embeddings(model, face_pixels):
    # Standardize pixel values across channels
    face_pixels = face_pixels.astype('float32')
    mean, std = face_pixels.mean(), face_pixels.std()
    face_pixels = (face_pixels - mean) / std

    plugin = Network()
    plugin.load_model(model=model)
    b, c, h, w = plugin.get_input_shape()

    # Pre-process the standardized face and run inference
    preprocessed_image = preprocessing(face_pixels, h, w)
    plugin.async_inference(preprocessed_image)
    status = plugin.wait()
    if status == 0:
        embz = plugin.extract_output()
        # Return the embedding as a 1 x D row vector
        return embz[0].reshape(1, -1)
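# `get_embeddings` returns a 1 x D row vector, which suggests comparison via
# scikit-learn's pairwise metrics. A hypothetical usage; `probe_face`,
# `enrolled_face`, and the 0.5 threshold are illustrative assumptions, not
# part of the original:
from sklearn.metrics.pairwise import cosine_similarity

score = cosine_similarity(get_embeddings(model, probe_face),
                          get_embeddings(model, enrolled_face))[0][0]
print("Same person" if score > 0.5 else "Different person")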
def infer_on_video(args):
    # Convert the args for color and threshold
    args.c = convert_color(args.c)
    args.t = float(args.t)

    plugin = Network()
    plugin.load_model(args.m, args.d)
    net_input_shape = plugin.get_input_shape()

    # Get and open cam
    cap = cv2.VideoCapture(0)
    cv2.namedWindow("Face-Detection_v1")

    # Grab the shape of the input
    width = int(cap.get(3))
    height = int(cap.get(4))

    while cap.isOpened():
        # Read the next frame
        flag, frame = cap.read()
        if not flag:
            print("Failed to catch frame")
            break
        key_pressed = cv2.waitKey(60)

        # Pre-process the frame
        p_frame = cv2.resize(frame, (net_input_shape[3], net_input_shape[2]))
        p_frame = p_frame.transpose((2, 0, 1))
        p_frame = p_frame.reshape(1, *p_frame.shape)

        # Perform inference on the frame
        plugin.async_inference(p_frame)

        # Get the output of inference
        if plugin.wait() == 0:
            result = plugin.extract_output()
            # Update the frame to include detected bounding boxes
            frame = draw_boxes(frame, result, args, width, height)
            # Show the frame
            cv2.imshow("frame", frame)

        # Break if escape key pressed
        if key_pressed == 27:
            break

    # Release the capture and destroy any OpenCV windows
    cap.release()
    cv2.destroyAllWindows()
def infer_on_video(args):
    # Initialize the Inference Engine
    plugin = Network()

    # Load the network model into the IE
    plugin.load_model(args.m, args.d, CPU_EXTENSION)
    net_input_shape = plugin.get_input_shape()

    # Get and open video capture
    cap = cv2.VideoCapture(args.i)
    cap.open(args.i)

    test = [(0, 0, 0)]
    fight = 0

    # Process frames until the video ends, or process is exited
    while cap.isOpened():
        # Read the next frame
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)

        # Pre-process the frame
        p_frame = cv2.resize(frame, (net_input_shape[3], net_input_shape[2]))
        p_frame = p_frame.transpose((2, 0, 1))
        p_frame = p_frame.reshape(1, *p_frame.shape)

        # Perform inference on the frame
        plugin.async_inference(p_frame)

        # Get the output of inference
        if plugin.wait() == 0:
            result = plugin.extract_output()
            ### TODO: Process the output
            # Check the most recent detection's confidence (index 2 of an
            # SSD-style box)
            if test[-1][2] > 0.5:
                fight += 1
            if fight == 30:
                print("ALERT!!! CUT IT OUT GUYS")
                fight = 0
            # Keep the top detection from this frame for the next check
            test.append(result[0][0][0])

        # Break if escape key pressed
        if key_pressed == 27:
            break

    print(test, fight)

    # Release the capture and destroy any OpenCV windows
    cap.release()
    cv2.destroyAllWindows()
def infer_on_video(args, frame):
    # Convert the args for color and confidence
    args.c = convert_color(args.c)
    args.ct = float(args.ct)

    plugin = Network()
    plugin.load_model(args.m, args.d, CPU_EXTENSION)
    net_input_shape = plugin.get_input_shape()

    # Pre-process the frame: HWC -> 1 x C x H x W
    p_frame = cv2.resize(frame, (net_input_shape[3], net_input_shape[2]))
    p_frame = p_frame.transpose((2, 0, 1))
    p_frame = p_frame.reshape(1, *p_frame.shape)

    plugin.async_inference(p_frame)
    if plugin.wait() == 0:
        result = plugin.extract_output()
        # frame.shape is (height, width, channels)
        frame = draw_boxes(frame, result, args, frame.shape[1], frame.shape[0])
    return frame
def perform_inference():
    plugin = Network()
    plugin.load_model(model=detection_model)
    b, c, h, w = plugin.get_input_shape()

    cap = cv2.VideoCapture(INPUT_STREAM)

    # Grab the shape of the input once, before the loop
    width = int(cap.get(3))
    height = int(cap.get(4))

    while cap.isOpened():
        flag, frame = cap.read()
        if not flag:
            break

        # Pre-process the frame and run inference
        preprocessed_image = preprocessing(frame, h, w)
        plugin.async_inference(preprocessed_image)
        status = plugin.wait()
        if status == 0:
            result = plugin.extract_output()
            # print(result.shape)
            f_image = extract_faces(frame, result, width, height)
            cv2.imshow('image', f_image)

        # Quit on 'q'
        k = cv2.waitKey(1) & 0xFF
        if k == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
def infer_on_video(args):
    # Initialize the Inference Engine
    plugin = Network()

    # Load the network model into the IE
    plugin.load_model(args.m, args.d, CPU_EXTENSION)
    net_input_shape = plugin.get_input_shape()

    # Get and open video capture
    cap = cv2.VideoCapture(args.i)
    cap.open(args.i)

    # Process frames until the video ends, or process is exited
    counter = 0
    incident_flag = False
    while cap.isOpened():
        # Read the next frame
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)
        counter += 1

        # Pre-process the frame
        p_frame = cv2.resize(frame, (net_input_shape[3], net_input_shape[2]))
        p_frame = p_frame.transpose((2, 0, 1))
        p_frame = p_frame.reshape(1, *p_frame.shape)

        # Perform inference on the frame
        plugin.async_inference(p_frame)

        # Get the output of inference
        if plugin.wait() == 0:
            result = plugin.extract_output()
            ### TODO: Process the output
            incident_flag = assess_scene(result, counter, incident_flag)

        # Break if escape key pressed
        if key_pressed == 27:
            break

    # Release the capture and destroy any OpenCV windows
    cap.release()
    cv2.destroyAllWindows()
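# `assess_scene` is left undefined above; a hypothetical version, assuming a
# classifier-style output where index 1 of the first result row scores the
# "incident" class, and roughly 30 frames per second of video:
def assess_scene(result, counter, incident_flag):
    if result[0][1] == 1 and not incident_flag:
        # Log the first frame at which the incident is seen
        print("Log: Incident at {:.2f} seconds.".format(counter / 30))
        incident_flag = True
    elif result[0][1] != 1:
        # Reset once the incident class is no longer detected
        incident_flag = False
    return incident_flag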
def model_extraction(model, image):
    height, width = image.shape[0], image.shape[1]
    if height is not None:
        plugin = Network()
        plugin.load_model(model=model)
        b, c, h, w = plugin.get_input_shape()

        # Pre-process the image and run inference
        p_image = preprocessing(image, h, w)
        plugin.async_inference(p_image)
        status = plugin.wait()
        if status == 0:
            result = plugin.extract_output()
            face = extract_faces(image, result, width, height)
            return face
    else:
        # Fall back to a blank crop if no valid image was supplied
        return np.zeros((160, 160, 3))
def infer_on_video(args):
    # Convert the args for color and confidence
    args.c = convert_color(args.c)
    args.ct = float(args.ct)

    plugin = Network()
    plugin.load_model(args.m, args.d, CPU_EXTENSION)
    net_input_shape = plugin.get_input_shape()

    cap = cv2.VideoCapture(args.i)
    cap.open(args.i)
    width = int(cap.get(3))
    height = int(cap.get(4))

    # Write the annotated output to a video file
    out = cv2.VideoWriter('trespasser_detected.mp4', 0x00000021, 30,
                          (width, height))

    while cap.isOpened():
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)

        # Pre-process the frame: HWC -> 1 x C x H x W
        p_frame = cv2.resize(frame, (net_input_shape[3], net_input_shape[2]))
        p_frame = p_frame.transpose((2, 0, 1))
        p_frame = p_frame.reshape(1, *p_frame.shape)

        plugin.async_inference(p_frame)
        if plugin.wait() == 0:
            result = plugin.extract_output()
            frame = draw_boxes(frame, result, args, width, height)

        out.write(frame)
        if key_pressed == 27:
            break

    out.release()
    cap.release()
    cv2.destroyAllWindows()
def infer_on_photo(args):
    # Input: [1x3x62x62] - [1xCxHxW]
    # Outputs:
    #   name: "age_conv3", shape: [1, 1, 1, 1] - Estimated age divided by 100.
    #   name: "prob", shape: [1, 2, 1, 1] - Softmax output across 2 type
    #   classes [female, male]
    engine = Network()
    engine.load_model(args.m, args.d, CPU_EXTENSION)

    image = cv2.imread(args.i)
    net_shape = engine.get_input_shape()  # [1, 3, 62, 62]
    image = preprocess(image, net_shape[2], net_shape[3])  # H, W

    engine.async_inference(image)
    if engine.wait() == 0:
        output = engine.extract_outputs()
        # print(output)
        age = int(output['age_conv3'][0][0][0][0] * 100)
        genderM = output['prob'][0][1][0][0]
        gender = "Masculine" if genderM > 0.5 else "Feminine"
        print("Age: %d, gender: %s" % (age, gender))
def infer(imarray):
    model = "asl-recognition-0003.xml"
    inet = Network.net(model)
    exec_net = Network.load_model(inet, imarray, 'CPU')
    input_blob = next(iter(exec_net.inputs))
    input_layer = inet.inputs[input_blob].shape

    ##### Asynchronous inference
    asy_net = Network.async_inference(exec_net, imarray, input_blob)
    output_blob = next(iter(exec_net.outputs))
    enc_net = exec_net.requests[0].outputs[output_blob]

    ###### Synchronous - uncomment the 2 lines below and comment out the
    ###### 3 lines under "Asynchronous inference" to switch
    # syn_net = Network.inf_(exec_net, imarray, input_blob)
    # enc_net = Network.extract_output(syn_net, exec_net)

    # The predicted class is the index of the highest-scoring output
    code = np.argmax(enc_net)
    return code
def infer_on_video(args, model):
    # Connect to the MQTT server
    client = mqtt.Client()
    client.connect(MQTT_HOST, MQTT_PORT, MQTT_KEEPALIVE_INTERVAL)

    # Initialize the Inference Engine
    plugin = Network()

    ## Load the network model into the IE
    plugin.load_model(model, args.d, CPU_EXTENSION)
    net_input_shape = plugin.get_input_shape()

    # Get and open video capture
    cap = cv2.VideoCapture(args.i)
    cap.open(args.i)

    # Grab the shape of the input
    width = int(cap.get(3))
    height = int(cap.get(4))

    # Process frames until the video ends, or process is exited
    while cap.isOpened():
        # Read the next frame
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)

        # Pre-process the frame
        p_frame = cv2.resize(frame, (net_input_shape[3], net_input_shape[2]))
        p_frame = p_frame.transpose((2, 0, 1))
        p_frame = p_frame.reshape(1, *p_frame.shape)

        # Perform inference on the frame
        plugin.async_inference(p_frame)

        # Get the output of inference
        if plugin.wait() == 0:
            result = plugin.extract_output()

            ## Draw the output mask onto the input
            out_frame, classes = draw_masks(result, width, height)
            class_names = get_class_names(classes)
            speed = randint(50, 70)

            # Send the class names and speed details to the MQTT server
            client.publish("class", json.dumps({"class_names": class_names}))
            client.publish("speedometer", json.dumps({"speed": speed}))

            # Send the frame to the FFmpeg server
            sys.stdout.buffer.write(out_frame)
            sys.stdout.flush()

        # Break if escape key pressed
        if key_pressed == 27:
            break

    # Release the capture, destroy any OpenCV windows, and disconnect from MQTT
    cap.release()
    cv2.destroyAllWindows()
    client.disconnect()
def infer_on_stream(args):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.
    """
    image_mode = False
    video_mode = False

    # Initialise the class
    inference_network1 = Network()
    inference_network2 = Network()

    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold

    # Load the models: first for face detection, second for landmark detection
    n1, c1, h1, w1 = inference_network1.load_model(args.model1, args.device)
    n2, c2, h2, w2 = inference_network2.load_model(args.model2, args.device)

    # Handle the input stream
    if args.input == 'CAM':
        inputstream = 0
        video_mode = True
    elif args.input.endswith('jpg') or args.input.endswith('bmp') \
            or args.input.endswith('png'):
        image_mode = True
        inputstream = args.input
    elif args.input.endswith('mp4') or args.input.endswith('flv') \
            or args.input.endswith('avi'):
        inputstream = args.input
        video_mode = True
    else:
        print('Input not supported')

    # Initialize input stream capture
    cap = cv2.VideoCapture(inputstream)
    cap.open(inputstream)
    width = int(cap.get(3))
    height = int(cap.get(4))

    # Save output video
    out = cv2.VideoWriter('output.mp4', 0x00000021, 24.0, (width, height))

    # Loop until stream is over
    while cap.isOpened():
        # Read from the video capture
        flag, frame = cap.read()
        if not flag:
            break

        # Inference start time
        inf_start = time.time()

        # Pre-process the image for face detection
        p_frame1 = cv2.resize(frame, (w1, h1))
        p_frame1 = p_frame1.transpose((2, 0, 1))
        p_frame1 = p_frame1.reshape(1, *p_frame1.shape)

        # Start asynchronous inference for face detection
        inference_network1.async_inference(p_frame1)

        # Wait for the result
        if inference_network1.wait() == 0:
            # Get the results of the face detection inference request
            result1 = inference_network1.extract_output()

            # Extract the face
            s_width, s_height, crop_image, xmin, ymin = draw_boxes(
                frame, result1, args, width, height)

            # Skip the frame if no face was detected in the video
            if len(crop_image) == 0:
                print('Face Not Detected')
                continue

            # Pre-process the crop_image for facial landmark detection
            p_frame2 = cv2.resize(crop_image, (w2, h2))
            p_frame2 = p_frame2.transpose((2, 0, 1))
            p_frame2 = p_frame2.reshape(1, *p_frame2.shape)

            # Start asynchronous inference for facial landmark detection
            inference_network2.async_inference(p_frame2)

            # Wait for the result
            if inference_network2.wait() == 0:
                # Inference end time
                det_time = time.time() - inf_start

                # Get the results of the inference request
                result2 = inference_network2.extract_output()

                # Draw points on the face
                frame = draw_points(frame, result2, s_width, s_height,
                                    xmin, ymin)

                # Extract any desired stats from the results
                inf_time_message = "Inference time: {:.3f}ms".format(
                    det_time * 1000)
                cv2.putText(frame, inf_time_message, (35, 35),
                            cv2.FONT_HERSHEY_TRIPLEX, 1.5, (0, 250, 0), 2)

        # Write an output image if a single image was the input
        if image_mode:
            cv2.imwrite('output_image.jpg', frame)
        else:
            out.write(frame)

    out.release()
    cap.release()
    cv2.destroyAllWindows()
    sys.stdout.flush()
def infer_on_video(args):
    # Convert the args for confidence
    args.ct = float(args.ct)

    ### Initialize the Inference Engine
    plugin_car = Network()
    plugin_weather = Network()

    ### Load the network car-model into the IE
    plugin_car.load_model(CAR_MODEL, args.d, CPU_EXTENSION)

    ### Load the network weather-model into the IE
    plugin_weather.load_model(WEATHER_MODEL, args.d, CPU_EXTENSION)

    ### Get net_input_shape for car and weather model
    net_input_shape_car = plugin_car.get_input_shape()
    net_input_shape_weather = plugin_weather.get_input_shape()

    # Get and open video capture
    cap = cv2.VideoCapture(args.i)
    cap.open(args.i)

    # Grab the shape of the input
    width = int(cap.get(3))
    height = int(cap.get(4))

    # Create a video writer for the output video
    # The second argument should be `cv2.VideoWriter_fourcc('M','J','P','G')`
    # on Mac, and `0x00000021` on Linux
    # out = cv2.VideoWriter('out3.mp4', 0x00000021, 30, (width, height))

    # Process frames until the video ends, or process is exited
    while cap.isOpened():
        # Read the next frame
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)

        ### Pre-process the frame for the car model
        p_frame_car = cv2.resize(frame, (net_input_shape_car[3],
                                         net_input_shape_car[2]))
        p_frame_car = p_frame_car.transpose((2, 0, 1))
        p_frame_car = p_frame_car.reshape(1, *p_frame_car.shape)

        ### Pre-process the frame for the weather model (expects 255x255)
        p_frame_weather = cv2.resize(frame, (255, 255))
        p_frame_weather = p_frame_weather.transpose((2, 0, 1))
        p_frame_weather = p_frame_weather.reshape(1, 3, 255, 255)

        ### Perform inference on the frame with both models
        plugin_car.async_inference(p_frame_car)
        plugin_weather.async_inference(p_frame_weather)

        ### Get the output of inference
        if plugin_car.wait() == 0 and plugin_weather.wait() == 0:
            ### Get results for car and weather
            result = plugin_car.extract_output()
            weather_output = plugin_weather.extract_output_weather()

            ### Get the highest-scoring weather class and its label
            weather_pred = np.argmax(weather_output['1168'].flatten())
            weather_text = WEATHER_TYPES[weather_pred]
            print("Weather_text: " + str(weather_text))

            ### Update the frame to include detected bounding boxes
            visual = frame.copy()
            copied_frame = frame.copy()

            # Crop frame to the region of interest (see region_interest file)
            visual_cropped = cropImage(visual)

            # Convert colorspace from BGR to HLS
            hls = cv2.cvtColor(visual_cropped, cv2.COLOR_BGR2HLS)

            # Define yellow color range for lane-lines (e.g. for US-roads)
            lower_yellow = np.array([16, 120, 80])
            upper_yellow = np.array([21, 255, 255])
            maskY = cv2.inRange(hls, lower_yellow, upper_yellow)

            # Define white color range for lane-lines
            lower_white = np.array([0, 144, 0])      # lH, lL, lS
            upper_white = np.array([71, 234, 255])   # uH, uL, uS
            maskW = cv2.inRange(hls, lower_white, upper_white)

            # Combine both masks
            mask = maskW + maskY

            # Apply closing on the mask
            kernel = np.ones((7, 7), np.uint8)
            visual = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
            visual = np.stack((visual,) * 3, axis=-1)

            # Add cars to visual
            visual, dist_to_car = draw_cars(visual, result, args, width,
                                            height, icon)

            # Add traffic sign to visual
            visual = draw_sign(frame, visual, sign)

            # Add weather text
            visual = print_weather(frame, visual, weather_text)

            # Get the speed of the car
            velocity = getSpeed(frame)
            global local_velocity
            if velocity is not None:
                local_velocity = velocity

            # Add distance to visual
            visual = draw_distance(visual, local_velocity, dist_to_car)

            # Write out the frame
            # out.write(frame)
            cv2.imshow("Visual Output", visual)
            cv2.imshow("Orig", frame)

        # Break if escape key pressed
        if key_pressed == 27:
            break

    # Release the out writer, capture, and destroy any OpenCV windows
    # out.release()
    cap.release()
    cv2.destroyAllWindows()
def capture_video(args):
    # Convert the args for confidence
    args.ct = float(args.ct)

    # Initialize the inference engines for people and violence
    people_plugin = Network()
    violence_plugin = Network()

    # Load the network models into the IE
    people_plugin.load_model(args.m, args.d, None)

    cap = cv2.VideoCapture(args.i)
    cap.open(args.i)

    # Grab the shape of the input
    width = int(cap.get(3))
    height = int(cap.get(4))

    # Create a video writer for the output video
    # The second argument should be `cv2.VideoWriter_fourcc('M','J','P','G')`
    # on Mac, and `0x00000021` on Linux
    # video_code = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
    # out = cv2.VideoWriter('out.mp4', video_code, 30, (width, height))

    while cap.isOpened():
        # Capture frame by frame
        flag, frame = cap.read()
        if not flag:
            break
        # cv2.imshow('Frame', frame)
        key_pressed = cv2.waitKey(60)

        # Width and height of the person detection model input
        dsize = (544, 320)

        # Get the current position of the video
        current_pos = cap.get(cv2.CAP_PROP_POS_MSEC)

        # Pre-process the frame: HWC -> 1 x C x H x W
        p_frame = cv2.resize(frame, dsize)
        p_frame = p_frame.transpose((2, 0, 1))
        p_frame = p_frame.reshape(1, 3, 320, 544)

        # Perform inference on the frame
        people_plugin.async_inference(p_frame)

        ### Get the output of inference
        if people_plugin.wait() == 0:
            people_result = people_plugin.extract_output()
            ### Update the frame to include detected bounding boxes
            output = inference_result(people_result, current_pos, args)
            print(output)

        # Break if escape key pressed
        if key_pressed == 27:
            break

    # Release the capture and destroy any OpenCV windows
    cap.release()
    cv2.destroyAllWindows()