def main():
    """Continuously run region detection on frames from the default camera."""
    camera = cv2.VideoCapture(0)

    # Load the list of category labels the model can detect.
    with open("categories.txt", "r") as categories_file:
        categories = categories_file.read().splitlines()

    input_shape = model.get_default_input_shape()
    output_shape = model.get_default_output_shape()

    # Loop until the user presses the Escape key (27).
    while (cv2.waitKey(1) & 0xFF) != 27:
        # Get the image from the camera
        frame = get_image_from_camera(camera)

        # Crop and resize to the model's input size while maintaining aspect
        # ratio, reordering BGR -> RGB. ravel=False keeps a 2-D image (not a
        # flat numpy array) so the detected regions can be drawn on it later.
        frame = helpers.prepare_image_for_model(
            frame, input_shape.columns, input_shape.rows,
            reorder_to_rgb=True, ravel=False)
        model_input = frame.astype(np.float32).ravel()

        # Run the model; the scores come back as a flat array.
        raw_output = model.predict(model_input)

        # Shape the flat output into the (rows, columns, channels) tensor the
        # post-processing helpers expect.
        raw_output = np.reshape(
            raw_output,
            (output_shape.rows, output_shape.columns, output_shape.channels))

        # Extract candidate regions from the model output, then drop
        # overlapping detections of the same object.
        detections = helpers.get_regions(
            raw_output, categories, CONFIDENCE_THRESHOLD, ANCHOR_BOXES)
        detections = helpers.non_max_suppression(
            detections, OVERLAP_THRESHOLD, categories)

        # Draw the surviving regions, convert back to BGR for OpenCV display,
        # and show the result.
        helpers.draw_regions_on_image(frame, detections)
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        cv2.imshow("Region detection", frame)
def process_frame(frame, categories, frame_count, output_frame_path):
    """Classify a single video frame, annotate it, and save it to disk.

    Args:
        frame: BGR image from OpenCV, or None to skip processing.
        categories: list of category label strings indexed by class id.
        frame_count: integer used to number the saved output file.
        output_frame_path: directory where the annotated frame is written.

    Returns:
        The header text describing the top-5 predictions, or None when the
        frame is invalid or nothing was recognized.
    """
    if frame is None:
        print("Not valid input frame! Skip...")
        return

    # Get the model's input shape. We will use this information later to
    # resize images appropriately.
    input_shape = model.get_default_input_shape()

    # Get the model's output shape and create an array to hold the model's
    # output predictions.
    output_shape = model.get_default_output_shape()
    predictions = model.FloatVector(output_shape.Size())

    # Prepare an image for processing
    # - Resize and center-crop to the required width and height while
    #   preserving aspect ratio.
    # - OpenCV gives the image in BGR order. If needed, re-order the
    #   channels to RGB.
    # - Convert the OpenCV result to a std::vector<float>
    input_data = helpers.prepare_image_for_model(
        frame, input_shape.columns, input_shape.rows)

    # Send the image to the compiled model and fill the predictions vector
    # with scores, measuring how long the prediction takes.
    start = time.time()
    model.predict(input_data, predictions)
    end = time.time()

    # Get the value of the top 5 predictions.
    top_5 = helpers.get_top_n(predictions, 5)

    if top_5:
        # Header text represents the top-5 predictions,
        # e.g. "(92%) golden retriever, (5%) labrador".
        header_text = ", ".join(["({:.0%}) {}".format(
            element[1], categories[element[0]]) for element in top_5])
        helpers.draw_header(frame, header_text)

        # Footer text represents the evaluation time for this frame.
        time_delta = end - start
        footer_text = "{:.0f}ms/frame".format(time_delta * 1000)
        helpers.draw_footer(frame, footer_text)

        # Save the processed frame.
        output_file_path = os.path.join(
            output_frame_path, "recognized_{}.png".format(frame_count))
        cv2.imwrite(output_file_path, frame)

        print("Processed frame {}: header text: {}, footer text: {}".format(
            frame_count, header_text, footer_text))
        return header_text
    else:
        # BUG FIX: the "{}" placeholder previously had no .format() argument,
        # so the literal braces were printed instead of the frame number.
        print("Processed frame {}: No recognized frame!".format(frame_count))
        return None
def main():
    """Run region detection on live camera frames, drawing the detected
    regions on the full-resolution original image."""
    camera = cv2.VideoCapture(0)
    # Request a specific resolution (sometimes the camera has a very small
    # default resolution).
    helpers.set_camera_resolution(camera, 1280, 720)

    # Load the list of category labels the model can detect.
    with open("categories.txt", "r") as categories_file:
        categories = categories_file.read().splitlines()

    input_shape = model.get_default_input_shape()
    output_shape = model.get_default_output_shape()

    # Loop until the user presses the Escape key (27).
    while (cv2.waitKey(1) & 0xFF) != 27:
        # Get the image from the camera.
        original = get_image_from_camera(camera)

        # Crop and resize to the model's input size while maintaining aspect
        # ratio, reordering BGR -> RGB. `offset` and `scale` describe how the
        # model-sized image maps back onto the original so regions can be
        # drawn on the full-resolution frame.
        image, offset, scale = helpers.prepare_image_for_model(
            original, input_shape.columns, input_shape.rows,
            reorder_to_rgb=True, ravel=False)

        # Get the predictions by running the model. `predictions` is returned
        # as a flat array.
        predictions = model.predict(image)

        # CONSISTENCY FIX: reshape using the model's reported output shape
        # instead of the hard-coded (13, 13, 125) tensor dimensions; this
        # matches the sibling region-detection script and keeps working if
        # the model's output geometry changes.
        predictions = np.reshape(
            predictions,
            (output_shape.rows, output_shape.columns, output_shape.channels))

        # Post-process the output into regions, then remove overlapping
        # detections of the same object.
        regions = helpers.get_regions(
            predictions, categories, CONFIDENCE_THRESHOLD, ANCHOR_BOXES)
        regions = helpers.non_max_suppression(
            regions, OVERLAP_THRESHOLD, categories)

        # Scale region coordinates back onto the original image and draw.
        scale = (scale[0] * image.shape[1], scale[1] * image.shape[0])
        helpers.draw_regions_on_image(original, regions, offset, scale)

        # Display the image.
        cv2.imshow("Region detection", original)
colEnd = colStart + image.shape[0] # Center crop the image maintaining aspect ratio cropped = image[rowStart:rowEnd, colStart:colEnd] # Resize to model's requirements resized = cv2.resize(cropped, (requiredHeight, requiredWidth)) # Re-order if needed if not reorderToRGB: resized = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB) # Return as a vector of floats result = resized.astype(np.float).ravel() return result # Get the input and output shapes input_shape = model.get_default_input_shape() output_shape = model.get_default_output_shape() print("Model input shape: " + str([input_shape.rows, input_shape.columns, input_shape.channels])) print("Model output shape: " + str([output_shape.rows, output_shape.columns, output_shape.channels])) # Create a blank output of the appropriate size to hold the prediction results predictions = model.FloatVector(output_shape.Size()) # Read in the sample image image = cv2.imread("coffeemug.jpg") # Prepare the image to send to the model input = prepare_image_for_model(image, input_shape.columns, input_shape.rows)
def main():
    """Entry point for the script when called directly"""
    # Open the default video camera (index 0). To use a different camera,
    # change the camera index.
    camera = cv2.VideoCapture(0)

    # Read the category names along with the dog and cat label subsets.
    with open("categories.txt", "r") as categories_file,\
            open("dogs.txt", "r") as dogs_file,\
            open("cats.txt", "r") as cats_file:
        categories = categories_file.read().splitlines()
        dogs = dogs_file.read().splitlines()
        cats = cats_file.read().splitlines()

    # The model's input dimensions drive how camera frames are resized below.
    input_shape = model.get_default_input_shape()

    # One reusable vector the compiled model fills with class probabilities.
    predictions = model.FloatVector(model.get_default_output_shape().Size())

    # Run until any key is pressed (waitKey reports 0xFF while idle).
    while (cv2.waitKey(1) & 0xFF) == 0xFF:
        # Grab a frame; to classify other images, load them from another
        # source instead.
        frame = get_image_from_camera(camera)

        # Crop/resize with aspect ratio preserved, reorder channels if
        # needed, and ravel to a flat float array the model accepts.
        model_input = helpers.prepare_image_for_model(
            frame, input_shape.columns, input_shape.rows)

        # Fill `predictions` with the probability of each category.
        model.predict(model_input, predictions)

        # Keep only the single most confident prediction above a 5% floor.
        best = helpers.get_top_n(predictions, 1, threshold=0.05)

        # Map the predicted label onto one of our groups, if any.
        group = ""
        label = categories[best[0][0]] if best else ""
        if best:
            if label_in_set(label, dogs):
                group = "Dog"
            elif label_in_set(label, cats):
                group = "Cat"

        header_text = ""
        if group:
            # A known group was seen: react to it and build the banner text.
            take_action(group)
            header_text = "({:.0%}) {}".format(best[0][1], group)
        helpers.draw_header(frame, header_text)

        # Display the image using opencv
        cv2.imshow("Grouping", frame)
def main():
    """Classify camera frames and display whether a dog or a cat was seen."""
    # Open the video camera. To use a different camera, change the camera
    # index.
    camera = cv2.VideoCapture(0)

    # BUG FIX: the category files were opened without ever being closed
    # (leaked file handles); context managers guarantee they are released.
    with open('categories.txt', 'r') as categoriesFile, \
            open('dogs.txt', 'r') as dogsFile, \
            open('cats.txt', 'r') as catsFile:
        categories = categoriesFile.read().splitlines()
        dogs = dogsFile.read().splitlines()
        cats = catsFile.read().splitlines()

    # Get the model's input dimensions. We'll use this information later to
    # resize images appropriately.
    inputShape = model.get_default_input_shape()

    # Create a vector to hold the model's output predictions.
    outputShape = model.get_default_output_shape()
    predictions = model.FloatVector(outputShape.Size())

    headerText = ""

    while ((cv2.waitKey(1) & 0xFF) == 0xFF):
        # Get an image from the camera. If you'd like to use a different
        # image, load the image from some other source.
        image = get_image_from_camera(camera)

        # Prepare the image to pass to the model. This helper:
        # - crops and resizes the image maintaining proper aspect ratio
        # - reorders the image channels if needed
        # - returns the data as a ravelled numpy array of floats so it can
        #   be handed to the model
        # FIX: renamed from `input`, which shadowed the builtin of the same
        # name.
        inputData = helpers.prepare_image_for_model(
            image, inputShape.columns, inputShape.rows)

        # Get the predicted classes using the model's predict function on
        # the image input data. The predictions are returned as a vector
        # with the probability that the image contains the class represented
        # by that index.
        model.predict(inputData, predictions)

        # Grab the value of the top prediction and its index, which
        # represents the most confident match and its class or category.
        topN = helpers.get_top_n(predictions, 1, threshold=0.05)

        # See whether the prediction is in one of our groups.
        # (The unused `caption` local was removed.)
        group = ""
        label = ""
        if len(topN) > 0:
            top = topN[0]
            label = categories[top[0]]
            if label_in_set(label, dogs):
                group = "Dog"
            elif label_in_set(label, cats):
                group = "Cat"

        if not group == "":
            # A group was detected, so take action.
            top = topN[0]
            take_action(group)
            headerText = "(" + str(int(top[1] * 100)) + "%) " + group
        else:
            # No group was detected.
            headerText = ""

        helpers.draw_header(image, headerText)

        # Display the image using opencv.
        cv2.imshow('Grouping', image)
def main():
    """Continuously classify camera frames, overlaying the top-5 predictions
    and a running mean of the per-frame prediction time."""
    # Open the video camera. To use a different camera, change the camera
    # index.
    camera = cv2.VideoCapture(0)

    # Read the category names.
    with open("categories.txt", "r") as categories_file:
        categories = categories_file.read().splitlines()

    # The model's input shape tells us how to resize frames below.
    input_shape = model.get_default_input_shape()

    # Pre-allocate the output vector the compiled model fills with scores.
    output_shape = model.get_default_output_shape()
    predictions = model.FloatVector(output_shape.Size())

    # Rolling record of per-frame prediction durations, plus their mean.
    prediction_times = []
    mean_time_to_predict = 0.0

    # Run until any key is pressed.
    while (cv2.waitKey(1) & 0xFF) == 0xFF:
        frame = get_image_from_camera(camera)

        # Prepare an image for processing
        # - Resize and center-crop to the required width and height while
        #   preserving aspect ratio.
        # - OpenCV gives the image in BGR order. If needed, re-order the
        #   channels to RGB.
        # - Convert the OpenCV result to a std::vector<float>
        input_data = helpers.prepare_image_for_model(
            frame, input_shape.columns, input_shape.rows)

        # Time a single prediction of the compiled model.
        start = time.time()
        model.predict(input_data, predictions)
        end = time.time()

        # Header: the five most confident categories with probabilities.
        top_5 = helpers.get_top_n(predictions, 5)
        header_text = ", ".join([
            "({:.0%}) {}".format(element[1], categories[element[0]])
            for element in top_5
        ])
        helpers.draw_header(frame, header_text)

        # Footer: mean evaluation time over the frames seen so far.
        mean_time_to_predict = helpers.get_mean_duration(
            prediction_times, end - start)
        footer_text = "{:.0f}ms/frame".format(mean_time_to_predict * 1000)
        helpers.draw_footer(frame, footer_text)

        # Display the annotated frame.
        cv2.imshow("ELL model", frame)

    # Report the overall mean once the loop exits.
    print("Mean prediction time: {:.0f}ms/frame".format(mean_time_to_predict
                                                        * 1000))