def main():
    """Classify camera frames and overlay the top predictions on each frame.

    Reads category names from ``categories.txt``, then repeatedly grabs a
    frame from camera 0, runs the model on it, draws the top five
    predictions as a header and the running prediction time as a footer, and
    shows the annotated frame. The loop ends as soon as ``cv2.waitKey``
    reports a key press. The last computed mean prediction time is printed
    before returning.
    """
    # Open the video camera. To use a different camera, change the camera
    # index.
    camera = cv2.VideoCapture(0)

    # Declared up front so the summary below is valid even if the loop body
    # never runs.
    mean_time_to_predict = 0.0

    try:
        # Read the category names
        with open("categories.txt", "r") as categories_file:
            categories = categories_file.read().splitlines()

        # Get the model's input shape. It is used to resize images
        # appropriately.
        input_shape = model.get_default_input_shape()

        # History of prediction durations, maintained by
        # helpers.get_mean_duration
        prediction_times = []

        # waitKey returns -1 (0xFF after masking) while no key is pressed
        while (cv2.waitKey(1) & 0xFF) == 0xFF:
            # Get an image from the camera.
            image = get_image_from_camera(camera)

            # Prepare the image for the model. Per the helper's contract it
            # resizes/center-crops to the requested width and height and
            # returns data suitable for model.predict.
            input_data = helpers.prepare_image_for_model(
                image, input_shape.columns, input_shape.rows)

            # Run the model and measure how long the prediction takes
            start = time.time()
            predictions = model.predict(input_data)
            end = time.time()

            # Each entry of top_5 is indexed as (category index, score)
            top_5 = helpers.get_top_n(predictions, 5)

            # Header text lists the top predictions as "(NN%) label"
            header_text = ", ".join(
                "({:.0%}) {}".format(element[1], categories[element[0]])
                for element in top_5)
            helpers.draw_header(image, header_text)

            # Footer text shows the mean evaluation time in milliseconds
            mean_time_to_predict = helpers.get_mean_duration(
                prediction_times, end - start)
            footer_text = "{:.0f}ms/frame".format(mean_time_to_predict * 1000)
            helpers.draw_footer(image, footer_text)

            # Display the image
            cv2.imshow("ELL model", image)
    finally:
        # Always give the capture device back, even if a frame fails
        camera.release()

    print("Mean prediction time: {:.0f}ms/frame".format(
        mean_time_to_predict * 1000))
def get_results_ell(frame):
    """Run the model on ``frame`` and return its top-5 results as text.

    The category names are read from ``categories.txt`` on every call. The
    returned string is a comma-separated list of "(NN%) label" entries; it
    is also printed to stdout.
    """
    with open("categories.txt", "r") as labels_file:
        labels = labels_file.read().splitlines()

    # Size the frame to what the model expects, then score it
    shape = model.get_default_input_shape()
    model_input = helpers.prepare_image_for_model(
        frame, shape.columns, shape.rows)
    scores = model.predict(model_input)

    # Each entry is indexed as (category index, score)
    parts = []
    for entry in helpers.get_top_n(scores, 5):
        parts.append("({:.0%}) {}".format(entry[1], labels[entry[0]]))
    summary = ", ".join(parts)

    print("Printing header_text")
    print(summary)
    return str(summary)
def process_frame(frame, categories, frame_count, output_frame_path):
    """Classify one frame, annotate it, and save it as a PNG.

    Args:
        frame: Image to classify; ``None`` is reported and skipped.
        categories: Sequence of label names, indexed by the category index
            found in each top-N entry.
        frame_count: Frame number, used in the output file name and in the
            log messages.
        output_frame_path: Directory the annotated frame is written to.

    Returns:
        The header text describing the top predictions, or ``None`` when the
        frame is invalid or nothing was recognized.
    """
    if frame is None:
        print("Not valid input frame! Skip...")
        return None

    # Get the model's input shape. It is used to resize the image
    # appropriately.
    input_shape = model.get_default_input_shape()

    # Get the model's output shape and create a vector to hold the model's
    # output predictions
    output_shape = model.get_default_output_shape()
    predictions = model.FloatVector(output_shape.Size())

    # Prepare the image for the model at the required width and height
    input_data = helpers.prepare_image_for_model(
        frame, input_shape.columns, input_shape.rows)

    # Send the image to the compiled model and fill the predictions vector
    # with scores, measuring how long it takes
    start = time.time()
    model.predict(input_data, predictions)
    end = time.time()

    # Each entry of top_5 is indexed as (category index, score)
    top_5 = helpers.get_top_n(predictions, 5)
    if len(top_5) == 0:
        # The original message was missing its format argument and printed
        # a literal "{}" instead of the frame number.
        print("Processed frame {}: No recognized frame!".format(frame_count))
        return None

    # Header text lists the top predictions as "(NN%) label"
    header_text = ", ".join(
        "({:.0%}) {}".format(element[1], categories[element[0]])
        for element in top_5)
    helpers.draw_header(frame, header_text)

    # Footer text shows the evaluation time of this single frame
    time_delta = end - start
    footer_text = "{:.0f}ms/frame".format(time_delta * 1000)
    helpers.draw_footer(frame, footer_text)

    # Save the processed frame
    output_file_path = os.path.join(
        output_frame_path, "recognized_{}.png".format(frame_count))
    cv2.imwrite(output_file_path, frame)
    print("Processed frame {}: header text: {}, footer text: {}".format(
        frame_count, header_text, footer_text))
    return header_text
def main():
    """Run live region detection on camera frames until ESC is pressed.

    Reads category names from ``categories.txt``, then for each frame from
    camera 0: prepares the image, runs the model, reshapes the flat output
    to the model's reported output shape, extracts and de-duplicates the
    detected regions, draws them on the image, and displays it.
    """
    camera = cv2.VideoCapture(0)

    try:
        with open("categories.txt", "r") as categories_file:
            categories = categories_file.read().splitlines()

        input_shape = model.get_default_input_shape()
        output_shape = model.get_default_output_shape()

        # The target shape does not change between frames, so build it once
        prediction_shape = (
            output_shape.rows, output_shape.columns, output_shape.channels)

        # 27 is the key code for ESC
        while (cv2.waitKey(1) & 0xFF) != 27:
            # Get the image from the camera
            image = get_image_from_camera(camera)

            # Prepare the image to pass to the model. With ravel=False the
            # helper returns the resulting image rather than a flat array,
            # because the regions are drawn on top of it for display. The
            # helper is also asked to reorder the channels from BGR to RGB.
            image = helpers.prepare_image_for_model(
                image, input_shape.columns, input_shape.rows,
                reorder_to_rgb=True, ravel=False)

            input_data = image.astype(np.float32).ravel()

            # Get the predictions by running the model. `predictions` is
            # returned as a flat array
            predictions = model.predict(input_data)

            # Reshape the output of the model into a tensor that matches the
            # expected shape
            predictions = np.reshape(predictions, prediction_shape)

            # Post-process to extract the regions from the model output
            regions = helpers.get_regions(
                predictions, categories, CONFIDENCE_THRESHOLD, ANCHOR_BOXES)

            # Get rid of any overlapping regions for the same object
            regions = helpers.non_max_suppression(
                regions, OVERLAP_THRESHOLD, categories)

            # Draw the regions onto the image
            helpers.draw_regions_on_image(image, regions)

            # Convert back to BGR for display with OpenCV
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            cv2.imshow("Region detection", image)
    finally:
        # Always give the capture device back, even if a frame fails
        camera.release()
def main():
    """Run live region detection and draw results on the full-size frame.

    Requests a 1280x720 capture resolution, reads category names from
    ``categories.txt``, then for each frame from camera 0 until ESC is
    pressed: prepares a model-sized copy of the frame, runs the model,
    extracts and de-duplicates the detected regions, and draws them on the
    original frame using the offset and scale returned by the helper.
    """
    camera = cv2.VideoCapture(0)

    try:
        # Request a specific resolution (sometimes the camera has a very
        # small default resolution)
        helpers.set_camera_resolution(camera, 1280, 720)

        with open("categories.txt", "r") as categories_file:
            categories = categories_file.read().splitlines()

        input_shape = model.get_default_input_shape()

        # NOTE(review): the output tensor shape is hard-coded here rather
        # than taken from model.get_default_output_shape() - confirm it
        # matches the compiled model before swapping models.
        prediction_shape = (13, 13, 125)

        # 27 is the key code for ESC
        while (cv2.waitKey(1) & 0xFF) != 27:
            # Get the image from the camera
            original = get_image_from_camera(camera)

            # Prepare the image to pass to the model. With ravel=False the
            # helper returns the resulting image instead of a flat array,
            # together with the offset and scale it applied. The helper is
            # also asked to reorder the channels from BGR to RGB.
            image, offset, scale = helpers.prepare_image_for_model(
                original, input_shape.columns, input_shape.rows,
                reorder_to_rgb=True, ravel=False)

            # Get the predictions by running the model. `predictions` is
            # returned as a flat array
            predictions = model.predict(image)

            # Reshape the output of the model into a tensor that matches the
            # expected shape
            predictions = np.reshape(predictions, prediction_shape)

            # Post-process to extract the regions from the model output
            regions = helpers.get_regions(
                predictions, categories, CONFIDENCE_THRESHOLD, ANCHOR_BOXES)

            # Get rid of any overlapping regions for the same object
            regions = helpers.non_max_suppression(
                regions, OVERLAP_THRESHOLD, categories)

            # Multiply the helper's scale by the prepared image's width and
            # height; kept under its own name so `scale` is not rebound to a
            # different meaning.
            draw_scale = (scale[0] * image.shape[1],
                          scale[1] * image.shape[0])
            helpers.draw_regions_on_image(
                original, regions, offset, draw_scale)

            # Display the image
            cv2.imshow("Region detection", original)
    finally:
        # Always give the capture device back, even if a frame fails
        camera.release()
def __init__(self, camera, cats, dogs):
    """Set up the wrapper with its camera source and the two label groups.

    Stores ``camera``, ``cats`` and ``dogs`` on the instance, records the
    model's default input shape, and starts with no current image.
    """
    model.Model.__init__(self)

    # Caller-supplied camera source and group lists
    self.camera, self.dogs, self.cats = camera, dogs, cats

    # Input dimensions of the model; used later to resize images
    # appropriately.
    self.input_shape = model.get_default_input_shape()

    # Most recent image from the camera or another source; none yet
    self.image = None
def model_predict(self, image):
    """Classify ``image`` and return the best label together with its score.

    Returns:
        ``(word, predict_value)`` for the first entry of the top
        predictions, or ``(None, None)`` when no prediction is returned.
    """
    # Label names, one per line; looked up by the index stored in a
    # prediction entry
    with open("categories.txt", "r") as labels_file:
        labels = labels_file.read().splitlines()

    # Size the image to what the model wants and run the prediction
    shape = model.get_default_input_shape()
    model_input = emanager.prepare_image_for_model(
        image, shape.columns, shape.rows)
    scores = model.predict(model_input)

    # Up to two candidates are requested; only the first one is reported
    best = emanager.get_top_n(scores, 2)
    if len(best) >= 1:
        # Something was recognized: entry is indexed as (label index, score)
        label = labels[best[0][0]]
        return label, best[0][1]

    # Nothing was recognized
    return None, None
colStart = int((image.shape[1] - image.shape[0]) / 2) colEnd = colStart + image.shape[0] # Center crop the image maintaining aspect ratio cropped = image[rowStart:rowEnd, colStart:colEnd] # Resize to model's requirements resized = cv2.resize(cropped, (requiredHeight, requiredWidth)) # Re-order if needed if not reorderToRGB: resized = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB) # Return as a vector of floats result = resized.astype(np.float).ravel() return result # Get the input and output shapes input_shape = model.get_default_input_shape() output_shape = model.get_default_output_shape() print("Model input shape: " + str([input_shape.rows, input_shape.columns, input_shape.channels])) print("Model output shape: " + str([output_shape.rows, output_shape.columns, output_shape.channels])) # Create a blank output of the appropriate size to hold the prediction results predictions = model.FloatVector(output_shape.Size()) # Read in the sample image image = cv2.imread("coffeemug.jpg") # Prepare the image to send to the model input = prepare_image_for_model(image, input_shape.columns, input_shape.rows)
def main():
    """Watch the Unity image folder and classify each expected screenshot.

    Looks for files named ``screen_<W>x<H>_<n>.jpg`` in ``UnityImages`` (a
    sibling of the script directory), starting at ``n = 0`` and counting up.
    Each image is shown, run through the model, and then either deleted
    (nothing recognized) or moved to ``GoodImages`` with its top label
    appended to ``wordlist.txt``. This function loops forever.
    """
    # Import Notes:
    # 1) If the resolution used to capture images changes in Unity, it needs
    #    to change here too so the files can be found.
    # 2) Need to delay this by a second or two to make sure the model is
    #    never called on an image that is not fully there yet.
    # 3) How is this script being called?
    # 4) GUI!

    image_val = 0
    picW = 256
    picH = 256
    previous_dir = os.path.dirname(SCRIPT_DIR)

    # Initialize log properties
    logging.basicConfig(level=logging.DEBUG,
                        format='%(asctime)s - %(levelname)s - %(message)s')
    logging.debug('Starting Our Seamless Journey')

    # Get the label names for the model
    with open("categories.txt", "r") as categories_file:
        categories = categories_file.read().splitlines()

    # Build the paths of the folders and files this script works with
    images_dir = "{0}/UnityImages".format(previous_dir)
    good_images_dir = "{0}/GoodImages".format(previous_dir)
    word_list_path = "{0}/wordlist.txt".format(SCRIPT_DIR)

    # Make sure both image folders exist. exist_ok avoids the
    # check-then-create race, and no working-directory change is needed
    # because the paths already include the parent directory.
    print('Previous Directory: {0}'.format(previous_dir))
    os.makedirs(images_dir, exist_ok=True)
    os.makedirs(good_images_dir, exist_ok=True)

    # Starting a new run of the program: delete the old version of the list.
    if os.path.exists(word_list_path):
        os.remove(word_list_path)

    # Looked up once here instead of once per image
    input_shape = model.get_default_input_shape()

    while True:
        image_path = 'screen_{0}x{1}_{2}.jpg'.format(picW, picH, image_val)
        full_image_path = '{0}/{1}'.format(images_dir, image_path)

        # Poll once per second until the expected image shows up
        while not os.path.exists(full_image_path):
            print('It seems we dont have this image yet, we are waiting on it')
            time.sleep(1)

        # Now that the image file exists, prepare it for object detection
        print('This is where we proccess')

        # Show the image; the context manager makes sure the file handle is
        # closed before the file is deleted or moved below.
        with Image.open(full_image_path) as preview:
            preview.show()

        prep_image = cv2.imread(full_image_path)
        input_data = emanager.prepare_image_for_model(
            prep_image, input_shape.columns, input_shape.rows)
        predictions = model.predict(input_data)
        top_5 = emanager.get_top_n(predictions, 5)

        if len(top_5) < 1:
            print("We got nothing in this!")
            # Nothing recognized: delete the image file
            os.remove(full_image_path)
        else:
            # Get the word that the picture was recognized as; the first
            # entry of top_5 is indexed as (category index, score)
            word = categories[top_5[0][0]]
            output = "We actually got something back! Word: {0}! ImageNumber: {1}".format(
                word, image_val)
            print(output)

            # Move the image from the current folder to the good images
            # folder
            good_images_path = "{0}/{1}".format(good_images_dir, image_path)
            shutil.move(full_image_path, good_images_path)

            # Append the recognized word to the word list file
            with open(word_list_path, "a+") as f:
                f.write("{}\n".format(word))

        time.sleep(1)

        # Move on to the next image number
        image_val = image_val + 1
def main():
    """Entry point for the script when called directly.

    Reads ``categories.txt``, ``dogs.txt`` and ``cats.txt``, then classifies
    frames from camera 0 until ``cv2.waitKey`` reports a key press. When the
    top prediction belongs to the dog or cat group, ``take_action`` is
    called with the group name and the group is drawn as the frame header.
    """
    # Open the video camera. To use a different camera, change the camera
    # index.
    camera = cv2.VideoCapture(0)

    try:
        # Read the category names and the two group lists.
        # NOTE(review): `categories` is not used by the grouping logic
        # below; it is still read so a missing file fails as before.
        with open("categories.txt", "r") as categories_file,\
                open("dogs.txt", "r") as dogs_file,\
                open("cats.txt", "r") as cats_file:
            categories = categories_file.read().splitlines()
            dogs = dogs_file.read().splitlines()
            cats = cats_file.read().splitlines()

        # Get the model's input dimensions. They are used to resize images
        # appropriately.
        input_shape = model.get_default_input_shape()

        # waitKey returns -1 (0xFF after masking) while no key is pressed
        while (cv2.waitKey(1) & 0xFF) == 0xFF:
            # Get an image from the camera. If you'd like to use a different
            # image, load the image from some other source.
            image = get_image_from_camera(camera)

            # Prepare the image to pass to the model at the required width
            # and height
            input_data = helpers.prepare_image_for_model(
                image, input_shape.columns, input_shape.rows)

            # Get the predicted classes using the model's predict function
            # on the image input data.
            predictions = model.predict(input_data)

            # Grab the top prediction above the threshold, if any. Each
            # entry is indexed as (prediction index, score).
            top_n = helpers.get_top_n(predictions, 1, threshold=0.05)

            # See whether the prediction is in one of our groups
            group = ""
            if top_n:
                prediction_index = top_n[0][0]
                if prediction_index_in_set(prediction_index, dogs):
                    group = "Dog"
                elif prediction_index_in_set(prediction_index, cats):
                    group = "Cat"

            header_text = ""
            if group:
                # A group was detected, so take action
                take_action(group)
                header_text = "({:.0%}) {}".format(top_n[0][1], group)

            helpers.draw_header(image, header_text)

            # Display the image using opencv
            cv2.imshow("Grouping", image)
    finally:
        # Always give the capture device back, even if a frame fails
        camera.release()
def main():
    """Group camera frames into "Dog" / "Cat" using label lists.

    Reads ``categories.txt``, ``dogs.txt`` and ``cats.txt``, then classifies
    frames from camera 0 until ``cv2.waitKey`` reports a key press. The top
    prediction's label is looked up in ``categories``; when it belongs to
    the dog or cat list, ``take_action`` is called with the group name and
    the group is drawn as the frame header.
    """
    # Open the video camera. To use a different camera, change the camera
    # index.
    camera = cv2.VideoCapture(0)

    try:
        # Read the category names and the two group lists; the context
        # manager closes the files once they have been read.
        with open('categories.txt', 'r') as categories_file,\
                open('dogs.txt', 'r') as dogs_file,\
                open('cats.txt', 'r') as cats_file:
            categories = categories_file.read().splitlines()
            dogs = dogs_file.read().splitlines()
            cats = cats_file.read().splitlines()

        # Get the model's input dimensions. They are used to resize images
        # appropriately.
        input_shape = model.get_default_input_shape()

        # Create a vector to hold the model's output predictions
        output_shape = model.get_default_output_shape()
        predictions = model.FloatVector(output_shape.Size())

        # waitKey returns -1 (0xFF after masking) while no key is pressed
        while (cv2.waitKey(1) & 0xFF) == 0xFF:
            # Get an image from the camera. If you'd like to use a different
            # image, load the image from some other source.
            image = get_image_from_camera(camera)

            # Prepare the image to pass to the model at the required width
            # and height. Named `input_data` to avoid shadowing the builtin
            # `input`.
            input_data = helpers.prepare_image_for_model(
                image, input_shape.columns, input_shape.rows)

            # Run the model; the scores are written into `predictions`
            model.predict(input_data, predictions)

            # Grab the top prediction above the threshold, if any. Each
            # entry is indexed as (category index, score).
            top_n = helpers.get_top_n(predictions, 1, threshold=0.05)

            # See whether the prediction is in one of our groups
            group = ""
            if len(top_n) > 0:
                label = categories[top_n[0][0]]
                if label_in_set(label, dogs):
                    group = "Dog"
                elif label_in_set(label, cats):
                    group = "Cat"

            if group:
                # A group was detected, so take action
                take_action(group)
                # int() truncates the percentage, as the original did
                header_text = "(" + str(int(top_n[0][1] * 100)) + "%) " + group
            else:
                # No group was detected
                header_text = ""

            helpers.draw_header(image, header_text)

            # Display the image using opencv
            cv2.imshow('Grouping', image)
    finally:
        # Always give the capture device back, even if a frame fails
        camera.release()