def detect_objects(image_np, sess, detection_graph):
    # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
    image_np_expanded = np.expand_dims(image_np, axis=0)
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    # Each box represents a part of the image where a particular object was detected.
    boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
    # Each score represents the level of confidence for each of the objects.
    # The score is shown on the result image, together with the class label.
    scores = detection_graph.get_tensor_by_name('detection_scores:0')
    classes = detection_graph.get_tensor_by_name('detection_classes:0')
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')

    # Actual detection.
    (boxes, scores, classes, num_detections) = sess.run(
        [boxes, scores, classes, num_detections],
        feed_dict={image_tensor: image_np_expanded})

    # Visualization of the results of a detection.
    vis_util.visualize_boxes_and_labels_on_image_array(
        image_np,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=8)
    return image_np
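# A minimal driver sketch for detect_objects above, assuming a frozen TF1
# inference graph at PATH_TO_CKPT (a placeholder path) and the imports
# (numpy, tensorflow, PIL) used throughout these snippets; the graph-loading
# pattern mirrors the object_detect snippet further below.
import numpy as np
import tensorflow as tf
from PIL import Image

PATH_TO_CKPT = 'frozen_inference_graph.pb'  # placeholder path

detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        od_graph_def.ParseFromString(fid.read())
        tf.import_graph_def(od_graph_def, name='')

with detection_graph.as_default():
    with tf.Session(graph=detection_graph) as sess:
        image_np = np.array(Image.open('test.jpg').convert('RGB'))
        annotated = detect_objects(image_np, sess, detection_graph)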
def predict(self, img):
    """
    # Arguments
        img: a numpy array

    # Returns
        A JSON string with the filename of the image with the bounding boxes drawn
    """
    def load_image_into_numpy_array(image):
        (im_width, im_height) = image.size
        return np.array(image.getdata()).reshape(
            (im_height, im_width, 3)).astype(np.uint8)

    with self.graph.as_default():
        with tf.Session(graph=self.graph) as sess:
            # Define input and output Tensors for detection_graph
            image_tensor = self.graph.get_tensor_by_name('image_tensor:0')
            # Each box represents a part of the image where a particular object was detected.
            detection_boxes = self.graph.get_tensor_by_name('detection_boxes:0')
            # Each score represents the level of confidence for each of the objects.
            # The score is shown on the result image, together with the class label.
            detection_scores = self.graph.get_tensor_by_name('detection_scores:0')
            detection_classes = self.graph.get_tensor_by_name('detection_classes:0')
            num_detections = self.graph.get_tensor_by_name('num_detections:0')

            image = Image.fromarray(img)
            # The array-based representation of the image will be used later to
            # prepare the result image with boxes and labels on it.
            image_np = load_image_into_numpy_array(image)
            # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
            image_np_expanded = np.expand_dims(image_np, axis=0)
            # Actual detection.
            (boxes, scores, classes, num) = sess.run(
                [detection_boxes, detection_scores, detection_classes, num_detections],
                feed_dict={image_tensor: image_np_expanded})
            # Visualization of the results of a detection.
            vis_util.visualize_boxes_and_labels_on_image_array(
                image_np,
                np.squeeze(boxes),
                np.squeeze(classes).astype(np.int32),
                np.squeeze(scores),
                self.category_index,
                use_normalized_coordinates=True,
                line_thickness=8)
            im = Image.fromarray(image_np)
            filename = str(uuid.uuid4()) + '.jpg'
            save_dir = './outputs'
            if not os.path.exists(save_dir):
                os.makedirs(save_dir)
            save_path = os.path.join(save_dir, filename)
            im.save(save_path)
            return json.dumps({'output': filename})
def visualize_results(image, output_dict):
    """Returns the resulting image after being passed to the model.

    Args:
        image (ndarray): Original image given to the model.
        output_dict (dict): Dictionary with all the information provided by the model.

    Returns:
        image (ndarray): Visualization of the results from above.
    """
    vis_util.visualize_boxes_and_labels_on_image_array(
        image,
        output_dict['detection_boxes'],
        output_dict['detection_classes'],
        output_dict['detection_scores'],
        category_index,
        instance_masks=output_dict.get('detection_masks'),
        use_normalized_coordinates=True,
        line_thickness=4)
    return image
def eval_image(image_path):
    image = Image.open(image_path)
    # The array-based representation of the image will be used later to prepare
    # the result image with boxes and labels on it.
    image_np = load_image_into_numpy_array(image)
    # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
    image_np_expanded = np.expand_dims(image_np, axis=0)
    # Actual detection.
    output_dict = run_inference_for_single_image(image_np, detection_graph)
    # Visualization of the results of a detection.
    print(output_dict['detection_scores'], output_dict['detection_classes'])
    vis_util.visualize_boxes_and_labels_on_image_array(
        image_np,
        output_dict['detection_boxes'],
        output_dict['detection_classes'],
        output_dict['detection_scores'],
        category_index,
        min_score_thresh=0.1,
        instance_masks=output_dict.get('detection_masks'),
        use_normalized_coordinates=True,
        line_thickness=2)
    plt.figure(figsize=IMAGE_SIZE)
    plt.imshow(image_np)
    plt.savefig("eval_images/test.png")
def detect(self, coordinates):
    """Detection using TensorFlow

    Parameters
    ----------
    coordinates : tuple
        Coordinates of video timeframes
    """
    with self.detection_graph.as_default():
        with tf.Session(graph=self.detection_graph) as sess:
            image_tensor = self.detection_graph.get_tensor_by_name('image_tensor:0')
            detection_boxes = self.detection_graph.get_tensor_by_name('detection_boxes:0')
            detection_scores = self.detection_graph.get_tensor_by_name('detection_scores:0')
            detection_classes = self.detection_graph.get_tensor_by_name('detection_classes:0')
            num_detections = self.detection_graph.get_tensor_by_name('num_detections:0')

            # If the output directory is not empty, remove all images.
            # (os.listdir returns a list, so test its truthiness rather than comparing to 0.)
            if os.listdir('./Saved_frames/'):
                for f in os.listdir('./Saved_frames/'):
                    os.remove('./Saved_frames/' + f)

            # Set a frame counter.
            counter = 0
            while self.cap.isOpened():
                ret, frame = self.cap.read()
                counter += 1
                image_np_expanded = np.expand_dims(frame, axis=0)
                if ret:
                    # Detection.
                    (boxes, scores, classes, num) = sess.run(
                        [detection_boxes, detection_scores, detection_classes, num_detections],
                        feed_dict={image_tensor: image_np_expanded})
                    # If the top score is above 80%, the box is visualized and the
                    # image is saved; counter % 10 limits the number of frames saved.
                    if scores[0][0] > 0.8 and counter % 10 == 0:
                        vis_util.visualize_boxes_and_labels_on_image_array(
                            frame,
                            np.squeeze(boxes),
                            np.squeeze(classes).astype(np.int32),
                            np.squeeze(scores),
                            self.category_index,
                            use_normalized_coordinates=True,
                            line_thickness=8)
                        file_name = os.path.join('./Saved_frames/', str(counter) + '.jpg')
                        cv2.imwrite(file_name, frame)
                        # Geotag the image.
                        set_coordinates(file_name, coordinates[counter])
                        if self.create_map:
                            self.photos_to_map(file_name, coordinates[counter])
                    self.out.write(frame)
                    if self.play_video:
                        cv2.imshow('Traffic sign detection', frame)
                        # Close the window when the "Q" key is pressed.
                        if cv2.waitKey(1) & 0xFF == ord('q'):
                            break
                else:
                    break

            # Release the video capture and writer objects, save the map to HTML.
            self.map.save(outfile='Map.html')
            self.cap.release()
            self.out.release()
            # Close all the frames.
            cv2.destroyAllWindows()
# i.e. a single-column array, where each item in the column has the pixel RGB value
ret, frame = video.read()
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
frame_expanded = np.expand_dims(frame_rgb, axis=0)

# Perform the actual detection by running the model with the image as input
(boxes, scores, classes, num) = sess.run(
    [detection_boxes, detection_scores, detection_classes, num_detections],
    feed_dict={image_tensor: frame_expanded})

# Draw the results of the detection (aka 'visualize the results')
vis_util.visualize_boxes_and_labels_on_image_array(
    frame,
    np.squeeze(boxes),
    np.squeeze(classes).astype(np.uint8),
    np.squeeze(scores),
    category_index,
    use_normalized_coordinates=True,
    line_thickness=8,
    min_score_thresh=min_conf_threshold)

# All the results have been drawn on the frame, so it's time to display it.
cv2.imshow('Object detector', frame)

# Press 'q' to quit
if cv2.waitKey(1) == ord('q'):
    break

# Clean up
video.release()
cv2.destroyAllWindows()
def pipeline(self, image):
    ShipDetection.frame += 1
    ShipDetection.progress = int(
        (ShipDetection.frame / ShipDetection.max_frame) * 100)
    print(ShipDetection.progress)
    detection_graph = self.__ship_detection_graph
    category_index = self.__category_index
    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:
            image_np = np.asarray(image)
            image_np_expanded = np.expand_dims(image_np, axis=0)
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            # Each box represents a part of the image where a particular object was detected.
            boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
            # Each score represents the level of confidence for each of the objects.
            # The score is shown on the result image, together with the class label.
            scores = detection_graph.get_tensor_by_name('detection_scores:0')
            classes = detection_graph.get_tensor_by_name('detection_classes:0')
            num_detections = detection_graph.get_tensor_by_name('num_detections:0')
            # Actual detection.
            (boxes, scores, classes, num_detections) = sess.run(
                [boxes, scores, classes, num_detections],
                feed_dict={image_tensor: image_np_expanded})
            # Visualization of the results of a detection.
            vis_util.visualize_boxes_and_labels_on_image_array(
                image_np,
                np.squeeze(boxes),
                np.squeeze(classes).astype(np.int32),
                np.squeeze(scores),
                category_index,
                use_normalized_coordinates=True,
                min_score_thresh=.9,
                line_thickness=2)

            # ################## Data analysis ###################
            print("")
            final_score = np.squeeze(scores)  # scores
            r_count = 0  # count of confident detections
            r_score = []  # temp scores, <class 'numpy.ndarray'>
            final_category = np.array(
                [category_index.get(i) for i in classes[0]])  # categories
            r_category = np.array([])  # temp categories
            for i in range(100):
                if scores is None or final_score[i] > 0.7:
                    r_count = r_count + 1
                    r_score = np.append(r_score, final_score[i])
                    r_category = np.append(r_category, final_category[i])

            if r_count > 0:
                print("Number of bounding boxes: ", r_count)
                print("")
            else:
                print("Nothing detected")
                print("")

            for i in range(len(r_score)):  # length of the score array
                print("Object Num: {} , Category: {} , Score: {}%".format(
                    i + 1, r_category[i]['name'], 100 * r_score[i]))
                print("")
                final_boxes = np.squeeze(boxes)[i]  # ymin, xmin, ymax, xmax
                xmin = final_boxes[1]
                ymin = final_boxes[0]
                xmax = final_boxes[3]
                ymax = final_boxes[2]
                location_x = (xmax + xmin) / 2
                location_y = (ymax + ymin) / 2
                min_location = location_y * 100
                # print("final_boxes [ymin xmin ymax xmax]")
                # print("final_boxes", final_boxes)
                # if min_location > 35:
                #     cv2.putText(image_np, 'FAR', (100, 90), cv2.FONT_HERSHEY_SIMPLEX,
                #                 1.5, (0, 255, 0), thickness=2)
                # else:
                #     cv2.putText(image_np, 'NEAR', (100, 90), cv2.FONT_HERSHEY_SIMPLEX,
                #                 1.5, (255, 0, 0), thickness=2)
                print("Location x: {}, y: {}".format(location_x, location_y))
                print("")
            print("+ " * 30)
    return image_np
def processFrame(image):
    image_np = np.array(image)
    input_tensor = tf.convert_to_tensor(image_np)
    input_tensor = input_tensor[tf.newaxis, ...]
    detections = detect_fn(input_tensor)
    num_detections = int(detections.pop('num_detections'))
    detections = {key: value[0, :num_detections].numpy()
                  for key, value in detections.items()}
    detections['num_detections'] = num_detections
    detections['detection_classes'] = detections['detection_classes'].astype(np.int64)
    image_np_with_detections = image_np.copy()

    # Per-class score rescaling factors, tuned by hand
    # (previously tried values kept in the comments).
    score_scale = {
        1: 2.19,     # bicycle 3.1 3.05
        2: 2.7,      # bus 2.75 2.7 2.6 2.5
        3: 5.4,      # car 5.9 5.5 5.7 5.8 5.58 5.6 5.8 5.85
        4: 7.1,      # Go 3.2 3.5 3.71 3.8
        5: 3.5,      # motorcycle 4 3.8
        6: 3.5,      # pedestrian 4.5 4.6
        7: 3,        # rider
        8: 5.8463,   # Slow Down 3.5 5 6
        9: 3.5,      # Stop 3.5 3.25
        10: 3.4,     # traffic light 3.5 3.4
        11: 5.5,     # traffic sign 5 4.5
        12: 2.38,    # truck 2.2 2.3 2.4 3 2.7 2.55 2.375 2.3875 2.396 2.334
    }
    for i in range(len(detections['detection_classes'])):
        cls = detections['detection_classes'][i]
        if cls in score_scale:
            detections['detection_scores'][i] *= score_scale[cls]

    # Simple overlap suppression: when two confident boxes overlap heavily,
    # keep only the higher-scoring one.
    for i in range(len(detections['detection_classes']) - 1):
        for j in range(i + 1, len(detections['detection_classes'])):
            if (detections['detection_scores'][i] >= 0.8
                    and detections['detection_scores'][j] >= 0.8):
                x1, y1, x2, y2 = (detections['detection_boxes'][i][1],
                                  detections['detection_boxes'][i][0],
                                  detections['detection_boxes'][i][3],
                                  detections['detection_boxes'][i][2])
                x3, y3, x4, y4 = (detections['detection_boxes'][j][1],
                                  detections['detection_boxes'][j][0],
                                  detections['detection_boxes'][j][3],
                                  detections['detection_boxes'][j][2])
                if not ((x2 <= x3 or x4 <= x1) and (y2 <= y3 or y4 <= y1)):
                    lens = min(x2, x4) - max(x1, x3)
                    wide = min(y2, y4) - max(y1, y3)
                    areaCover = lens * wide
                    areai = areaCover / ((x2 - x1) * (y2 - y1))
                    areaj = areaCover / ((x4 - x3) * (y4 - y3))
                    if areai >= 0.7 or areaj >= 0.7:
                        if detections['detection_scores'][i] > detections['detection_scores'][j]:
                            detections['detection_scores'][j] = 0
                        else:
                            detections['detection_scores'][i] = 0

    # Clamp rescaled scores into [0, 1). Note: iterate over the scores array,
    # not the dict itself (len(detections) is only the number of keys).
    for i in range(len(detections['detection_scores'])):
        if detections['detection_scores'][i] >= 1:
            detections['detection_scores'][i] = 0.99

    viz_utils.visualize_boxes_and_labels_on_image_array(
        image_np_with_detections,
        detections['detection_boxes'],
        detections['detection_classes'],
        detections['detection_scores'],
        category_index,
        use_normalized_coordinates=True,
        max_boxes_to_draw=20,
        min_score_thresh=.80,
        agnostic_mode=False)
    return image_np_with_detections
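# A hedged sketch of how the detect_fn used by processFrame might be obtained,
# assuming a TF2 SavedModel exported by the Object Detection API at
# PATH_TO_SAVED_MODEL (a hypothetical path). A model loaded this way is
# callable on a uint8 image tensor and returns the detections dict that
# processFrame unpacks.
import tensorflow as tf

PATH_TO_SAVED_MODEL = 'exported-model/saved_model'  # hypothetical path
detect_fn = tf.saved_model.load(PATH_TO_SAVED_MODEL)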
image_np = load_image_into_numpy_array(image)
image_np = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
# Expand dimensions since the model expects images to have shape: [1, None, None, 3]
image_np_expanded = np.expand_dims(image_np, axis=0)
# Actual detection.
(boxes, scores, classes, num) = sess.run(
    [detection_boxes, detection_scores, detection_classes, num_detections],
    feed_dict={image_tensor: image_np_expanded})
# Visualization of the results of a detection.
vis_util.visualize_boxes_and_labels_on_image_array(
    image_np,
    np.squeeze(boxes),
    np.squeeze(classes).astype(np.int32),
    np.squeeze(scores),
    category_index,
    use_normalized_coordinates=True,
    line_thickness=8)

# Save the detection result images.
cv2.imwrite(
    output_image_path + image_folder + '\\' + image_path.split('\\')[-1],
    image_np)

# Keep only detections with a score above 0.5 for the table.
s_boxes = boxes[scores > 0.5]
s_classes = classes[scores > 0.5]
s_scores = scores[scores > 0.5]
# Write table
def detect_objects(image_np, sess, detection_graph, category_index, mot_tracker):
    # Crop to the region of interest, then expand dimensions since the model
    # expects images to have shape: [1, None, None, 3]
    image_cropped = crop_img(image_np, np.array([0.32, 0.06, 0.9, 0.65]))
    # print(image_cropped.shape)
    # plt.imshow(image_cropped)
    # plt.show()
    image_np_expanded = np.expand_dims(image_cropped, axis=0)
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    # Each box represents a part of the image where a particular object was detected.
    boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
    # Each score represents the level of confidence for each of the objects.
    # The score is shown on the result image, together with the class label.
    scores = detection_graph.get_tensor_by_name('detection_scores:0')
    classes = detection_graph.get_tensor_by_name('detection_classes:0')
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')
    # Actual detection.
    (boxes, scores, classes, num_detections) = sess.run(
        [boxes, scores, classes, num_detections],
        feed_dict={image_tensor: image_np_expanded})
    trackers = mot_tracker.update(boxes[0])
    person_ids = [i for i, e in enumerate(classes[0]) if e == 1]
    if len(person_ids) > 0:
        selected_person_id = person_ids[0]
        person_box = boxes[0][selected_person_id]
        person_score = scores[0][selected_person_id]
        try:
            person_tracker = trackers[selected_person_id]
        except IndexError:
            return image_np, False
        if person_score > 0.6:
            person_attr = {'age': 'NA', 'gender': 'NA', 'color': 'NA'}
            # print(person_attr)
            # Override boxes so only the selected person is visualized.
            boxes = np.expand_dims(person_box, axis=0)
            classes = [1]
            scores = np.expand_dims(person_score, axis=0)
            trackers = np.expand_dims(person_tracker, axis=0)
            person_attr = [person_attr]
            # Visualization of the results of a detection. Note: this call also
            # passes tracker IDs and attributes, so it expects a customized
            # visualize_boxes_and_labels_on_image_array signature.
            vis_util.visualize_boxes_and_labels_on_image_array(
                image_np,
                boxes,
                classes,
                scores,
                trackers,
                person_attr,
                category_index,
                use_normalized_coordinates=True,
                line_thickness=3)
            return image_np, person_box
    return image_np, False
image = Image.open(media_path)
image_np = load_image_into_numpy_array(image)
image_np_expanded = np.expand_dims(image_np, axis=0)
output_dict = run_inference_for_single_image(image_np, detection_graph)

# Count confident 'person' detections as a congestion estimate.
person_index = np.where(np.array(output_dict['detection_classes']) == 1)
pscore_array = np.array(output_dict['detection_scores'])[person_index]
congestion = len(np.where(pscore_array >= 0.4)[0])
print(congestion)

vis_util.visualize_boxes_and_labels_on_image_array(
    image_np,
    output_dict['detection_boxes'],
    output_dict['detection_classes'],
    output_dict['detection_scores'],
    category_index,
    instance_masks=output_dict.get('detection_masks'),
    use_normalized_coordinates=True,
    line_thickness=8,
    min_score_thresh=0.4)
Image.fromarray(image_np).save(media_root + "_output" + media_ext)
def receive(category_index, model, address, port, protocol,
            min_detections=10, min_confidence=0.3):
    # Define NetGear client at the server IP address with receive_mode=True.
    client = NetGear(
        address=address,
        port=str(port),
        protocol=protocol,
        pattern=0,
        receive_mode=True,
        logging=True)

    # Detection thresholds.
    confidence = 0
    avg_confidence = 0
    i = 0
    # Infinite loop
    while True:
        # TODO: FPS limit
        # Receive frames from the network.
        frame = client.recv()
        # Check for None *before* copying: np.copy(None) would return an object
        # array, so the original post-copy check could never trigger.
        if frame is None:
            break
        print('Image received')
        image_np = np.copy(frame)
        # Actual detection.
        output_dict = run_inference_for_single_image(model, image_np)
        # Visualization of the results of a detection.
        vis_util.visualize_boxes_and_labels_on_image_array(
            image_np,
            output_dict['detection_boxes'],
            output_dict['detection_classes'],
            output_dict['detection_scores'],
            category_index,
            instance_masks=output_dict.get('detection_masks_reframed', None),
            use_normalized_coordinates=True,
            line_thickness=8)
        cv2.imshow('object_detection', cv2.resize(image_np, (800, 600)))

        # Track the most likely detection.
        max_label = category_index[1]
        max_score = output_dict['detection_scores'][0]
        if max_label['name'] == 'person':
            i += 1
            confidence += max_score
            avg_confidence = confidence / i
            print('{} {}'.format(i, avg_confidence))
            if i >= min_detections and avg_confidence >= min_confidence:
                print('HUMAN DETECTED! DEPLOY BORK BORK NOM NOM! {} {}'.format(
                    i, avg_confidence))
                i = 0
                avg_confidence = 0
                yield True

        key = cv2.waitKey(1) & 0xFF
        # Break out on 'q' key-press.
        if key == ord("q"):
            break

    # Close the output window.
    cv2.destroyAllWindows()
    # Safely close the client.
    client.close()
def run_inferences(video_capture, graph):
    with graph.as_default():
        with tf.compat.v1.Session() as sess:
            # Get handles to input and output tensors.
            ops = tf.compat.v1.get_default_graph().get_operations()
            all_tensor_names = {
                output.name for op in ops for output in op.outputs
            }
            tensor_dict = {}
            for key in [
                    'num_detections', 'detection_boxes', 'detection_scores',
                    'detection_classes', 'detection_masks'
            ]:
                tensor_name = key + ':0'
                if tensor_name in all_tensor_names:
                    tensor_dict[key] = tf.compat.v1.get_default_graph(
                    ).get_tensor_by_name(tensor_name)
            if 'detection_masks' in tensor_dict:
                # The following processing is only for a single image.
                detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
                detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
                # Reframing is required to translate the masks from box
                # coordinates to image coordinates and fit the image size.
                real_num_detection = tf.cast(tensor_dict['num_detections'][0],
                                             tf.int32)
                detection_boxes = tf.slice(detection_boxes, [0, 0],
                                           [real_num_detection, -1])
                detection_masks = tf.slice(detection_masks, [0, 0, 0],
                                           [real_num_detection, -1, -1])
                # NOTE: `image` (the [1, height, width, 3] input batch) must be
                # in scope here; the original snippet references it before any
                # frame has been read.
                detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                    detection_masks, detection_boxes, image.shape[1],
                    image.shape[2])
                detection_masks_reframed = tf.cast(
                    tf.greater(detection_masks_reframed, 0.5), tf.uint8)
                # Follow the convention by adding back the batch dimension.
                tensor_dict['detection_masks'] = tf.expand_dims(
                    detection_masks_reframed, 0)
            image_tensor = tf.compat.v1.get_default_graph().get_tensor_by_name(
                'image_tensor:0')

            if video_capture.isOpened():
                windowName = "Jetson TensorFlow Demo"
                width = 1280
                height = 720
                cv2.namedWindow(windowName, cv2.WINDOW_NORMAL)
                cv2.resizeWindow(windowName, width, height)
                cv2.moveWindow(windowName, 0, 0)
                cv2.setWindowTitle(windowName, "Jetson TensorFlow Demo")
                font = cv2.FONT_HERSHEY_PLAIN
                showFullScreen = False
                while cv2.getWindowProperty(windowName, 0) >= 0:
                    ret_val, frame = video_capture.read()
                    # The array-based representation of the image will be used
                    # later to prepare the result image with boxes and labels on it.
                    image_np = load_image_into_numpy_array(frame)
                    # Expand dimensions since the model expects images to have
                    # shape: [1, None, None, 3]
                    image_np_expanded = np.expand_dims(image_np, axis=0)
                    # Actual detection.
                    output_dict = sess.run(
                        tensor_dict,
                        feed_dict={image_tensor: image_np_expanded})
                    # All outputs are float32 numpy arrays, so convert types as
                    # appropriate.
                    output_dict['num_detections'] = int(
                        output_dict['num_detections'][0])
                    output_dict['detection_classes'] = output_dict[
                        'detection_classes'][0].astype(np.uint8)
                    output_dict['detection_boxes'] = output_dict[
                        'detection_boxes'][0]
                    output_dict['detection_scores'] = output_dict[
                        'detection_scores'][0]
                    if 'detection_masks' in output_dict:
                        output_dict['detection_masks'] = output_dict[
                            'detection_masks'][0]
                    # Visualization of the results of a detection.
                    vis_util.visualize_boxes_and_labels_on_image_array(
                        image_np,
                        output_dict['detection_boxes'],
                        output_dict['detection_classes'],
                        output_dict['detection_scores'],
                        category_index,
                        instance_masks=output_dict.get('detection_masks'),
                        use_normalized_coordinates=True,
                        line_thickness=8)
                    displayBuf = cv2.resize(image_np, (width, height))
                    cv2.imshow(windowName, displayBuf)
                    key = cv2.waitKey(10)
                    if key == -1:
                        continue
                    elif key == 27:  # ESC quits
                        break
                    elif key == ord('f'):  # 'f' toggles fullscreen
                        if not showFullScreen:
                            cv2.setWindowProperty(windowName,
                                                  cv2.WND_PROP_FULLSCREEN,
                                                  cv2.WINDOW_FULLSCREEN)
                        else:
                            cv2.setWindowProperty(windowName,
                                                  cv2.WND_PROP_FULLSCREEN,
                                                  cv2.WINDOW_NORMAL)
                        showFullScreen = not showFullScreen
            else:
                print("Failed to open the camera.")
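# A minimal usage sketch for run_inferences above, assuming detection_graph was
# loaded from a frozen graph as in the other snippets in this section; camera
# index 0 is an assumption.
video_capture = cv2.VideoCapture(0)
run_inferences(video_capture, detection_graph)
video_capture.release()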
def detect_objects(image_np, sess, detection_graph, category_index):
    # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
    image_np_expanded = np.expand_dims(image_np, axis=0)
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    # Each box represents a part of the image where a particular object was detected.
    boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
    # Each score represents the level of confidence for each of the objects.
    # The score is shown on the result image, together with the class label.
    scores = detection_graph.get_tensor_by_name('detection_scores:0')
    classes = detection_graph.get_tensor_by_name('detection_classes:0')
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')

    t1 = cv2.getTickCount()
    # Actual detection.
    out = sess.run([boxes, scores, classes, num_detections],
                   feed_dict={image_tensor: image_np_expanded})
    (boxes, scores, classes, num_detections) = out

    vis_util.visualize_boxes_and_labels_on_image_array(
        image_np,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=8)

    centerx = 0
    centery = 0
    key = ''
    rows = image_np.shape[0]
    cols = image_np.shape[1]
    num_detections = int(num_detections)
    for i in range(num_detections):
        classId = out[2][0][i]
        score = float(out[1][0][i])
        bbox = [float(v) for v in out[0][0][i]]
        if score > 0.5:
            # Convert the normalized box to pixel coordinates and mark its center.
            x = bbox[1] * cols
            y = bbox[0] * rows
            right = bbox[3] * cols
            bottom = bbox[2] * rows
            # cv2.rectangle(img, (int(x), int(y)), (int(right), int(bottom)),
            #               (125, 255, 51), thickness=1)
            cv2.circle(image_np, (int((x + right) / 2), int((y + bottom) / 2)),
                       8, (255, 255, 255), thickness=1)
            centerx = int((x + right) / 2)
            centery = int((y + bottom) / 2)
            # Get and print the distance value in mm at the center of the object.
            # We measure the camera-object distance using Euclidean distance.
            # err, point_cloud_value = point_cloud.get_value(centerx, centery)
            # distance = depth[centery][centerx]
            # distance = math.sqrt(point_cloud_value[0] * point_cloud_value[0] +
            #                      point_cloud_value[1] * point_cloud_value[1] +
            #                      point_cloud_value[2] * point_cloud_value[2])
            # error = math.fabs(distance - distance_pc) * 100 / distance
            # print(depth.shape)
            # print(str(distance) + ';' + str(distance_pc) + ';' + str(error))
            # if not np.isnan(distance) and not np.isinf(distance):
            #     distance = str(distance)  # round() for int
            #     print("object {0} Distance to Camera at ({1}, {2}): {3} mm\n"
            #           .format(i, x, y, distance))
            classes = np.squeeze(classes).astype(np.int32)
            if classes[i] in category_index.keys():
                class_name = category_index[classes[i]]['name']
            else:
                class_name = 'N/A'
            display_str = str(class_name)
            cv2.putText(image_np, "{0}".format(display_str),
                        (int(centerx), int(centery)),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (225, 255, 255),
                        thickness=2, lineType=2)
            # else:
            #     print("Can't estimate distance at this position, move the camera\n")
            #     sys.stdout.flush()
    t2 = cv2.getTickCount()
    print((t2 - t1) / cv2.getTickFrequency())
    return image_np
def detect(self, image_np, gt_box=None):
    image = image_np.copy()
    output_dict = self.run_inference_for_single_image(image_np)

    if gt_box is not None:
        vis_util.draw_bounding_boxes_on_image_array(
            image, np.array([gt_box]), color='black', thickness=4)

    # Visualization of the results of a detection.
    vis_util.visualize_boxes_and_labels_on_image_array(
        image,
        output_dict['detection_boxes'],
        output_dict['detection_classes'],
        output_dict['detection_scores'],
        self.category_index,
        min_score_thresh=self.min_score_thresh,
        instance_masks=output_dict.get('detection_masks'),
        use_normalized_coordinates=True,
        skip_scores=False,
        skip_labels=True,
        line_thickness=4)
    # cv2.imshow('Simulation', image)
    # cv2.waitKey(10)

    # Sort detections by score, highest first.
    bboxes = output_dict['detection_boxes']
    classes = output_dict['detection_classes']
    scores = output_dict['detection_scores']
    bboxes = [bbox for bbox, _ in sorted(
        zip(bboxes, scores), key=lambda pair: pair[1], reverse=True)]
    classes = [clss for clss, _ in sorted(
        zip(classes, scores), key=lambda pair: pair[1], reverse=True)]
    scores = sorted(scores, reverse=True)

    im_height, im_width = image_np.shape[0:2]
    for i in range(len(bboxes)):
        if scores is None or scores[i] > self.min_score_thresh:
            if classes[i] in self.category_index.keys():
                class_name = self.category_index[classes[i]]['name']
                if class_name == 'car':
                    # Return the first confident 'car' box in pixel coordinates,
                    # centered on the image.
                    ymin, xmin, ymax, xmax = tuple(bboxes[i].tolist())
                    left = xmin * im_width
                    right = xmax * im_width
                    top = ymin * im_height
                    bottom = ymax * im_height
                    POS_X = (left + right - im_width) / 2.0
                    POS_Y = (im_height - top - bottom) / 2.0
                    WIDTH = right - left
                    HEIGHT = bottom - top
                    return (POS_X, POS_Y, WIDTH, HEIGHT)
    return None
input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.float32)
detections, predictions_dict, shapes = detect_fn(input_tensor)

label_id_offset = 1
boxes = detections['detection_boxes'][0].numpy()
classes = detections['detection_classes'][0].numpy()
classes_int = (classes + label_id_offset).astype(int)
scores = detections['detection_scores'][0].numpy()

viz_utils.visualize_boxes_and_labels_on_image_array(
    image_np_with_detections,
    boxes,
    classes_int,
    scores,
    category_index,
    use_normalized_coordinates=True,
    max_boxes_to_draw=200,
    min_score_thresh=.70,
    agnostic_mode=False)

if writer is not None:
    writer.write(image_np_with_detections)

# Display output
cv2.imshow('object detection',
           cv2.resize(image_np_with_detections, (600, 600)))
key = cv2.waitKey(1) & 0xFF
if key == ord("q"):
    break
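# A hedged sketch of the detect_fn assumed by the snippet above (it returns the
# (detections, predictions_dict, shapes) triple). This follows the TF2 Object
# Detection API checkpoint-restore pattern; PATH_TO_CFG and PATH_TO_CKPT are
# placeholder paths, not taken from the original code.
import tensorflow as tf
from object_detection.builders import model_builder
from object_detection.utils import config_util

configs = config_util.get_configs_from_pipeline_file(PATH_TO_CFG)
detection_model = model_builder.build(model_config=configs['model'],
                                      is_training=False)
ckpt = tf.compat.v2.train.Checkpoint(model=detection_model)
ckpt.restore(PATH_TO_CKPT).expect_partial()

@tf.function
def detect_fn(image):
    image, shapes = detection_model.preprocess(image)
    prediction_dict = detection_model.predict(image, shapes)
    detections = detection_model.postprocess(prediction_dict, shapes)
    return detections, prediction_dict, tf.reshape(shapes, [-1])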
def get_preds(frame):
    global previous
    global label
    global item
    global speak
    global groceryItems
    global isOccupied

    # Mode flag (inferred from the branches below):
    # 0 -> stop, 1 -> grocery item recognition, 2 -> barcode scanner.
    num = 1
    image_np = np.array(frame)
    input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0),
                                        dtype=tf.float32)
    image_np_with_detections = image_np.copy()

    detections = detect_item(input_tensor)
    num_detections = int(detections.pop('num_detections'))
    detections = {
        key: value[0, :num_detections].numpy()
        for key, value in detections.items()
    }
    boxes = detections['detection_boxes']
    classes = detections['detection_classes'].astype(np.int64)
    scores = detections['detection_scores']
    detections['num_detections'] = num_detections
    detections['detection_classes'] = detections['detection_classes'].astype(np.int64)
    label_id_offset = 1

    # Loop through the boxes. The batch dimension has already been stripped
    # above, so iterate over `boxes` directly (the original looped over
    # boxes[0], i.e. the coordinates of a single box). The label/item strings
    # always refer to the top-scoring detection (index 0), as in the original.
    for i, b in enumerate(boxes):
        if scores[i] < 0.8:
            label = "No item found" + ", " + str(scores[0])
            item = "No item found"
        else:
            label = str(category_index[classes[0] + 1]) + ", " + str(
                scores[0] * 100) + "%"
            item = str(category_index[detections['detection_classes'][0] + 1]['name'])
        if item != previous:
            previous = item
            # sayAudio(item)
            # itemThread(sayItem)
            print(item)
            if item.lower() == str(groceryItems[0]).lower():
                viz_utils.visualize_boxes_and_labels_on_image_array(
                    image_np_with_detections,
                    np.squeeze(boxes),
                    np.squeeze(classes + label_id_offset),
                    np.squeeze(scores),
                    category_index,
                    use_normalized_coordinates=True,
                    max_boxes_to_draw=5,
                    min_score_thresh=.8,
                    agnostic_mode=False,
                    line_thickness=8)
                groceryItems.remove(item)
                if isProduct(item.lower()):
                    itemThread(sayItem)
                    req = requestUser(
                        "This item is on your shopping list, would you like to scan the barcode?")
                    if req == "yes":
                        item = "Launching barcode scanner"
                        itemThread(sayItem)
                        num = 2
                    else:
                        request = requestUser(
                            "Would you like to continue scanning grocery items?")
                        if request == 'yes':
                            item = "Ok, please continue scanning"
                            itemThread(item)
                            num = 1
                        else:
                            item = "Ok, please continue shopping"
                            itemThread(sayItem)
                            isOccupied = False
                            num = 0
    return image_np_with_detections, num, isOccupied
def run_inference_for_single_image(self, image_path, min_score):
    # Get handles to input and output tensors.
    image = self.build_image_f(image_path)
    ops = tf.get_default_graph().get_operations()
    all_tensor_names = {output.name for op in ops for output in op.outputs}
    tensor_dict = {}
    for key in [
            'num_detections', 'detection_boxes', 'detection_scores',
            'detection_classes', 'detection_masks'
    ]:
        tensor_name = key + ':0'
        if tensor_name in all_tensor_names:
            tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
                tensor_name)
    image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')

    # Run inference.
    output_dict = self.tf_session.run(
        tensor_dict, feed_dict={image_tensor: np.expand_dims(image, 0)})
    output_dict['num_detections'] = int(output_dict['num_detections'][0])
    output_dict['detection_classes'] = output_dict['detection_classes'][0].astype(np.uint8)
    output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
    output_dict['detection_scores'] = output_dict['detection_scores'][0]
    if 'detection_masks' in output_dict:
        output_dict['detection_masks'] = output_dict['detection_masks'][0]

    # Filter out results that are not of the 'player' class.
    index_to_remove = []
    for i, c in enumerate(output_dict['detection_classes']):
        if c != 2:
            index_to_remove.append(i)
    output_dict['num_detections'] -= len(index_to_remove)
    output_dict['detection_classes'] = np.delete(
        output_dict['detection_classes'], index_to_remove)
    output_dict['detection_boxes'] = np.delete(
        output_dict['detection_boxes'], index_to_remove, axis=0)
    output_dict['detection_scores'] = np.delete(
        output_dict['detection_scores'], index_to_remove)

    # Draw visual examples for debugging.
    if self.debug_path is not None:
        current_frame = skimage.io.imread(image_path)
        vis_util.visualize_boxes_and_labels_on_image_array(
            current_frame,
            output_dict['detection_boxes'],
            output_dict['detection_classes'],
            output_dict['detection_scores'],
            self.category_index,
            instance_masks=output_dict.get('detection_masks'),
            use_normalized_coordinates=True,
            line_thickness=4)
        folder_debug_image_path = Path(self.debug_path) / Path(image_path).parent.name
        folder_debug_image_path.mkdir(parents=True, exist_ok=True)
        out_debug_image_path = folder_debug_image_path / Path(image_path).name
        Image.fromarray(current_frame).save(out_debug_image_path)

    # Convert the confident boxes to pixel coordinates and return them.
    return_boxes = []
    return_scores = []
    for i, (b, s) in enumerate(
            zip(output_dict['detection_boxes'], output_dict['detection_scores'])):
        if s >= min_score:
            b_xmin = b[1] * image.shape[1]
            b_ymin = b[0] * image.shape[0]
            b_xmax = b[3] * image.shape[1]
            b_ymax = b[2] * image.shape[0]
            return_boxes.append((b_xmin, b_ymin, b_xmax, b_ymax))
            return_scores.append(s)
    return return_boxes, return_scores
def overlay_all_on_image(self,
                         image: np.ndarray,
                         inplace=True,
                         score_threshold: float = 0.5,
                         max_detections: int = 20) -> np.ndarray:
    """Overlays detected objects and their associated visuals on an image

    Args:
        image (np.ndarray): the image on which to overlay results; loaded into
            memory as a numpy array in the RGB colorspace (height, width, 3)
        inplace (bool, optional): Defaults to True. Whether to modify the input
            image directly or make a copy before adding visualized results. The
            function will return an image with overlaid results either way.
        score_threshold (float, optional): Defaults to 0.5. A threshold below
            which to ignore the visualization of low-confidence detections.
        max_detections (int, optional): Defaults to 20. The maximum number of
            detected objects to display on the image.

    Returns:
        np.ndarray: the image with the associated visuals overlaid
    """
    if not inplace:
        image = image.copy()

    all_detected_objects = []
    for detections in self.values():
        all_detected_objects.extend(detections)
    all_detected_objects.sort(key=lambda obj: obj.confidence, reverse=True)

    # Construct a category index (this does not necessarily match that of the
    # detection model; we can just construct an equivalent mapping such that
    # the visualization utils associate the proper strings with each detected
    # object), and construct a dict for quick lookups in the subsequent section.
    category_index = {}
    label_to_int = {}
    for i, label in enumerate(self.keys()):
        category_index[i + 1] = {"id": i + 1, "name": label}
        label_to_int[label] = i + 1

    # TODO: handle empty list
    if isinstance(all_detected_objects[0], DetectedBBox):
        boxes = np.zeros((len(all_detected_objects), 4), dtype=np.float32)
        classes = np.zeros((len(all_detected_objects), ), dtype=np.uint8)
        scores = np.zeros((len(all_detected_objects), ), dtype=np.float32)
        for i, obj in enumerate(all_detected_objects):
            classes[i] = label_to_int[obj.label]
            scores[i] = obj.confidence
            boxes[i] = np.array([obj.ymin, obj.xmin, obj.ymax, obj.xmax])
        return vis_util.visualize_boxes_and_labels_on_image_array(
            image,
            boxes,
            classes,
            scores,
            category_index,
            use_normalized_coordinates=True,
            max_boxes_to_draw=max_detections,
            min_score_thresh=score_threshold,
        )
    else:
        return image
image = cv2.imread(PATH_TO_IMAGE)
image_expanded = np.expand_dims(image, axis=0)

# Perform the actual detection by running the model with the image as input
(boxes, scores, classes, num) = sess.run(
    [detection_boxes, detection_scores, detection_classes, num_detections],
    feed_dict={image_tensor: image_expanded})

# Draw the results of the detection (aka 'visualize the results')
vis_util.visualize_boxes_and_labels_on_image_array(
    image,
    np.squeeze(boxes),
    np.squeeze(classes).astype(np.int32),
    np.squeeze(scores),
    category_index,
    use_normalized_coordinates=True,
    line_thickness=2,
    # https://stats.stackexchange.com/questions/297796/faster-r-cnn-how-to-avoid-multiple-detection-in-same-area
    max_boxes_to_draw=5,
    min_score_thresh=0.0)

# from google.colab.patches import cv2_imshow
# matplotlib inline
# All the results have been drawn on the image. Now display the image.
cv2.imshow("mat", image)
# Press any key to close the image
cv2.waitKey(0)
# Clean up
def visualize_detection_results(result_dict,
                                tag,
                                global_step,
                                categories,
                                summary_dir='',
                                export_dir='',
                                agnostic_mode=False,
                                show_groundtruth=False,
                                groundtruth_box_visualization_color='red',
                                min_score_thresh=.5,
                                max_num_predictions=200,
                                skip_scores=False,
                                skip_labels=False,
                                keep_image_id_for_visualization_export=False):
    """Visualizes detection results and writes visualizations to image summaries.

    This function visualizes an image with its detected bounding boxes and writes
    to image summaries which can be viewed on tensorboard. It optionally also
    writes images to a directory. In the case of a missing entry in the label map,
    the unknown class name in the visualization is shown as "N/A".

    Args:
        result_dict: a dictionary holding groundtruth and detection data
            corresponding to each image being evaluated. The following keys are
            required:
                'original_image': a numpy array representing the image with shape
                    [1, height, width, 3]
                'detection_boxes': a numpy array of shape [N, 4]
                'detection_scores': a numpy array of shape [N]
                'detection_classes': a numpy array of shape [N]
            The following keys are optional:
                'groundtruth_boxes': a numpy array of shape [N, 4]
                'groundtruth_keypoints': a numpy array of shape [N, num_keypoints, 2]
            Detections are assumed to be provided in decreasing order of score for
            display, and we assume that scores are probabilities between 0 and 1.
        tag: tensorboard tag (string) to associate with the image.
        global_step: global step at which the visualizations are generated.
        categories: a list of dictionaries representing all possible categories.
            Each dict in this list has the following keys:
                'id': (required) an integer id uniquely identifying this category
                'name': (required) string representing the category name
                    e.g., 'cat', 'dog', 'pizza'
                'supercategory': (optional) string representing the supercategory
                    e.g., 'animal', 'vehicle', 'food', etc
        summary_dir: the output directory to which the image summaries are written.
        export_dir: the output directory to which images are written. If this is
            empty (default), then images are not exported.
        agnostic_mode: boolean (default: False) controlling whether to evaluate in
            class-agnostic mode or not.
        show_groundtruth: boolean (default: False) controlling whether to show
            groundtruth boxes in addition to detected boxes
        groundtruth_box_visualization_color: box color for visualizing groundtruth
            boxes
        min_score_thresh: minimum score threshold for a box to be visualized
        max_num_predictions: maximum number of detections to visualize
        skip_scores: whether to skip the score when drawing a single detection
        skip_labels: whether to skip the label when drawing a single detection
        keep_image_id_for_visualization_export: whether to keep the image
            identifier in the filename when exported to export_dir

    Raises:
        ValueError: if result_dict does not contain the expected keys (i.e.,
            'original_image', 'detection_boxes', 'detection_scores',
            'detection_classes')
    """
    detection_fields = fields.DetectionResultFields
    input_fields = fields.InputDataFields
    if not set([
            input_fields.original_image,
            detection_fields.detection_boxes,
            detection_fields.detection_scores,
            detection_fields.detection_classes,
    ]).issubset(set(result_dict.keys())):
        raise ValueError('result_dict does not contain all expected keys.')
    if show_groundtruth and input_fields.groundtruth_boxes not in result_dict:
        raise ValueError('If show_groundtruth is enabled, result_dict must contain '
                         'groundtruth_boxes.')
    logging.info('Creating detection visualizations.')
    category_index = label_map_util.create_category_index(categories)

    image = np.squeeze(result_dict[input_fields.original_image], axis=0)
    detection_boxes = result_dict[detection_fields.detection_boxes]
    detection_scores = result_dict[detection_fields.detection_scores]
    detection_classes = np.int32(
        (result_dict[detection_fields.detection_classes]))
    detection_keypoints = result_dict.get(detection_fields.detection_keypoints)
    detection_masks = result_dict.get(detection_fields.detection_masks)
    detection_boundaries = result_dict.get(detection_fields.detection_boundaries)

    # Plot groundtruth underneath detections
    if show_groundtruth:
        groundtruth_boxes = result_dict[input_fields.groundtruth_boxes]
        groundtruth_keypoints = result_dict.get(input_fields.groundtruth_keypoints)
        vis_utils.visualize_boxes_and_labels_on_image_array(
            image=image,
            boxes=groundtruth_boxes,
            classes=None,
            scores=None,
            category_index=category_index,
            keypoints=groundtruth_keypoints,
            use_normalized_coordinates=False,
            max_boxes_to_draw=30,
            groundtruth_box_visualization_color=groundtruth_box_visualization_color)
    vis_utils.visualize_boxes_and_labels_on_image_array(
        image,
        detection_boxes,
        detection_classes,
        detection_scores,
        category_index,
        instance_masks=detection_masks,
        instance_boundaries=detection_boundaries,
        keypoints=detection_keypoints,
        use_normalized_coordinates=False,
        max_boxes_to_draw=max_num_predictions,
        min_score_thresh=min_score_thresh,
        agnostic_mode=agnostic_mode,
        skip_scores=skip_scores,
        skip_labels=skip_labels)

    if export_dir:
        if (keep_image_id_for_visualization_export
                and result_dict[fields.InputDataFields().key]):
            export_path = os.path.join(
                export_dir,
                'export-{}-{}.png'.format(
                    tag, result_dict[fields.InputDataFields().key]))
        else:
            export_path = os.path.join(export_dir, 'export-{}.png'.format(tag))
        vis_utils.save_image_array_as_png(image, export_path)

    summary = tf.Summary(value=[
        tf.Summary.Value(
            tag=tag,
            image=tf.Summary.Image(
                encoded_image_string=vis_utils.encode_image_array_as_png_str(
                    image)))
    ])
    summary_writer = tf.summary.FileWriterCache.get(summary_dir)
    summary_writer.add_summary(summary, global_step)

    logging.info('Detection visualizations written to summary with tag %s.', tag)
label_id_offset = 0
image_np_with_detections = image_np.copy()

# Use keypoints if available in detections
keypoints, keypoint_scores = None, None
if 'detection_keypoints' in result:
    keypoints = result['detection_keypoints'][0]
    keypoint_scores = result['detection_keypoint_scores'][0]

viz_utils.visualize_boxes_and_labels_on_image_array(
    image_np_with_detections[0],
    result['detection_boxes'][0],
    (result['detection_classes'][0] + label_id_offset).astype(int),
    result['detection_scores'][0],
    category_index,
    use_normalized_coordinates=True,
    max_boxes_to_draw=200,
    min_score_thresh=.30,
    agnostic_mode=False,
    keypoints=keypoints,
    keypoint_scores=keypoint_scores,
    keypoint_edges=COCO17_HUMAN_POSE_KEYPOINTS)

# plt.figure(figsize=(24, 32))
# plt.imshow(image_np_with_detections[0])
# plt.show()
# cv2.imshow('image_np_with_detections[0]', image_np_with_detections[0])
# cv2.waitKey()

# %% [markdown]
# ## [Optional]
        boxes = tf.get_default_graph().get_tensor_by_name(
            'detection_boxes:0')
        scores = tf.get_default_graph().get_tensor_by_name(
            'detection_scores:0')
        classes = tf.get_default_graph().get_tensor_by_name(
            'detection_classes:0')
        num_detections = tf.get_default_graph().get_tensor_by_name(
            'num_detections:0')
        (boxes, scores, classes, num_detections) = sess.run(
            [boxes, scores, classes, num_detections],
            feed_dict={image_tensor: image_np_expanded})
        vis_util.visualize_boxes_and_labels_on_image_array(
            image_np,
            np.squeeze(boxes),
            np.squeeze(classes).astype(np.int32),
            np.squeeze(scores),
            # output_dict['detection_boxes'],
            # output_dict['detection_classes'],
            # output_dict['detection_scores'],
            category_index,
            # instance_masks=output_dict.get('detection_masks'),
            use_normalized_coordinates=True,
            line_thickness=8)
        cv2.imshow('object_detection', cv2.resize(image_np, (800, 800)))
        if cv2.waitKey(25) & 0xFF == ord('q'):
            cv2.destroyAllWindows()
            break
# ops = tf.get_default_graph().get_operations()
# all_tensor_names = {output.name for op in ops for output in op.outputs}
# tensor_dict = {}
# for key in [
ip.detection(sess, image_np)
boxes_s, classes_s, scores_s = ip.boxes_s, ip.classes_s, ip.scores_s
stop = time.time()
time_tot = time_tot + stop - start
print(stop - start)
(h, w) = image_np.shape[:2]

# Visualization of the results of a detection
vis_util.visualize_boxes_and_labels_on_image_array(
    image_np,
    boxes_s,
    classes_s.astype(np.int32),
    scores_s,
    ip.category_index,
    use_normalized_coordinates=False,
    line_thickness=3,
    min_score_thresh=0,  # Objects above the threshold are already selected beforehand
)
cv2.imshow('Detection' + str(i), image_np)
cv2.imwrite('detect_image' + str(photo_idx) + '.png',
            cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB))
photo_idx = photo_idx + 1

# Write detections to txt files
f = open(DETECTION_FILE + "file" + str(i) + ".txt", "x")
LIST = [1, 0, 3, 2]  # xmin, ymin, xmax, ymax
# Read frame from camera
ret, image_np = cap.read()
# Expand dimensions since the model expects images to have shape: [1, None, None, 3]
image_np_expanded = np.expand_dims(image_np, axis=0)
input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.float32)
detections, predictions_dict, shapes = detect_fn(input_tensor)

label_id_offset = 1
image_np_with_detections = image_np.copy()

viz_utils.visualize_boxes_and_labels_on_image_array(
    image_np_with_detections,
    detections['detection_boxes'][0].numpy(),
    (detections['detection_classes'][0].numpy() + label_id_offset).astype(int),
    detections['detection_scores'][0].numpy(),
    category_index,
    use_normalized_coordinates=True,
    max_boxes_to_draw=200,
    min_score_thresh=.30,
    agnostic_mode=False)

# Display output
cv2.imshow('object detection', cv2.resize(image_np_with_detections, (800, 600)))
if cv2.waitKey(25) & 0xFF == ord('q'):
    break

cap.release()
cv2.destroyAllWindows()
display_scores = []
for e in ensembled:
    x, y, w, h, c, conf = e
    print_boxes.append('{0} {1} {2} {3} {4}'.format(conf, x, y, w, h))
    display_boxes.append([y, x, y + h, x + w])
    display_classes.append(c)
    display_scores.append(conf)
submit_dict['patientId'].append(pid)
submit_dict['PredictionString'].append(' '.join(print_boxes))

# Load the DICOM pixel data and stack it to 3 channels for visualization.
im = pydicom.read_file(os.path.join(args.dicom_dir, pid + '.dcm'))
im = im.pixel_array
im = np.stack((im, ) * 3, -1)
if display_boxes:
    visutil.visualize_boxes_and_labels_on_image_array(
        im,
        np.array(display_boxes),
        display_classes,
        display_scores,
        {1: {'id': 1, 'name': 'pneumonia'}},
        use_normalized_coordinates=False,
        min_score_thresh=args.conf_thresh,
    )
    Image.fromarray(im).save(os.path.join(args.out_images_dir, pid + '.jpg'))

pd.DataFrame(submit_dict).to_csv(args.ensembled_submission, index=False)
    # (Tail of the read_image helper used below.)
    HEIGHT = image_np.shape[0]
    image_np_expanded = np.expand_dims(image_np, axis=0)
    return image_np, image_np_expanded, WIDTH, HEIGHT


img_path = 'data/images/G5_47.jpg'
with detection_graph.as_default():
    with tf.Session(graph=detection_graph) as sess:
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
        detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
        detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
        num_detections = detection_graph.get_tensor_by_name('num_detections:0')

        image_np, image_np_expanded, WIDTH, HEIGHT = read_image(img_path)
        (boxes, scores, classes, num) = sess.run(
            [detection_boxes, detection_scores, detection_classes, num_detections],
            feed_dict={image_tensor: image_np_expanded})

        # Flatten the batch dimension before visualizing.
        boxes = np.reshape(boxes, (-1, boxes.shape[-1]))
        scores = np.reshape(scores, (-1))
        classes = np.reshape(classes, (-1)).astype(np.int32)
        vis_util.visualize_boxes_and_labels_on_image_array(
            image_np,
            boxes,
            classes,
            scores,
            category_index,
            use_normalized_coordinates=True,
            line_thickness=8)

        # cv2.imwrite('detection.png', cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR))
        img_detection = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
        cv2.imshow("detection", img_detection)
        cv2.waitKey(0)
def get_localization(self, image, visual=False):
    """Determines the locations of the traffic light in the image

    Args:
        image: camera image

    Returns:
        list of integers: coordinates [x_left, y_up, x_right, y_down]
    """
    category_index = {
        1: {'id': 1, 'name': u'person'},
        2: {'id': 2, 'name': u'bicycle'},
        3: {'id': 3, 'name': u'car'},
        4: {'id': 4, 'name': u'motorcycle'},
        5: {'id': 5, 'name': u'airplane'},
        6: {'id': 6, 'name': u'bus'},
        7: {'id': 7, 'name': u'train'},
        8: {'id': 8, 'name': u'truck'},
        9: {'id': 9, 'name': u'boat'},
        10: {'id': 10, 'name': u'traffic light'},
        11: {'id': 11, 'name': u'fire hydrant'},
        13: {'id': 13, 'name': u'stop sign'},
        14: {'id': 14, 'name': u'parking meter'},
    }

    with self.detection_graph.as_default():
        image_expanded = np.expand_dims(image, axis=0)
        (boxes, scores, classes, num_detections) = self.sess.run(
            [self.boxes, self.scores, self.classes, self.num_detections],
            feed_dict={self.image_tensor: image_expanded})

        if visual:
            vis_util.visualize_boxes_and_labels_on_image_array(
                image,
                np.squeeze(boxes),
                np.squeeze(classes).astype(np.int32),
                np.squeeze(scores),
                category_index,
                use_normalized_coordinates=True,
                min_score_thresh=.2,
                line_thickness=3)
            plt.figure(figsize=(9, 6))
            plt.imshow(image)
            plt.show()

        boxes = np.squeeze(boxes)
        classes = np.squeeze(classes)
        scores = np.squeeze(scores)

        cls = classes.tolist()
        # Find the first occurrence of a traffic light detection (id=10)
        idx = next((i for i, v in enumerate(cls) if v == 10.), None)

        # If there is no detection
        if idx is None:
            box = [0, 0, 0, 0]
            print('no detection!')
        # If the confidence of the detection is too low (0.3 for the simulator)
        elif scores[idx] <= 0.02:
            box = [0, 0, 0, 0]
            print('low confidence:', scores[idx])
        # If there is a detection and its confidence is high enough
        else:
            # ************* corner cases ***********************************
            dim = image.shape[0:2]
            box = self.box_normal_to_pixel(boxes[idx], dim)
            box_h = box[2] - box[0]
            box_w = box[3] - box[1]
            ratio = box_h / (box_w + 0.01)
            # If the box is too small (20 pixels for the simulator)
            if (box_h < 10) or (box_w < 10):
                box = [0, 0, 0, 0]
                print('box too small!', box_h, box_w)
            # If the height-width ratio is not right (1.5 for the simulator)
            elif ratio < 1.5:
                box = [0, 0, 0, 0]
                print('wrong h-w ratio', ratio)
            else:
                print(box)
                print('localization confidence: ', scores[idx])
            # **************** end of corner cases **************************
        self.tl_box = box
    return box
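# box_normal_to_pixel is referenced above but not defined in this snippet. A
# plausible sketch of that method, assuming it converts a normalized
# [ymin, xmin, ymax, xmax] box to integer pixel coordinates using the
# (height, width) tuple `dim`; the real implementation may differ.
import numpy as np

def box_normal_to_pixel(self, box, dim):
    height, width = dim[0], dim[1]
    box_pixel = [int(box[0] * height), int(box[1] * width),
                 int(box[2] * height), int(box[3] * width)]
    return np.array(box_pixel)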
def object_detect(file1, file2):
    import numpy as np
    import tensorflow as tf
    import scipy.misc
    from PIL import Image

    if tf.__version__ != '1.4.0':
        raise ImportError(
            'Please install tensorflow v1.4.0 to use this module!')

    from object_detection.utils import label_map_util
    from object_detection.utils import visualization_utils as vis_util

    MODEL_NAME = 'training'
    PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'
    PATH_TO_LABELS = MODEL_NAME + '/object-detection.pbtxt'
    NUM_CLASSES = 1

    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    def load_image_into_numpy_array(image):
        (im_width, im_height) = image.size
        return np.array(image.getdata()).reshape(
            (im_height, im_width, 3)).astype(np.uint8)

    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
            detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
            detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
            num_detections = detection_graph.get_tensor_by_name('num_detections:0')

            image = Image.open(file1)
            image_np = load_image_into_numpy_array(image)
            image_np_expanded = np.expand_dims(image_np, axis=0)
            (boxes, scores, classes, num) = sess.run(
                [detection_boxes, detection_scores, detection_classes, num_detections],
                feed_dict={image_tensor: image_np_expanded})
            vis_util.visualize_boxes_and_labels_on_image_array(
                image_np,
                np.squeeze(boxes),
                np.squeeze(classes).astype(np.int32),
                np.squeeze(scores),
                category_index,
                use_normalized_coordinates=True,
                line_thickness=4)
            scipy.misc.imsave(file2, image_np)
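# Example call for object_detect above; 'input.jpg' and 'output.jpg' are
# placeholder filenames, not taken from the original code.
object_detect('input.jpg', 'output.jpg')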
def handle(self, input_frame):
    if input_frame.payload_type != gabriel_pb2.PayloadType.IMAGE:
        status = gabriel_pb2.ResultWrapper.Status.WRONG_INPUT_FORMAT
        return cognitive_engine.create_result_wrapper(status)

    extras = cognitive_engine.unpack_extras(openscout_pb2.Extras, input_frame)

    if extras.model != '' and extras.model != self.model:
        if not os.path.exists('./model/' + extras.model):
            logger.error(
                f"Model named {extras.model} not found. Sticking with previous model.")
        else:
            self.detector = TFPredictor(extras.model)
            self.model = extras.model

    output_dict, image_np = self.process_image(input_frame.payloads[0])

    status = gabriel_pb2.ResultWrapper.Status.SUCCESS
    result_wrapper = cognitive_engine.create_result_wrapper(status)
    result_wrapper.result_producer_name.value = self.ENGINE_NAME

    if self.store_detections:
        try:
            vis_util.visualize_boxes_and_labels_on_image_array(
                image_np,
                np.squeeze(output_dict['detection_boxes']),
                np.squeeze(output_dict['detection_classes']),
                np.squeeze(output_dict['detection_scores']),
                self.detector.category_index,
                use_normalized_coordinates=True,
                min_score_thresh=self.threshold,
                line_thickness=4)
            img = Image.fromarray(image_np)
            draw = ImageDraw.Draw(img)
            draw.bitmap((0, 0), self.watermark, fill=None)
            bio = BytesIO()
            img.save(bio, format="JPEG")
            filename = str(time.time()) + ".png"
            path = self.storage_path + filename
        except IndexError as e:
            logger.error(f"IndexError while getting bounding boxes [{e}]")
            return result_wrapper

    if output_dict['num_detections'] > 0:
        # Convert numpy arrays to python lists.
        classes = output_dict['detection_classes'].tolist()
        boxes = output_dict['detection_boxes'].tolist()
        scores = output_dict['detection_scores'].tolist()

        result = gabriel_pb2.ResultWrapper.Result()
        result.payload_type = gabriel_pb2.PayloadType.TEXT

        detections_above_threshold = False
        r = ""
        for i in range(0, len(classes)):
            if scores[i] > self.threshold:
                if self.exclusions is None or classes[i] not in self.exclusions:
                    detections_above_threshold = True
                    logger.info("Detected : {} - Score: {:.3f}".format(
                        self.detector.category_index[classes[i]]['name'],
                        scores[i]))
                    if i > 0:
                        r += ", "
                    r += "Detected {} ({:.3f})".format(
                        self.detector.category_index[classes[i]]['name'],
                        scores[i])
                    if self.store_detections:
                        detection_log.info("{},{},{},{},{:.3f},{}".format(
                            extras.client_id, extras.location.latitude,
                            extras.location.longitude,
                            self.detector.category_index[classes[i]]['name'],
                            scores[i],
                            os.environ["WEBSERVER"] + "/" + filename))
                    else:
                        detection_log.info("{},{},{},{},{:.3f},".format(
                            extras.client_id, extras.location.latitude,
                            extras.location.longitude,
                            self.detector.category_index[classes[i]]['name'],
                            scores[i]))

        if detections_above_threshold:
            result.payload = r.encode(encoding="utf-8")
            result_wrapper.results.append(result)
            if self.store_detections:
                logger.info("Stored image: {}".format(path))
                img.save(path)

    return result_wrapper
# Expand image dimensions to have shape: [1, None, None, 3]
# i.e. a single-column array, where each item in the column has the pixel RGB value
image = cv2.imread(PATH_TO_IMAGE)
image_expanded = np.expand_dims(image, axis=0)

# Perform the actual detection by running the model with the image as input
(boxes, scores, classes, num) = sess.run(
    [detection_boxes, detection_scores, detection_classes, num_detections],
    feed_dict={image_tensor: image_expanded})

# Draw the results of the detection (aka 'visualize the results')
vis_util.visualize_boxes_and_labels_on_image_array(
    image,
    np.squeeze(boxes),
    np.squeeze(classes).astype(np.int32),
    np.squeeze(scores),
    category_index,
    use_normalized_coordinates=True,
    line_thickness=8,
    min_score_thresh=0.80)

# All the results have been drawn on the image. Now display the image.
cv2.imshow('Object detector', image)
# Press any key to close the image
cv2.waitKey(0)
# Clean up
cv2.destroyAllWindows()
def getPrediction(self):
    # Load the TensorFlow model into memory, load the image using OpenCV, and
    # expand image dimensions to have shape: [1, None, None, 3], i.e. a
    # single-column array, where each item in the column has the pixel RGB value.
    sess = tf.Session(graph=self.detection_graph)
    image = cv2.imread(self.PATH_TO_IMAGE)
    image_expanded = np.expand_dims(image, axis=0)

    # Perform the actual detection by running the model with the image as input.
    (boxes, scores, classes, num) = sess.run(
        [self.detection_boxes, self.detection_scores,
         self.detection_classes, self.num_detections],
        feed_dict={self.image_tensor: image_expanded})

    # Collect the indices of detections scoring above 0.40.
    result = scores.flatten()
    res = []
    for idx in range(0, len(result)):
        if result[idx] > .40:
            res.append(idx)

    top_classes = classes.flatten()
    # Selecting class 2 and 3
    # top_classes = top_classes[top_classes > 1]
    res_list = [top_classes[i] for i in res]
    class_final_names = [self.class_names_mapping[x] for x in res_list]
    top_scores = [e for l2 in scores for e in l2 if e > 0.30]
    # final_output = list(zip(class_final_names, top_scores))
    # print(final_output)

    new_scores = scores.flatten()
    new_boxes = boxes.reshape(300, 4)

    # Get all boxes from an array.
    max_boxes_to_draw = new_boxes.shape[0]
    # This is set as a default, but feel free to adjust it to your needs.
    min_score_thresh = .30
    # Iterate over all objects found.
    # boundingBox = {}
    # for i in range(min(max_boxes_to_draw, new_boxes.shape[0])):
    #     if new_scores is None or new_scores[i] > min_score_thresh:
    #         boundingBox[class_final_names[i]] = new_boxes[i]
    #         print("Bounding Boxes of", class_final_names[i], new_boxes[i])
    listOfOutput = []
    for (name, score, i) in zip(class_final_names, top_scores,
                                range(min(max_boxes_to_draw, new_boxes.shape[0]))):
        valDict = {}
        valDict["className"] = name
        valDict["confidence"] = str(score)
        if new_scores is None or new_scores[i] > min_score_thresh:
            val = list(new_boxes[i])
            valDict["yMin"] = str(val[0])
            valDict["xMin"] = str(val[1])
            valDict["yMax"] = str(val[2])
            valDict["xMax"] = str(val[3])
        listOfOutput.append(valDict)

    # Draw the results of the detection (aka 'visualize the results').
    vis_util.visualize_boxes_and_labels_on_image_array(
        image,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        self.category_index,
        use_normalized_coordinates=True,
        line_thickness=8,
        min_score_thresh=0.60)
    output_filename = 'output4.jpg'
    cv2.imwrite(output_filename, image)
    opencodedbase64 = encodeImageIntoBase64("output4.jpg")

    # Optionally display the image:
    # cv2.imshow('Object detector', image)
    # cv2.waitKey(0)
    # cv2.destroyAllWindows()

    listOfOutput.append({"image": opencodedbase64.decode('utf-8')})
    return listOfOutput
def visualize_detection_results(result_dict,
                                tag,
                                global_step,
                                categories,
                                summary_dir='',
                                export_dir='',
                                agnostic_mode=False,
                                show_groundtruth=False,
                                groundtruth_box_visualization_color='black',
                                min_score_thresh=.5,
                                max_num_predictions=20,
                                skip_scores=False,
                                skip_labels=False,
                                keep_image_id_for_visualization_export=False):
    """Visualizes detection results and writes visualizations to image summaries.

    This function visualizes an image with its detected bounding boxes and
    writes to image summaries which can be viewed on tensorboard. It
    optionally also writes images to a directory. If an entry is missing from
    the label map, the unknown class name is shown as "N/A" in the
    visualization.

    Args:
      result_dict: a dictionary holding groundtruth and detection data
        corresponding to each image being evaluated. The following keys are
        required:
          'original_image': a numpy array representing the image with shape
            [1, height, width, 3] or [1, height, width, 1]
          'detection_boxes': a numpy array of shape [N, 4]
          'detection_scores': a numpy array of shape [N]
          'detection_classes': a numpy array of shape [N]
        The following keys are optional:
          'groundtruth_boxes': a numpy array of shape [N, 4]
          'groundtruth_keypoints': a numpy array of shape [N, num_keypoints, 2]
        Detections are assumed to be provided in decreasing order of score,
        and scores are assumed to be probabilities between 0 and 1.
      tag: tensorboard tag (string) to associate with image.
      global_step: global step at which the visualizations are generated.
      categories: a list of dictionaries representing all possible categories.
        Each dict in this list has the following keys:
          'id': (required) an integer id uniquely identifying this category
          'name': (required) string representing category name
            e.g., 'cat', 'dog', 'pizza'
          'supercategory': (optional) string representing the supercategory
            e.g., 'animal', 'vehicle', 'food', etc
      summary_dir: the output directory to which the image summaries are
        written.
      export_dir: the output directory to which images are written. If this is
        empty (default), then images are not exported.
      agnostic_mode: boolean (default: False) controlling whether to evaluate
        in class-agnostic mode or not.
      show_groundtruth: boolean (default: False) controlling whether to show
        groundtruth boxes in addition to detected boxes.
      groundtruth_box_visualization_color: box color for visualizing
        groundtruth boxes.
      min_score_thresh: minimum score threshold for a box to be visualized.
      max_num_predictions: maximum number of detections to visualize.
      skip_scores: whether to skip the score when drawing a single detection.
      skip_labels: whether to skip the label when drawing a single detection.
      keep_image_id_for_visualization_export: whether to keep the image
        identifier in the filename when exported to export_dir.

    Raises:
      ValueError: if result_dict does not contain the expected keys (i.e.,
        'original_image', 'detection_boxes', 'detection_scores',
        'detection_classes')
    """
    detection_fields = fields.DetectionResultFields
    input_fields = fields.InputDataFields
    if not set([
            input_fields.original_image,
            detection_fields.detection_boxes,
            detection_fields.detection_scores,
            detection_fields.detection_classes,
    ]).issubset(set(result_dict.keys())):
        raise ValueError('result_dict does not contain all expected keys.')
    if show_groundtruth and input_fields.groundtruth_boxes not in result_dict:
        raise ValueError('If show_groundtruth is enabled, result_dict must '
                         'contain groundtruth_boxes.')
    logging.info('Creating detection visualizations.')
    category_index = label_map_util.create_category_index(categories)

    image = np.squeeze(result_dict[input_fields.original_image], axis=0)
    if image.shape[2] == 1:  # If one channel image, repeat in RGB.
        image = np.tile(image, [1, 1, 3])
    detection_boxes = result_dict[detection_fields.detection_boxes]
    detection_scores = result_dict[detection_fields.detection_scores]
    detection_classes = np.int32(
        (result_dict[detection_fields.detection_classes]))
    detection_keypoints = result_dict.get(detection_fields.detection_keypoints)
    detection_masks = result_dict.get(detection_fields.detection_masks)
    detection_boundaries = result_dict.get(
        detection_fields.detection_boundaries)

    # Plot groundtruth underneath detections
    if show_groundtruth:
        groundtruth_boxes = result_dict[input_fields.groundtruth_boxes]
        groundtruth_keypoints = result_dict.get(
            input_fields.groundtruth_keypoints)
        vis_utils.visualize_boxes_and_labels_on_image_array(
            image=image,
            boxes=groundtruth_boxes,
            classes=None,
            scores=None,
            category_index=category_index,
            keypoints=groundtruth_keypoints,
            use_normalized_coordinates=False,
            max_boxes_to_draw=None,
            groundtruth_box_visualization_color=groundtruth_box_visualization_color)
    vis_utils.visualize_boxes_and_labels_on_image_array(
        image,
        detection_boxes,
        detection_classes,
        detection_scores,
        category_index,
        instance_masks=detection_masks,
        instance_boundaries=detection_boundaries,
        keypoints=detection_keypoints,
        use_normalized_coordinates=False,
        max_boxes_to_draw=max_num_predictions,
        min_score_thresh=min_score_thresh,
        agnostic_mode=agnostic_mode,
        skip_scores=skip_scores,
        skip_labels=skip_labels)

    if export_dir:
        if (keep_image_id_for_visualization_export and
                result_dict[fields.InputDataFields().key]):
            export_path = os.path.join(
                export_dir, 'export-{}-{}.png'.format(
                    tag, result_dict[fields.InputDataFields().key]))
        else:
            export_path = os.path.join(export_dir, 'export-{}.png'.format(tag))
        vis_utils.save_image_array_as_png(image, export_path)

    summary = tf.Summary(value=[
        tf.Summary.Value(
            tag=tag,
            image=tf.Summary.Image(
                encoded_image_string=vis_utils.encode_image_array_as_png_str(
                    image)))
    ])
    summary_writer = tf.summary.FileWriterCache.get(summary_dir)
    summary_writer.add_summary(summary, global_step)

    logging.info('Detection visualizations written to summary with tag %s.',
                 tag)
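A minimal, hypothetical call to visualize_detection_results follows; the arrays are dummies with the expected shapes, and since the function draws with use_normalized_coordinates=False, the boxes are given in absolute pixel coordinates.

import numpy as np
from object_detection.core import standard_fields as fields

result_dict = {
    fields.InputDataFields.original_image:
        np.zeros((1, 480, 640, 3), dtype=np.uint8),
    fields.DetectionResultFields.detection_boxes:
        np.array([[10., 10., 100., 100.]], dtype=np.float32),
    fields.DetectionResultFields.detection_scores:
        np.array([0.9], dtype=np.float32),
    fields.DetectionResultFields.detection_classes:
        np.array([1], dtype=np.int32),
}
categories = [{'id': 1, 'name': 'cat'}]
visualize_detection_results(result_dict, tag='image-0', global_step=0,
                            categories=categories,
                            summary_dir='/tmp/summaries')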
def get_classification(self, image):
    """Determines the color of the traffic light in the image.

    WORK IN PROGRESS

    Args:
        image (cv::Mat): image containing the traffic light

    Returns:
        int: ID of traffic light color (specified in styx_msgs/TrafficLight)
    """
    if self.debug:
        tic = time.time()

    self.current_light = TrafficLight.UNKNOWN
    image_np_expanded = np.expand_dims(image, axis=0)

    # Run detection
    with self.classifer_graph.as_default():
        (boxes, scores, classes, num) = self.sess.run(
            [self.detection_boxes, self.detection_scores,
             self.detection_classes, self.num_detections],
            feed_dict={self.image_tensor: image_np_expanded})

    boxes = np.squeeze(boxes)
    scores = np.squeeze(scores)
    classes = np.squeeze(classes).astype(np.int32)

    if self.debug:
        rospy.loginfo('classes: %s \n scores %s ' % (classes[:5], scores[:5]))

    # Light color prediction
    min_score_thresh = .3
    for i in range(boxes.shape[0]):
        if scores is None or scores[i] > min_score_thresh:
            class_name = self.category_index[classes[i]]['name']
            if class_name == 'Red':
                self.current_light = TrafficLight.RED
            elif class_name == 'Green':
                self.current_light = TrafficLight.GREEN
            elif class_name == 'Yellow':
                self.current_light = TrafficLight.YELLOW
            # Detections are sorted by score, so the first hit is the best match.
            break

    # Visualization of the results of a detection.
    visualization_utils.visualize_boxes_and_labels_on_image_array(
        image, boxes, classes, scores, self.category_index,
        use_normalized_coordinates=True, line_thickness=8)

    # For visualization topic output
    if self.debug:
        self._current_image = image
        cv2.imshow('image', image)
        cv2.waitKey(1)

    if self.debug:
        toc = time.time()
        rospy.loginfo('classifier took {} sec'.format(toc - tic))

    return self.current_light
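get_classification depends on ROS-side pieces (rospy and the styx_msgs/TrafficLight message) that are not shown here. For exercising the classifier outside ROS, a minimal stand-in might look like the following; the integer values mirror the usual styx_msgs definition but should be checked against the actual .msg file:

class TrafficLight(object):
    # Stand-in for styx_msgs/TrafficLight; verify these IDs against the
    # real message definition before relying on them.
    RED = 0
    YELLOW = 1
    GREEN = 2
    UNKNOWN = 4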
def get_detect_info(self, image_path, threshold):
    # image_path is expected to be a directory of .jpg files.
    jpg_file_name, jpg_file_path = get_file_name_path(image_path,
                                                      format_key=".jpg")
    with self.detection_graph.as_default():
        with tf.Session(graph=self.detection_graph) as sess:
            # Fetch the input and output tensors once, outside the image loop,
            # so the tensor names are not shadowed by the sess.run() results.
            image_tensor = self.detection_graph.get_tensor_by_name(
                'image_tensor:0')
            boxes_tensor = self.detection_graph.get_tensor_by_name(
                'detection_boxes:0')
            scores_tensor = self.detection_graph.get_tensor_by_name(
                'detection_scores:0')
            classes_tensor = self.detection_graph.get_tensor_by_name(
                'detection_classes:0')
            num_detections_tensor = self.detection_graph.get_tensor_by_name(
                'num_detections:0')

            info_list = []
            for fp in jpg_file_path:
                image = cv2.imread(fp)
                # Expand dimensions since the model expects images to have
                # shape: [1, None, None, 3]
                image_np_expanded = np.expand_dims(image, axis=0)

                # Actual detection.
                (boxes, scores, classes, num_detections) = sess.run(
                    [boxes_tensor, scores_tensor, classes_tensor,
                     num_detections_tensor],
                    feed_dict={image_tensor: image_np_expanded})

                # Visualization of the results of a detection.
                vis_util.visualize_boxes_and_labels_on_image_array(
                    image,
                    np.squeeze(boxes),
                    np.squeeze(classes).astype(np.int32),
                    np.squeeze(scores),
                    self.category_index,
                    use_normalized_coordinates=True,
                    line_thickness=8)

                # Record [path, class, box, score, accepted-flag] for each
                # reported detection; the flag marks whether the score passed
                # the threshold.
                py_scores = np.squeeze(scores)
                py_classes = np.squeeze(classes)
                py_boxes = np.squeeze(boxes)
                for n in range(int(num_detections[0])):
                    flag = 1.0 if py_scores[n] > threshold else 0.0
                    info_list.append([fp, int(py_classes[n]), py_boxes[n],
                                      py_scores[n], flag])

    print(info_list)
    return info_list
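get_detect_info calls a get_file_name_path helper that is not defined in this document. A plausible sketch, inferred from the call site (image_path is treated as a directory of .jpg files and two parallel lists come back):

import os

def get_file_name_path(dir_path, format_key=".jpg"):
    # Return parallel lists of file names and full paths for every file in
    # dir_path whose name ends with format_key.
    names, paths = [], []
    for fname in sorted(os.listdir(dir_path)):
        if fname.endswith(format_key):
            names.append(fname)
            paths.append(os.path.join(dir_path, fname))
    return names, paths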
def get_localization(self, image, visual=False):
    """Determines the locations of the faces in the image.

    Args:
        image: camera image

    Returns:
        list of bounding boxes: coordinates [y_up, x_left, y_down, x_right]
    """
    category_index = {1: {'id': 1, 'name': u'face'}}  # WIDERFACE
    # category_index = {1: {'id': 1, 'name': u'face'},   # FDDB
    #                   2: {'id': 2, 'name': u'eye'}}    # FDDB

    with self.detection_graph.as_default():
        image_expanded = np.expand_dims(image, axis=0)
        (boxes, scores, classes, num_detections) = self.sess.run(
            [self.boxes, self.scores, self.classes, self.num_detections],
            feed_dict={self.image_tensor: image_expanded})

        if visual:
            vis_util.visualize_boxes_and_labels_on_image_array(
                image,
                np.squeeze(boxes),
                np.squeeze(classes).astype(np.int32),
                np.squeeze(scores),
                category_index,
                use_normalized_coordinates=True,
                min_score_thresh=.4,
                line_thickness=3)
            plt.figure(figsize=(9, 6))
            plt.imshow(image)
            plt.show()

        boxes = np.squeeze(boxes)
        classes = np.squeeze(classes)
        scores = np.squeeze(scores)

        # Keep only face detections (class 1) above the score threshold.
        cls = classes.tolist()
        idx_vec = [i for i, v in enumerate(cls)
                   if ((v == 1) and (scores[i] > 0.3))]

        if len(idx_vec) == 0:
            print('no detection!')
            self.face_boxes = []
        else:
            tmp_face_boxes = []
            for idx in idx_vec:
                dim = image.shape[0:2]
                box = self.box_normal_to_pixel(boxes[idx], dim)
                box_h = box[2] - box[0]
                box_w = box[3] - box[1]
                # Faces are expected to be taller than wide and reasonably
                # large; discard boxes with the wrong aspect ratio or size.
                ratio = box_h / (box_w + 0.01)
                if (ratio > 1.2) and (box_h > 20) and (box_w > 20):
                    tmp_face_boxes.append(box)
                    print(box, ', confidence: ', scores[idx], 'ratio:', ratio)
                else:
                    print('wrong ratio or wrong size, ', box,
                          ', confidence: ', scores[idx], 'ratio:', ratio)
            self.face_boxes = tmp_face_boxes

    return self.face_boxes
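get_localization calls self.box_normal_to_pixel, which is not shown. A typical implementation, assuming boxes arrive as normalized [y_min, x_min, y_max, x_max] and dim is the image's (height, width):

import numpy as np

def box_normal_to_pixel(self, box, dim):
    # Scale a normalized [y_min, x_min, y_max, x_max] box to integer pixel
    # coordinates using the image height and width.
    height, width = dim[0], dim[1]
    return np.array([int(box[0] * height), int(box[1] * width),
                     int(box[2] * height), int(box[3] * width)])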