def visualization_py_func_fn(*args):
    """Visualization function that can be wrapped in a tf.py_func.

    Args:
      *args: First 5 positional arguments must be:
        image - uint8 numpy array with shape (height, width, 3).
        total - an integer denoting the actual number of boxes.
        boxes - a numpy array of shape [max_pad_num, 4].
        labels - a numpy array of shape [max_pad_num].
        scores - a numpy array of shape [max_pad_num].

    Returns:
      uint8 numpy array with shape (height, width, 3) with overlaid boxes.
    """
    image, total, boxes, labels, scores = args
    for i in range(total):
        ymin, xmin, ymax, xmax = boxes[i]
        display_str = '%i%% %s' % (int(scores[i] * 100),
                                   labels[i].decode('utf8'))
        color = STANDARD_COLORS[i % len(STANDARD_COLORS)]
        draw_bounding_box_on_image_array(image, ymin, xmin, ymax, xmax, color,
                                         display_str_list=[display_str])
    return image
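A minimal sketch of wrapping the function above in a graph op, assuming TF 1.x; the placeholder tensors and their shapes are illustrative stand-ins for real pipeline tensors:

```python
import tensorflow as tf

# Illustrative placeholders matching the docstring shapes (assumption).
image_t = tf.placeholder(tf.uint8, [None, None, 3])
total_t = tf.placeholder(tf.int32, [])
boxes_t = tf.placeholder(tf.float32, [None, 4])
labels_t = tf.placeholder(tf.string, [None])
scores_t = tf.placeholder(tf.float32, [None])

# tf.py_func drops static shape information, so restore it afterwards.
drawn_t = tf.py_func(visualization_py_func_fn,
                     [image_t, total_t, boxes_t, labels_t, scores_t],
                     tf.uint8)
drawn_t.set_shape([None, None, 3])
```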
def draw_tracked_people(img_bgr, tracked_people):
    """
    Draw the bounding box and attribute labels of detected and tracked people,
    each person in a randomly chosen color.

    :param img_bgr: Original image where people are detected and tracked.
    :param tracked_people: a list of TrackedPerson objects.
    :return: A numpy array (BGR image) with the annotations drawn.
    """
    if len(tracked_people) == 0:
        return img_bgr
    img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
    for person in tracked_people:
        color = STANDARD_COLORS[np.random.randint(1, 100) % len(STANDARD_COLORS)]
        text = label_to_text(person.male)
        if person.facemask == 1:
            text += facemask_text
        if person.formal == 1:
            text += formal_text
        if person.hat == 1:
            text += hat_text
        if person.jeans == 1:
            text += jeans_text
        if person.logo == 1:
            text += logo_text
        if person.longhair == 1:
            text += longhair_text
        if person.longpants == 1:
            text += longpants_text
        if person.longsleeve == 1:
            text += longsleeve_text
        if person.shorts == 1:
            text += shorts_text
        if person.skirt == 1:
            text += skirt_text
        if person.stripe == 1:
            text += stripe_text
        if person.sunglass == 1:
            text += sunglass_text
        if person.tshirt == 1:
            text += tshirt_text
        if person.box is not None:
            xmin, ymin, xmax, ymax = person.box
            # text[:-3] trims the trailing separator the attribute strings
            # appear to end with.
            draw_bounding_box_on_image_array(img_rgb, ymin, xmin, ymax, xmax,
                                             color=color, thickness=2,
                                             display_str_list=[text[:-3]],
                                             use_normalized_coordinates=False)
    return cv2.cvtColor(img_rgb, cv2.COLOR_RGB2BGR)
def get_current_frame_with_objects(self):
    # make a copy of the current detected objects
    detected_objects = self.detected_objects.copy()
    # lock and make a copy of the current frame
    with self.frame_lock:
        frame = self.shared_frame_np.copy()
    # convert to RGB for drawing
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    # draw the bounding boxes on the screen
    for obj in detected_objects:
        vis_util.draw_bounding_box_on_image_array(
            frame, obj['ymin'], obj['xmin'], obj['ymax'], obj['xmax'],
            color='red', thickness=2,
            display_str_list=["{}: {}%".format(obj['name'], int(obj['score'] * 100))],
            use_normalized_coordinates=False)
    for region in self.regions:
        color = (255, 255, 255)
        cv2.rectangle(frame,
                      (region['x_offset'], region['y_offset']),
                      (region['x_offset'] + region['size'],
                       region['y_offset'] + region['size']),
                      color, 2)
    # convert back to BGR
    frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
    return frame
def run(self):
    motion_start = 0.0
    motion_end = 0.0
    while True:
        # while there is motion
        while len([r for r in self.motion_regions if r.is_set()]) > 0:
            # wait until objects have been parsed
            with self.objects_parsed:
                self.objects_parsed.wait()
            # make a copy of detected objects
            detected_objects = self.detected_objects.copy()
            detected_people = [obj for obj in detected_objects if obj['name'] == 'person']
            # make a copy of the recent frames
            recent_frames = self.recent_frames.copy()
            # get the highest scoring person
            new_best_person = max(detected_people, key=lambda x: x['score'],
                                  default=self.best_person)
            # if there isn't a person, continue
            if new_best_person is None:
                continue
            # if there is no current best_person
            if self.best_person is None:
                self.best_person = new_best_person
            # if there is already a best_person
            else:
                now = datetime.datetime.now().timestamp()
                # if the new best person has a higher score than the current one,
                # or the current person is more than 1 minute old, use the new one
                if new_best_person['score'] > self.best_person['score'] or (now - self.best_person['frame_time']) > 60:
                    self.best_person = new_best_person
            if self.best_person is not None and self.best_person['frame_time'] in recent_frames:
                best_frame = recent_frames[self.best_person['frame_time']]
                best_frame = cv2.cvtColor(best_frame, cv2.COLOR_BGR2RGB)
                # draw the bounding box on the frame
                vis_util.draw_bounding_box_on_image_array(
                    best_frame,
                    self.best_person['ymin'], self.best_person['xmin'],
                    self.best_person['ymax'], self.best_person['xmax'],
                    color='red', thickness=2,
                    display_str_list=["{}: {}%".format(self.best_person['name'],
                                                       int(self.best_person['score'] * 100))],
                    use_normalized_coordinates=False)
                # convert back to BGR
                self.best_frame = cv2.cvtColor(best_frame, cv2.COLOR_RGB2BGR)
            motion_end = datetime.datetime.now().timestamp()
        # wait for the global motion flag to change
        with self.motion_changed:
            self.motion_changed.wait()
        motion_start = datetime.datetime.now().timestamp()
def main(_):
    # Enable verbose logging
    tf.logging.set_verbosity(tf.logging.INFO)
    # Check if all required flags are present
    required_flags = ['image', 'output_path', 'inference_graph']
    for flag_name in required_flags:
        if not getattr(FLAGS, flag_name):
            raise ValueError('Flag --{} is required'.format(flag_name))
    # Load the category index, a dictionary mapping integer ids to category
    # dicts, e.g. {1: {'id': 1, 'name': 'dog'}, 2: {'id': 2, 'name': 'cat'}, ...}
    category_index_from_labelmap = label_map_util.create_category_index_from_labelmap(
        FLAGS.path_protofile)
    with tf.Session() as sess:
        input_path = FLAGS.image
        tf.logging.info('Reading input from %s', input_path)
        # Obtain image tensor
        image_tensor = load_image(input_path)
        # Run graph
        tf.logging.info('Reading graph and building model...')
        (detected_boxes_tensor, detected_scores_tensor,
         detected_labels_tensor) = detection_inference.build_inference_graph(
             image_tensor, FLAGS.inference_graph)
        # Get detections; boxes are of the form [ymin, xmin, ymax, xmax]
        (detected_boxes, detected_scores,
         detected_labels) = sess.run([detected_boxes_tensor,
                                      detected_scores_tensor,
                                      detected_labels_tensor])
        input_image = sess.run(image_tensor)
        input_image = np.squeeze(input_image)
        # Draw bounding boxes for detections above the confidence threshold.
        (ii,) = np.where(detected_scores > FLAGS.confidence)
        for i in ii:
            ymin, xmin, ymax, xmax = detected_boxes[i]
            category = category_index_from_labelmap[detected_labels[i]]['name']
            vis_utils.draw_bounding_box_on_image_array(
                input_image, ymin=ymin, xmin=xmin, ymax=ymax, xmax=xmax,
                display_str_list=[category], color='MediumPurple')
        vis_utils.save_image_array_as_png(input_image, FLAGS.output_path)
def visualize_box(bg_image, box, display_str, color):
    ymin, xmin, ymax, xmax = box
    vis_util.draw_bounding_box_on_image_array(bg_image, ymin, xmin, ymax, xmax,
                                              color=color, thickness=4,
                                              display_str_list=[display_str],
                                              use_normalized_coordinates=False)
def _label_image(self, image, box, score, class_idx=1):
    ymin, xmin, ymax, xmax = box
    class_label = self.category_index[class_idx]['name']
    display_str = '{}: {}%'.format(class_label, int(100 * score))
    vis_util.draw_bounding_box_on_image_array(
        image, ymin, xmin, ymax, xmax, color='aquamarine', thickness=8,
        display_str_list=[display_str], use_normalized_coordinates=True)
    return image
def test_draw_bounding_box_on_image_array(self):
    test_image = self.create_colorful_test_image()
    # shape[0] is the image height and shape[1] the width.
    height_original = test_image.shape[0]
    width_original = test_image.shape[1]
    ymin = 0.25
    ymax = 0.75
    xmin = 0.4
    xmax = 0.6
    visualization_utils.draw_bounding_box_on_image_array(
        test_image, ymin, xmin, ymax, xmax)
    height_final = test_image.shape[0]
    width_final = test_image.shape[1]
    self.assertEqual(height_original, height_final)
    self.assertEqual(width_original, width_final)
def draw_boxes(image, box_to_color_map, box_to_display_str_map, line_thickness,
               use_normalized_coordinates):
    # Draw all boxes onto image.
    for box, color in box_to_color_map.items():
        ymin, xmin, ymax, xmax = box
        draw_bounding_box_on_image_array(
            image, ymin, xmin, ymax, xmax, color=color,
            thickness=line_thickness,
            display_str_list=box_to_display_str_map[box],
            use_normalized_coordinates=use_normalized_coordinates)
    return image
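A hedged usage sketch for `draw_boxes`; it assumes `draw_boxes` and `STANDARD_COLORS` from the snippet above are in scope, and the image and detections are illustrative:

```python
import collections
import numpy as np

# Illustrative inputs (assumptions): two normalized boxes on a blank image.
image = np.zeros((480, 640, 3), dtype=np.uint8)
boxes = [(0.1, 0.2, 0.5, 0.6), (0.3, 0.1, 0.9, 0.4)]
box_to_color_map = collections.OrderedDict(
    (box, STANDARD_COLORS[i % len(STANDARD_COLORS)])
    for i, box in enumerate(boxes))
box_to_display_str_map = {box: ['person: 87%'] for box in boxes}

annotated = draw_boxes(image, box_to_color_map, box_to_display_str_map,
                       line_thickness=4, use_normalized_coordinates=True)
```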
def insight_recognize(image_np, frame):
    names, bounding_boxes = recognizer.recognize(image_np)
    if len(names) != 0:
        for idx, name in enumerate(names):
            if name:
                # bounding_boxes holds (left, top, width, height) in pixels.
                vis_util.draw_bounding_box_on_image_array(
                    frame,
                    bounding_boxes[idx][1],
                    bounding_boxes[idx][0],
                    bounding_boxes[idx][1] + bounding_boxes[idx][3],
                    bounding_boxes[idx][0] + bounding_boxes[idx][2],
                    color='Magenta',
                    display_str_list=[str(name)],
                    use_normalized_coordinates=False)
    return frame
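The call above converts the recognizer's (left, top, width, height) boxes to the (ymin, xmin, ymax, xmax) corner form inline; a small sketch of that conversion factored out (the helper name is illustrative):

```python
def xywh_to_corners(box):
    """Convert a pixel-space (left, top, width, height) box to corner form."""
    x, y, w, h = box
    return y, x, y + h, x + w

# e.g. a 120x150-pixel face at (40, 60):
ymin, xmin, ymax, xmax = xywh_to_corners((40, 60, 120, 150))
```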
def draw_tracker_boxes(image, box_to_id_map, line_thickness,
                       use_normalized_coordinates, color):
    # Draw all boxes onto image.
    for box, track_id in box_to_id_map.items():
        ymin, xmin, ymax, xmax = box
        draw_bounding_box_on_image_array(
            image, ymin, xmin, ymax, xmax, color=color,
            thickness=line_thickness,
            display_str_list=['car: ' + track_id],
            use_normalized_coordinates=use_normalized_coordinates)
    return image
def draw_tracked_people(img_bgr, tracked_people):
    """
    Draw bounding box and mask of detected and tracked people, each with a
    different color based on their ids.

    :param img_bgr: Original image where people are detected and tracked.
    :param tracked_people: a list of TrackedPerson objects.
    :return: A numpy array (BGR image) with the annotations drawn.
    """
    if len(tracked_people) == 0:
        return img_bgr
    img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
    for person in tracked_people:
        color = STANDARD_COLORS[person.id % len(STANDARD_COLORS)]
        if person.body_box is not None:
            xmin, ymin, xmax, ymax = person.body_box
            draw_bounding_box_on_image_array(
                img_rgb, ymin, xmin, ymax, xmax, color=color,
                display_str_list=['ID:{} Score:{:.2f}'.format(person.id, person.body_score)],
                use_normalized_coordinates=False)
        if person.face_box is not None:
            xmin, ymin, xmax, ymax = person.face_box
            draw_bounding_box_on_image_array(
                img_rgb, ymin, xmin, ymax, xmax, color=color,
                display_str_list=['Score:{:.2f}'.format(person.face_score)],
                use_normalized_coordinates=False)
        if person.body_mask is not None:
            draw_mask_on_image_array(img_rgb, person.body_mask, color=color)
    return cv2.cvtColor(img_rgb, cv2.COLOR_RGB2BGR)
def detection_callback(self, msg):
    """Callback for bounding box detections from the neural net."""
    object_ids = []
    rois = []
    for detection in msg.detections:
        if detection.label == 'person':
            object_ids.append(detection.id)
            rois.append(detection.mask.roi)
    if len(object_ids) < 1:
        self.frames_missing += 1
        self.pub_detections_image.publish(self.cached_image)
        return
    if self.track_id in object_ids:
        # Found object, reset counter and draw a bounding box
        self.frames_missing = 0
        roi = rois[object_ids.index(self.track_id)]
        # Number between -1 and 1 for the object's horizontal position
        x_relative = ((roi.x + roi.x + roi.width) / float(self.cached_image.width)) - 1.0
        self.steering_cmd.steering_wheel_angle_cmd = x_relative * -10
        try:
            cv_image = self._bridge.imgmsg_to_cv2(self.cached_image, "bgr8")
            vis_util.draw_bounding_box_on_image_array(
                cv_image, roi.y, roi.x, roi.y + roi.height, roi.x + roi.width,
                use_normalized_coordinates=False)
            msg_im = self._bridge.cv2_to_imgmsg(cv_image, encoding="passthrough")
            self.pub_detections_image.publish(msg_im)
        except CvBridgeError as e:
            print(e)
    else:
        self.frames_missing += 1
        if self.frames_missing > self.reset_threshold:
            self.track_id = min(object_ids)
        self.pub_detections_image.publish(self.cached_image)
def draw_bounding_box_py_func_fn(*args):
    """Bounding box drawing function that can be wrapped in a tf.py_func.

    Args:
      *args: First 5 positional arguments must be:
        image - uint8 numpy array with shape (height, width, 3).
        total - an integer denoting the actual number of boxes.
        boxes - a numpy array of shape [max_pad_num, 4].
        labels - a numpy array of shape [max_pad_num].
        scores - a numpy array of shape [max_pad_num].
        An optional sixth argument sets the line thickness (default 1).

    Returns:
      uint8 numpy array with shape (height, width, 3) with overlaid boxes.
    """
    image, total, boxes, labels, scores = args[:5]
    thickness = 1
    if len(args) > 5:
        thickness = args[5]
    # Draw from the last box to the first so earlier boxes end up on top.
    for i in range(total - 1, -1, -1):
        ymin, xmin, ymax, xmax = boxes[i]
        display_str = ''
        if labels is not None and scores is not None:
            display_str = '%i%% %s' % (int(scores[i] * 100),
                                       labels[i].decode('utf8'))
        elif labels is not None:
            try:
                display_str = '%s' % (labels[i].decode('utf8'))
            except Exception:
                display_str = labels[i]
        elif scores is not None:
            display_str = '%i%%' % (int(scores[i] * 100))
        color = STANDARD_COLORS[i % len(STANDARD_COLORS)]
        display_str_list = [display_str] if display_str else []
        draw_bounding_box_on_image_array(image, ymin, xmin, ymax, xmax, color,
                                         thickness=thickness,
                                         display_str_list=display_str_list)
    return image
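A hedged sketch of calling the function above directly on padded numpy inputs, as its docstring describes; it assumes `draw_bounding_box_on_image_array` and `STANDARD_COLORS` are in scope, and the arrays are illustrative:

```python
import numpy as np

# One real box plus one padding row (assumption: zero rows are padding).
image = np.zeros((480, 640, 3), dtype=np.uint8)
boxes = np.array([[0.1, 0.1, 0.4, 0.4], [0.0, 0.0, 0.0, 0.0]], np.float32)
labels = np.array([b'person', b''])
scores = np.array([0.91, 0.0], np.float32)

# total=1 means only the first row is drawn; the optional sixth
# argument sets the line thickness.
annotated = draw_bounding_box_py_func_fn(image, 1, boxes, labels, scores, 2)
```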
def draw_fruits_box(img_bgr, fruits):
    """
    Draw bounding box, position and size of each detected fruit.

    :param img_bgr: Original image where fruits are detected.
    :param fruits: a list of fruit objects.
    :return: A numpy array (BGR image) with the annotations drawn.
    """
    if len(fruits) == 0:
        return img_bgr
    img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
    height, width, _ = img_rgb.shape
    for fruit in fruits:
        if fruit.box is not None:
            xmin, ymin, xmax, ymax = fruit.box
            draw_bounding_box_on_image_array(
                img_rgb, ymin, xmin, ymax, xmax, color='white', thickness=2,
                display_str_list=[
                    ' Type:{}|XYZ=({:.2f},{:.2f},{:.2f})cm|Size:{:.2f}cm'.format(
                        fruit.cls, fruit.distance,
                        (xmin + xmax - width) * 0.5 * (28 / 210),
                        (height - (ymin + ymax)) * 0.5 * (28 / 210),
                        fruit.size)
                ],
                use_normalized_coordinates=False)
    return cv2.cvtColor(img_rgb, cv2.COLOR_RGB2BGR)
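The X/Y terms in the label above map pixel offsets from the image center to centimeters using a fixed 28/210 cm-per-pixel scale, presumably a calibration constant from the original project. A minimal sketch of that mapping under that assumption:

```python
CM_PER_PX = 28.0 / 210.0  # assumed calibration constant from the snippet above

def box_center_cm(xmin, ymin, xmax, ymax, width, height):
    """Offset of a pixel-space box center from the image center, in cm."""
    x_cm = (xmin + xmax - width) * 0.5 * CM_PER_PX
    y_cm = (height - (ymin + ymax)) * 0.5 * CM_PER_PX  # y grows upward
    return x_cm, y_cm
```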
def visualize_boxes(self, image, index):
    for i in range(self.output.size()):
        xmin, xmax, ymin, ymax = self.output.get_coordinates(i)
        if self.output.get_label(i) in self.category_index.keys():
            class_name = self.category_index[self.output.get_label(i)]['name']
        else:
            class_name = 'N/A'
        display_str = '{}: {}%'.format(class_name,
                                       int(100 * self.output.get_score(i)))
        color = (0, 255, 0)
        if i == index:
            color = (0, 0, 255)
            display_str += ' TRACKING'
        vis_util.draw_bounding_box_on_image_array(image, ymin, xmin, ymax,
                                                  xmax, color, 2,
                                                  [display_str], False)
def imagestream():
    while True:
        # max out at 5 FPS
        time.sleep(0.2)
        # make a copy of the current detected objects
        detected_objects = DETECTED_OBJECTS.copy()
        # lock and make a copy of the current frame
        with frame_lock:
            frame = frame_arr.copy()
        # convert to RGB for drawing
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # draw the bounding boxes on the screen
        for obj in detected_objects:
            vis_util.draw_bounding_box_on_image_array(
                frame, obj['ymin'], obj['xmin'], obj['ymax'], obj['xmax'],
                color='red', thickness=2,
                display_str_list=["{}: {}%".format(obj['name'], int(obj['score'] * 100))],
                use_normalized_coordinates=False)
        for region in regions:
            color = (255, 255, 255)
            if region['motion_detected'].is_set():
                color = (0, 255, 0)
            cv2.rectangle(frame,
                          (region['x_offset'], region['y_offset']),
                          (region['x_offset'] + region['size'],
                           region['y_offset'] + region['size']),
                          color, 2)
        # convert back to BGR
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        # encode the image into a jpg
        ret, jpg = cv2.imencode('.jpg', frame)
        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + jpg.tobytes() + b'\r\n\r\n')
def aip_msearch(image_np, frame, groupId):
    image = cv2.imencode('.jpg', image_np)[1]
    image_code = base64.b64encode(image).decode('utf-8')
    imageType = "BASE64"
    options = {}
    options["max_face_num"] = 10
    options["max_user_num"] = 1
    res = client.multiSearch(image_code, imageType, groupId, options)
    if res['result']:
        for face in res['result']['face_list']:
            # The API reports each face as (left, top, width, height).
            x = int(face['location']['left'])
            y = int(face['location']['top'])
            w = int(face['location']['width'])
            h = int(face['location']['height'])
            if face['user_list']:
                person_id = face['user_list'][0]['user_id']
                if face['user_list'][0]['score'] > 80:
                    vis_util.draw_bounding_box_on_image_array(
                        frame, y, x, y + h, x + w, color='Magenta',
                        display_str_list=[person_id + ': '
                                          + str(round(face['user_list'][0]['score'])) + '%'],
                        use_normalized_coordinates=False)
    return frame
def run(label_map_path, config_file_path, checkpoint_path, cam_ip):
    global app
    # Enable GPU memory growth if a GPU is available
    gpus = tf.config.experimental.list_physical_devices('GPU')
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
    # Load the pipeline configuration
    configs = config_util.get_configs_from_pipeline_file(config_file_path)
    model_config = configs['model']
    detection_model = model_builder.build(model_config=model_config, is_training=False)
    # Restore the given checkpoint into the detection model.
    ckpt = tf.compat.v2.train.Checkpoint(model=detection_model)
    ckpt.restore(checkpoint_path).expect_partial()
    category_index = label_map_util.create_category_index_from_labelmap(
        label_map_path, use_display_name=True)
    # Connect to the IP camera and capture its video stream with OpenCV.
    videoStreamAddress = cam_ip
    cap = cv2.VideoCapture(videoStreamAddress)
    # This loop runs face-mask detection on every received frame.
    while True:
        border_color = 'green'
        _, image_np = cap.read()
        # Run the frame through the detect function, passing the CNN model.
        input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.float32)
        detections, _, _ = detect(input_tensor, detection_model)
        detection_boxes = detections['detection_boxes'][0].numpy()
        detection_classes = detections['detection_classes'][0].numpy()
        detection_scores = detections['detection_scores'][0].numpy()
        indexes = np.array(tf.image.non_max_suppression(
            detection_boxes, detection_scores, max_output_size=100,
            iou_threshold=0.5, score_threshold=0.3))
        detection_boxes = detection_boxes[indexes]
        detection_classes = detection_classes[indexes]
        detection_scores = detection_scores[indexes]
        label_id_offset = 1
        image_np_with_detections = image_np.copy()
        # Render the detection results as bounding boxes over the detected faces
        if indexes.size != 0:
            viz_utils.visualize_boxes_and_labels_on_image_array(
                image_np_with_detections,
                detection_boxes,
                (detection_classes + label_id_offset).astype(int),
                detection_scores,
                category_index,
                use_normalized_coordinates=True,
                max_boxes_to_draw=10,
                min_score_thresh=.30,
                agnostic_mode=False,
                semaphore_mode=True)
        ### Integration of the detection with the web interface ###
        # If at least one detection in the current frame has no mask or wears
        # it incorrectly, trigger the alarm endpoint.
        if any(c == 0 or c == 2 for c in detection_classes):
            requests.get(url=URL + '1')  # send an activation signal to the alarm endpoint
            border_color = 'blue'  # renders red (TF channel-order quirk)
        # otherwise everything is fine: green
        else:
            requests.get(url=URL + '0')
            border_color = 'green'
        # Bounding box around the whole image to make the alert easy to see
        viz_utils.draw_bounding_box_on_image_array(
            image_np_with_detections, 0, 0, 1, 1,
            color=border_color, thickness=20)
        # Yield the resulting image for streaming
        frame = cv2.resize(image_np_with_detections, (800, 600))
        _, jpeg = cv2.imencode('.jpg', frame)
        frame_encoded_jpeg = jpeg.tobytes()
        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + frame_encoded_jpeg + b'\r\n\r\n')
    cap.release()
    cv2.destroyAllWindows()
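A standalone, hedged sketch of the NMS filtering step used above, assuming TF 2.x eager execution; the boxes and scores are illustrative:

```python
import numpy as np
import tensorflow as tf

boxes = np.array([[0.10, 0.10, 0.50, 0.50],
                  [0.12, 0.11, 0.50, 0.52]], np.float32)  # heavily overlapping
scores = np.array([0.9, 0.8], np.float32)

keep = tf.image.non_max_suppression(
    boxes, scores, max_output_size=100,
    iou_threshold=0.5, score_threshold=0.3).numpy()
print(boxes[keep])  # only the higher-scoring box survives
```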
def recognize(self):
    PATH_TO_FROZEN_GRAPH = 'model/frozen_inference_graph.pb'
    PATH_TO_LABELS = 'model/labelmap.pbtxt'
    NUM_CLASSES = 1
    width = 1280   # 704
    height = 720   # 416
    # Load a (frozen) Tensorflow model into memory.
    print("Loading model")
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.8
    # Loading label map
    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)
    # Perform the inference
    exit_signal = False
    with detection_graph.as_default(), tf.Session(config=config,
                                                  graph=detection_graph) as sess:
        print("INITIALIZING NEW SESSION")
        while not exit_signal:
            if (len(self.rgb_frame_buffer) > 0
                    and len(self.depth_frame_buffer) > 0):
                # acquire an RGB frame
                frame = self.rgb_frame_buffer.pop(0)
                # acquire a depth frame
                depth_frame = self.depth_frame_buffer.pop(0)
                # Expand dimensions since the model expects images to have
                # shape: [1, None, None, 3]
                image_np_expanded = np.expand_dims(frame, axis=0)
                image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
                boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
                scores = detection_graph.get_tensor_by_name('detection_scores:0')
                classes = detection_graph.get_tensor_by_name('detection_classes:0')
                num_detections = detection_graph.get_tensor_by_name('num_detections:0')
                # Actual detection.
                (boxes, scores, classes, num_detections) = sess.run(
                    [boxes, scores, classes, num_detections],
                    feed_dict={image_tensor: image_np_expanded})
                # Visualization of the results of a detection and storing
                # the positions of the targets
                box_to_display_str_map = collections.defaultdict(list)
                box_to_color_map = collections.defaultdict(str)
                research_distance_box = 30
                targets_pos = []
                image_np = frame
                num_detections_ = num_detections.astype(int)[0]
                boxes_ = np.squeeze(boxes)
                classes_ = np.squeeze(classes).astype(np.int32)
                scores_ = np.squeeze(scores)
                for i in range(num_detections_):
                    confidence = 0.8
                    if scores_[i] > confidence:
                        box = tuple(boxes_[i].tolist())
                        if classes_[i] in category_index.keys():
                            class_name = category_index[classes_[i]]['name']
                            display_str = str(class_name)
                            if not display_str:
                                display_str = '{}%'.format(int(100 * scores_[i]))
                            else:
                                display_str = '{}: {}%'.format(display_str,
                                                               int(100 * scores_[i]))
                            # Find object distance
                            ymin, xmin, ymax, xmax = box
                            x_center = int(xmin * width + (xmax - xmin) * width * 0.5)
                            y_center = int(ymin * height + (ymax - ymin) * height * 0.5)
                            x_vect = []
                            y_vect = []
                            z_vect = []
                            # the points used for calculating distances are at
                            # most 30 pixels from the center
                            min_y_r = max(int(ymin * height), int(y_center - research_distance_box))
                            min_x_r = max(int(xmin * width), int(x_center - research_distance_box))
                            max_y_r = min(int(ymax * height), int(y_center + research_distance_box))
                            max_x_r = min(int(xmax * width), int(x_center + research_distance_box))
                            if min_y_r < 0:
                                min_y_r = 0
                            if min_x_r < 0:
                                min_x_r = 0
                            if max_y_r > height:
                                max_y_r = height
                            if max_x_r > width:
                                max_x_r = width
                            for j_ in range(min_y_r, max_y_r):
                                for i_ in range(min_x_r, max_x_r):
                                    x = depth_frame[j_, i_][0]
                                    y = depth_frame[j_, i_][1]
                                    z = depth_frame[j_, i_][2]
                                    # keep only finite (x, y, z) measurements
                                    if (not np.isnan(z) and not np.isinf(z)
                                            and not np.isnan(y) and not np.isinf(y)
                                            and not np.isnan(x) and not np.isinf(x)):
                                        x_vect.append(x)
                                        y_vect.append(y)
                                        z_vect.append(z)
                            if len(x_vect) > 0:
                                x = statistics.median(x_vect)
                                y = statistics.median(y_vect)
                                z = statistics.median(z_vect)
                                # Store the position of the detected target
                                targets_pos.append([round(x, 2), round(y, 2), round(z, 2)])
                                # Calculate the distance
                                distance = math.sqrt(x * x + y * y + z * z)
                                display_str = display_str + " " + str('% 6.2f' % distance) + " m "
                            box_to_display_str_map[box].append(display_str)
                            box_to_color_map[box] = vis_util.STANDARD_COLORS[
                                classes_[i] % len(vis_util.STANDARD_COLORS)]
                print(targets_pos)
                for box, color in box_to_color_map.items():
                    ymin, xmin, ymax, xmax = box
                    vis_util.draw_bounding_box_on_image_array(
                        frame, ymin, xmin, ymax, xmax, color=color, thickness=4,
                        display_str_list=box_to_display_str_map[box],
                        use_normalized_coordinates=True)
                cv2.imshow('ZED object detection', frame)
                if cv2.waitKey(10) & 0xFF == ord('q'):
                    cv2.destroyAllWindows()
                    exit_signal = True
        sess.close()
def display_objects_distances(image_np, depth_np, num_detections, boxes_,
                              classes_, scores_, category_index, assigned,
                              unassigned, trackidcount):
    box_to_display_str_map = collections.defaultdict(list)
    box_to_color_map = collections.defaultdict(str)
    research_distance_box = 30
    # Note: it is not certain which index in categories corresponds to a person.
    for i in range(num_detections):
        # Find the track id for detection number i. Two cases:
        # 1. the track id is in the assigned array: scan assigned directly;
        # 2. it is in unassigned: scan it and subtract the matching index
        #    from the current total.
        idx = -1
        for i_assigned in range(len(assigned)):
            if assigned[i_assigned] == i:
                idx = i_assigned
                break
        # Note: the counting here may be off.
        if idx == -1:
            for i_unassigned in range(len(unassigned)):
                if unassigned[i_unassigned] == i:
                    idx = trackidcount - i_unassigned - 1
        if scores_[i] > confidence:
            box = tuple(boxes_[i].tolist())
            if classes_[i] in category_index.keys():
                class_name = category_index[classes_[i]]['name']
                display_str = str(class_name) + str(idx)
                if not display_str:
                    display_str = '{}%'.format(int(100 * scores_[i]))
                else:
                    display_str = '{}: {}%'.format(display_str, int(100 * scores_[i]))
                # Find object distance
                ymin, xmin, ymax, xmax = box
                x_center = int(xmin * width + (xmax - xmin) * width * 0.5)
                y_center = int(ymin * height + (ymax - ymin) * height * 0.5)
                x_vect = []
                y_vect = []
                z_vect = []
                min_y_r = max(int(ymin * height), int(y_center - research_distance_box))
                min_x_r = max(int(xmin * width), int(x_center - research_distance_box))
                max_y_r = min(int(ymax * height), int(y_center + research_distance_box))
                max_x_r = min(int(xmax * width), int(x_center + research_distance_box))
                if min_y_r < 0:
                    min_y_r = 0
                if min_x_r < 0:
                    min_x_r = 0
                if max_y_r > height:
                    max_y_r = height
                if max_x_r > width:
                    max_x_r = width
                for j_ in range(min_y_r, max_y_r):
                    for i_ in range(min_x_r, max_x_r):
                        z = depth_np[j_, i_, 2]
                        if not np.isnan(z) and not np.isinf(z):
                            x_vect.append(depth_np[j_, i_, 0])
                            y_vect.append(depth_np[j_, i_, 1])
                            z_vect.append(z)
                if len(x_vect) > 0:
                    x = statistics.median(x_vect)
                    y = statistics.median(y_vect)
                    z = statistics.median(z_vect)
                    distance = math.sqrt(x * x + y * y + z * z)
                    display_str = (display_str + " " + str('% 6.2f' % distance) + "m\n"
                                   + str('% 6.2f' % x) + "m" + str('% 6.2f' % y) + "m"
                                   + str('% 6.2f' % z) + "m")
                box_to_display_str_map[box].append(display_str)
                box_to_color_map[box] = vis_util.STANDARD_COLORS[
                    classes_[i] % len(vis_util.STANDARD_COLORS)]
    for box, color in box_to_color_map.items():
        ymin, xmin, ymax, xmax = box
        vis_util.draw_bounding_box_on_image_array(
            image_np, ymin, xmin, ymax, xmax, color=color, thickness=50,
            display_str_list=box_to_display_str_map[box],
            use_normalized_coordinates=True)
    return image_np
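The index-matching loops above reduce to a short lookup; a hedged sketch of the same logic with an illustrative helper name (it inherits the counting caveat noted in the comments):

```python
def track_id_for_detection(i, assigned, unassigned, trackidcount):
    """Return the track id for detection i, or -1 if it is not matched."""
    if i in assigned:
        return assigned.index(i)
    if i in unassigned:
        return trackidcount - unassigned.index(i) - 1
    return -1
```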
def Draw_detections(self, input1, n, H, h):
    print("------ DRAWING DETECTIONS AND OPTIMIZING PROJECTIONS ------")
    self.rec_points = []

    def Quadrant(y, x, beta, slope, photo, d, caja, clase):
        # (y, x) is the centroid of the detection box.
        # normalized coordinates
        x_param = 2 * x / self.image.shape[1] - 1
        y_param = 2 * y / self.image.shape[0] - 1
        y_axis = []
        #     2 | 1
        #  ____|____
        #     3 | 4     quadrants
        if x_param >= 0 and y_param >= 0:  # 4th
            if d == 0:
                x_axis = np.arange(self.image_bottom[1], x + 1)
                if slope > 1:
                    slope = abs(slope) - abs(int(slope))
            else:
                x_axis = np.arange(self.image_bottom[0], y + 1)
                if slope < 1:
                    slope += 1
                x_axis = x_axis[::-1]
        elif x_param < 0 and y_param >= 0:  # 3rd
            if d == 0:
                x_axis = np.arange(x, self.image_bottom[1] + 1)
                if abs(slope) > 1:
                    slope = (-1) * (abs(slope) - abs(int(slope)))
            else:
                x_axis = np.arange(self.image_bottom[0], y + 1)
                x_axis = x_axis[::-1]
                if abs(slope) < 1:
                    slope -= 1
        elif x_param >= 0 and y_param < 0:  # 1st
            if d == 0:
                x_axis = np.arange(self.image_bottom[1], x + 1)
                if abs(slope) > 1:
                    slope = (-1) * (abs(slope) - abs(int(slope)))
                x_axis = x_axis[::-1]
            else:
                x_axis = np.arange(y, self.image_bottom[0] + 1)
                if abs(slope) < 1:
                    slope -= 1
        elif x_param < 0 and y_param < 0:  # 2nd
            if d == 0:
                x_axis = np.arange(x, self.image_bottom[1] + 1)
                if slope > 1:
                    slope = slope - int(slope)
            else:
                x_axis = np.arange(y, self.image_bottom[0] + 1)
                if slope < 1:
                    slope += 1
        if d == 0:
            def straight(x__, x, y, slope):
                return slope * (x__ - x) + y
        else:
            def straight(x__, x, y, slope):
                # do not get confused: this x__ is really a y__
                return (x__ - y) / float(slope) + x
        for x__ in x_axis:
            y__ = straight(x__, self.image_bottom[1], self.image_bottom[0], slope)
            y_axis.append(y__)
        y_axis = np.array(y_axis)
        # optimization points
        lim = 0
        beta *= (self.max_length / self.angle)
        coords = [-100, -100]
        for x_p, y_p in zip(x_axis, y_axis):
            if d == 0:
                bottom2Coord = np.linalg.norm(np.array([y_p, x_p]) - self.image_bottom)
            else:
                bottom2Coord = np.linalg.norm(np.array([x_p, y_p]) - self.image_bottom)
            if bottom2Coord <= abs(beta):
                if bottom2Coord > lim:
                    lim = bottom2Coord
                    if d == 0:
                        coords = [y_p, x_p]
                    else:
                        coords = [x_p, y_p]  # coords (y, x)
        geom_coords = geometry.Point([coords[1], coords[0]])
        if self.poly.contains(geom_coords):
            self.rec_points.append([int(coords[0]), int(coords[1])])
            vis_util.draw_bounding_box_on_image_array(
                photo, caja[0], caja[1], caja[2], caja[3], color='red',
                thickness=4, display_str_list=())  # HEADS
            cv2.circle(photo, (int(coords[1]), int(coords[0])), 3, (255, 0, 0), -1)
            return 1
        return 0

    people = 0
    wheelchair_area = 0  # default, in case no wheelchair (clase == 2.0) is found
    if n == 1:
        photo = self.image
    elif n == 0:
        photo = self.copy_image
    else:
        photo = self.final_image
    for (puntaje, caja, clase) in zip(self.scores, self.boxes, self.classes):
        distance_x = caja[3] - caja[1]
        distance_y = caja[2] - caja[0]
        # (x, y) centroid of the box
        point = np.array([(caja[1] + distance_x / 2.0) * photo.shape[1],
                          (caja[0] + distance_y / 2.0) * photo.shape[0]])
        if point[1] < 0:
            point[1] = 0
        if point[0] < 0:
            point[0] = 0
        if (puntaje >= self.min_score) and (clase == 1.0 or clase == 4.0):
            # pixel distance converted to radians
            gamma = np.linalg.norm(np.array([point[1], point[0]]) - self.image_bottom)
            gamma *= (self.angle / self.max_length)
            d1_prima = np.tan(gamma) * H
            d1 = d1_prima - np.tan(gamma) * h
            alpha = np.arctan(d1 / H)
            beta = abs(gamma) - abs(alpha)
            slope = (point[1] - self.image_bottom[0]) / (point[0] - self.image_bottom[1])
            if abs(slope) >= 1:
                d = 1
            elif abs(slope) < 1:
                d = 0
            people += Quadrant(int(point[1]), int(point[0]), float(beta),
                               slope, photo, d, caja, clase)
        elif clase == 2.0:
            x = np.array([caja[1] + distance_x * input1 / 2.0,
                          caja[1] + distance_x * (1 - input1 / 2.0),
                          caja[1] + distance_x * (1 - input1 / 2.0),
                          caja[1] + distance_x * input1 / 2.0]) * photo.shape[1]
            y = np.array([caja[0] + distance_y * input1 / 2.0,
                          caja[0] + distance_y * input1 / 2.0,
                          caja[0] + distance_y * (1 - input1 / 2.0),
                          caja[0] + distance_y * (1 - input1 / 2.0)]) * photo.shape[0]
            vis_util.draw_bounding_box_on_image_array(
                photo,
                caja[0] + distance_y * input1 / 2.0,
                caja[1] + distance_x * input1 / 2.0,
                caja[2] - distance_y * input1 / 2.0,
                caja[3] - distance_x * input1 / 2.0,
                color='yellow', thickness=4, display_str_list=())  # WHEELCHAIRS
            wheelchair_area = self.measures.PolyArea(x, y)
    return [wheelchair_area, people]
def predict():
    start = time.time()
    return_data = []
    # check if the post request has the file part
    if 'file' not in request.files:
        print('No file part')
        print(str(request.files))
        return redirect(request.url)
    file = request.files['file']
    # if the user does not select a file, the browser may also
    # submit an empty part without a filename
    if file.filename == '':
        print('No selected file')
        return redirect(request.url)
    if file:
        name = str(uuid.uuid4())
        upload_filename = name + '.png'
        full_filename = os.path.join(app.config['UPLOAD_FOLDER'], upload_filename)
        file.save(full_filename)
        print("serving: " + full_filename)
        image = Image.open(full_filename)
        image = image.convert('RGB')
        image.thumbnail(IM_SCALED_SIZE)
        # the array based representation of the image will be used later in
        # order to prepare the result image with boxes and labels on it.
        image_np = load_image_into_numpy_array(image)
        # Expand dimensions since the model expects images to have shape:
        # [1, None, None, 3]
        image_np_expanded = np.expand_dims(image_np, axis=0)
        ##################################################
        # Tensorflow part
        ##################################################
        sess = app.config['tf_sess']
        fetches = app.config['tf_fetches']
        image_tensor = app.config['tf_image_tensor']
        (boxes, scores, classes, num) = sess.run(
            fetches, feed_dict={image_tensor: image_np_expanded})
        ##################################################
        # END Tensorflow part
        ##################################################
        print("Classification done.")
        class_squeeze = np.squeeze(classes).astype(np.int32)
        boxes_squeeze = np.squeeze(boxes)
        scores_squeeze = np.squeeze(scores)
        # Reopen the full image after the classification has been done with
        # the scaled one
        image = Image.open(full_filename)
        image = image.convert('RGB')
        image.thumbnail(PROCESSED_IM_SIZE)
        image_np = load_image_into_numpy_array(image)
        if scores_squeeze[1] > SCORE_THR:
            print("Two pigs found")
            draw_boxes = [(boxes_squeeze[0], class_squeeze[0]),
                          (boxes_squeeze[1], class_squeeze[1])]
            # Sort boxes such that blue is the upper one
            draw_boxes.sort(key=lambda x: x[0][2])
            pig1 = category_index[draw_boxes[0][1]]['name']
            pig2 = category_index[draw_boxes[1][1]]['name']
            # Draw pig 1
            ymin, xmin, ymax, xmax = draw_boxes[0][0]
            vis_util.draw_bounding_box_on_image_array(
                image_np, ymin, xmin, ymax, xmax, color='RoyalBlue',
                thickness=4, use_normalized_coordinates=True)
            # Draw pig 2
            ymin, xmin, ymax, xmax = draw_boxes[1][0]
            vis_util.draw_bounding_box_on_image_array(
                image_np, ymin, xmin, ymax, xmax, color='Red',
                thickness=4, use_normalized_coordinates=True)
        elif scores_squeeze[0] > SCORE_THR:
            print("One pig found")
            pig1 = 'bacon'
            pig2 = 'bacon'
            # Draw box
            ymin, xmin, ymax, xmax = boxes_squeeze[0]
            vis_util.draw_bounding_box_on_image_array(
                image_np, ymin, xmin, ymax, xmax, color='Chocolate',
                thickness=4, use_normalized_coordinates=True)
        else:
            print("No pigs found")
            pig1 = ""
            pig2 = ""
        print("Pig 1 = " + pig1 + " Pig 2 = " + pig2)
        processed_image = Image.fromarray(image_np)
        processed_filename = name + '.jpg'
        full_filename = os.path.join(PROCESSED_FOLDER, processed_filename)
        processed_image.save(full_filename, 'JPEG', quality=90)
        return_data = dict()
        return_data['pig1'] = str(pig1)
        return_data['pig2'] = str(pig2)
        return_data['uuid'] = str(name)
        return_data['img_url'] = os.path.join('processed', processed_filename)
    print("Time spent handling the request: %f" % (time.time() - start))
    print("Returning " + str(return_data))
    return jsonify(return_data)
def display_objects_distances(image_np, depth_np, boxes_, classes_, scores_,
                              category_index):
    box_to_display_str_map = collections.defaultdict(list)
    box_to_color_map = collections.defaultdict(str)
    research_distance_box = 50
    for i in range(min(20, boxes_.shape[0])):
        if scores_ is None or scores_[i] > .5:
            box = tuple(boxes_[i].tolist())
            if classes_[i] in category_index.keys():
                class_name = category_index[classes_[i]]['name']
                display_str = str(class_name)
                if not display_str:
                    display_str = '{}%'.format(int(100 * scores_[i]))
                else:
                    display_str = '{}: {}%'.format(display_str, int(100 * scores_[i]))
                # Find object distance
                ymin, xmin, ymax, xmax = box
                x_center = int(xmin * width + (xmax - xmin) * width * 0.5)
                y_center = int(ymin * height + (ymax - ymin) * height * 0.5)
                x_vect = []
                y_vect = []
                z_vect = []
                for j_ in range(int(y_center - research_distance_box),
                                int(y_center + research_distance_box)):
                    for i_ in range(int(x_center - research_distance_box),
                                    int(x_center + research_distance_box)):
                        z = depth_np[j_, i_, 2]
                        if not np.isnan(z) and not np.isinf(z):
                            x_vect.append(depth_np[j_, i_, 0])
                            y_vect.append(depth_np[j_, i_, 1])
                            z_vect.append(z)
                try:
                    x = statistics.median(x_vect)
                    y = statistics.median(y_vect)
                    z = statistics.median(z_vect)
                except statistics.StatisticsError:
                    # No valid depth samples in the patch.
                    x = -1
                    y = -1
                    z = -1
                distance = math.sqrt(x * x + y * y + z * z)
                display_str = display_str + " " + str('% 6.2f' % distance) + " m "
                box_to_display_str_map[box].append(display_str)
                box_to_color_map[box] = vis_util.STANDARD_COLORS[
                    classes_[i] % len(vis_util.STANDARD_COLORS)]
    for box, color in box_to_color_map.items():
        ymin, xmin, ymax, xmax = box
        vis_util.draw_bounding_box_on_image_array(
            image_np, ymin, xmin, ymax, xmax, color=color, thickness=8,
            display_str_list=box_to_display_str_map[box],
            use_normalized_coordinates=True)
    return image_np
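The distance logic above takes the median of the finite XYZ samples in a patch around the box center; a hedged numpy sketch of that step, assuming an (H, W, 3) depth map in meters and an illustrative helper name:

```python
import numpy as np

def patch_median_distance(depth_np, y_center, x_center, half_size=50):
    """Median Euclidean distance over a patch of per-pixel (X, Y, Z) samples."""
    patch = depth_np[y_center - half_size:y_center + half_size,
                     x_center - half_size:x_center + half_size, :]
    valid = patch[np.isfinite(patch).all(axis=-1)]  # drop NaN/inf samples
    if valid.size == 0:
        return -1.0
    x, y, z = np.median(valid, axis=0)
    return float(np.sqrt(x * x + y * y + z * z))
```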
def _visualize(examples, categories, filename):
    """Visualizes examples.

    Args:
      examples: A list of python dicts saving examples to be visualized.
      categories: A list of category names indexed by class id.
      filename: Path to the output file.
    """
    with open(filename, 'w') as fid:
        fid.write('<table border=1>')
        for example_index, example in enumerate(examples):
            (image_id, image, image_height, image_width, num_gt_boxes, gt_boxes,
             gt_labels, num_dt_boxes, dt_boxes, dt_scores,
             dt_labels) = (example[InputDataFields.image_id],
                           example[InputDataFields.image],
                           example[InputDataFields.image_height],
                           example[InputDataFields.image_width],
                           example[InputDataFields.num_objects],
                           example[InputDataFields.object_boxes],
                           example[InputDataFields.object_texts],
                           example[DetectionResultFields.num_detections],
                           example[DetectionResultFields.detection_boxes],
                           example[DetectionResultFields.detection_scores],
                           example[DetectionResultFields.detection_classes])
            # Print captions.
            caption_annot = ''
            if (InputDataFields.num_captions in example
                    and InputDataFields.caption_strings in example
                    and InputDataFields.caption_lengths in example):
                (num_captions, caption_strings,
                 caption_lengths) = (example[InputDataFields.num_captions],
                                     example[InputDataFields.caption_strings],
                                     example[InputDataFields.caption_lengths])
                captions = []
                for caption_string, caption_length in zip(
                        caption_strings[:num_captions],
                        caption_lengths[:num_captions]):
                    captions.append(' '.join(
                        [x.decode('ascii') for x in caption_string[:caption_length]]))
                caption_annot = '</br>'.join(captions)
            # Generated image-level ground-truth.
            labels_gt_annot = ''
            if 'debug_groundtruth_labels' in example:
                labels_gt = [
                    categories[i]
                    for i, v in enumerate(example['debug_groundtruth_labels'])
                    if v > 0
                ]
                labels_gt_annot = ','.join(labels_gt)
            labels_ps_annot = ''
            if 'debug_pseudo_labels' in example:
                labels_ps = [
                    categories[i]
                    for i, v in enumerate(example['debug_pseudo_labels'])
                    if v > 0
                ]
                labels_ps_annot = ','.join(labels_ps)
            if labels_ps_annot:
                labels_ps_annot = 'pseudo:' + labels_ps_annot
            # Image canvas.
            max_height = 300
            ratio = max_height / image_height
            image_height = max_height
            image_width = int(image_width * ratio)
            image = cv2.resize(image, (image_width, image_height))
            img_base64 = plotlib._py_convert_to_base64(image[:, :, ::-1])
            # Image with ground-truth boxes.
            image_with_gt = image.copy()
            for i in range(num_gt_boxes):
                ymin, xmin, ymax, xmax = gt_boxes[i]
                label = gt_labels[i].decode('ascii')
                draw_bounding_box_on_image_array(
                    image_with_gt, ymin, xmin, ymax, xmax, color='red',
                    display_str_list=[label], use_normalized_coordinates=True)
            image_with_gt = cv2.cvtColor(image_with_gt, cv2.COLOR_RGB2BGR)
            gt_base64 = plotlib._py_convert_to_base64(image_with_gt)
            # Image with predicted boxes.
            for i, dt_score in enumerate(dt_scores):
                if dt_score < FLAGS.min_visl_detection_score:
                    break
            num_dt_boxes = min(i, num_dt_boxes)
            dt_classes = dt_labels - 1
            dt_labels = np.array(
                [categories[int(x) - 1].encode('ascii') for x in dt_labels])
            recall_mask, precision_mask = box_utils.py_evaluate_precision_and_recall(
                num_gt_boxes, gt_boxes, gt_labels, num_dt_boxes, dt_boxes, dt_labels)
            image_with_dt = image.copy()
            for i in range(num_dt_boxes - 1, -1, -1):
                ymin, xmin, ymax, xmax = dt_boxes[i]
                score = dt_scores[i]
                label = '%s:%d%%' % (dt_labels[i].decode('ascii'),
                                     int(score * 100 + 0.5))
                draw_bounding_box_on_image_array(
                    image_with_dt, ymin, xmin, ymax, xmax,
                    color=STANDARD_COLORS[int(dt_classes[i])],
                    display_str_list=[label], use_normalized_coordinates=True)
            # Re-draw the true positives in lime so they stand out.
            for i in range(num_dt_boxes - 1, -1, -1):
                if precision_mask[i]:
                    ymin, xmin, ymax, xmax = dt_boxes[i]
                    score = dt_scores[i]
                    label = '%s:%d%%' % (dt_labels[i].decode('ascii'),
                                         int(score * 100 + 0.5))
                    draw_bounding_box_on_image_array(
                        image_with_dt, ymin, xmin, ymax, xmax, color='lime',
                        display_str_list=[label], use_normalized_coordinates=True)
            image_with_dt = cv2.cvtColor(image_with_dt, cv2.COLOR_RGB2BGR)
            dt_base64 = plotlib._py_convert_to_base64(image_with_dt)
            # Write html file.
            fid.write('<tr>')
            fid.write('<td>%s</td>' % (image_id.decode('ascii')))
            fid.write(
                '<td><img src="data:image/jpg;base64,%s"></br>%s</br>GT: %s</br>PS: %s</td>'
                % (gt_base64, caption_annot, labels_gt_annot, labels_ps_annot))
            fid.write('<td><img src="data:image/jpg;base64,%s"></td>' % (dt_base64))
            fid.write('</tr>')
        fid.write('</table>')
    tf.logging.info('File is written to %s, #images=%i', filename, example_index)
def display_objects_distances(image_np, depth_np, num_detections, boxes_,
                              classes_, scores_, category_index):
    box_to_display_str_map = collections.defaultdict(list)
    box_to_color_map = collections.defaultdict(str)
    research_distance_box = 30
    for i in range(num_detections):
        if scores_[i] > confidence:
            box = tuple(boxes_[i].tolist())
            if classes_[i] in category_index.keys():
                class_name = category_index[classes_[i]]['name']
                display_str = str(class_name)
                if not display_str:
                    display_str = '{}%'.format(int(100 * scores_[i]))
                else:
                    display_str = '{}: {}%'.format(display_str, int(100 * scores_[i]))
                # Find object distance
                ymin, xmin, ymax, xmax = box
                x_center = int(xmin * width + (xmax - xmin) * width * 0.5)
                y_center = int(ymin * height + (ymax - ymin) * height * 0.5)
                x_vect = []
                y_vect = []
                z_vect = []
                min_y_r = max(int(ymin * height), int(y_center - research_distance_box))
                min_x_r = max(int(xmin * width), int(x_center - research_distance_box))
                max_y_r = min(int(ymax * height), int(y_center + research_distance_box))
                max_x_r = min(int(xmax * width), int(x_center + research_distance_box))
                if min_y_r < 0:
                    min_y_r = 0
                if min_x_r < 0:
                    min_x_r = 0
                if max_y_r > height:
                    max_y_r = height
                if max_x_r > width:
                    max_x_r = width
                for j_ in range(min_y_r, max_y_r):
                    for i_ in range(min_x_r, max_x_r):
                        z = depth_np[j_, i_, 2]
                        if not np.isnan(z) and not np.isinf(z):
                            x_vect.append(depth_np[j_, i_, 0])
                            y_vect.append(depth_np[j_, i_, 1])
                            z_vect.append(z)
                if len(x_vect) > 0:
                    x = statistics.median(x_vect)
                    y = statistics.median(y_vect)
                    z = statistics.median(z_vect)
                    distance = math.sqrt(x * x + y * y + z * z)
                    display_str = display_str + " " + str('% 6.2f' % distance) + " m "
                    if "person" in display_str:
                        print(display_str + 'x: ' + str(x) + 'y: ' + str(y) + 'z: ' + str(z))
                box_to_display_str_map[box].append(display_str)
                box_to_color_map[box] = vis_util.STANDARD_COLORS[
                    classes_[i] % len(vis_util.STANDARD_COLORS)]
    for box, color in box_to_color_map.items():
        ymin, xmin, ymax, xmax = box
        vis_util.draw_bounding_box_on_image_array(
            image_np, ymin, xmin, ymax, xmax, color=color, thickness=4,
            display_str_list=box_to_display_str_map[box],
            use_normalized_coordinates=True)
    return image_np
def visualize_boxes_with_opc_info(image, boxes, classes, scores, category_index,
                                  instance_masks=None, keypoints=None,
                                  use_normalized_coordinates=False,
                                  max_boxes_to_draw=20, min_score_thresh=.5,
                                  agnostic_mode=False, line_thickness=4):
    """Overlay labeled boxes on an image with formatted scores and label names.

    This function groups boxes that correspond to the same location and creates
    a display string for each detection and overlays these on the image. Note
    that this function modifies the image in place, and returns that same image.

    Args:
      image: uint8 numpy array with shape (img_height, img_width, 3)
      boxes: a numpy array of shape [N, 4]
      classes: a numpy array of shape [N]. Note that class indices are 1-based,
        and match the keys in the label map.
      scores: a numpy array of shape [N] or None. If scores=None, then this
        function assumes that the boxes to be plotted are groundtruth boxes and
        plots all boxes as black with no classes or scores.
      category_index: a dict containing category dictionaries (each holding
        category index `id` and category name `name`) keyed by category indices.
      instance_masks: a numpy array of shape [N, image_height, image_width],
        can be None
      keypoints: a numpy array of shape [N, num_keypoints, 2], can be None
      use_normalized_coordinates: whether boxes are to be interpreted as
        normalized coordinates or not.
      max_boxes_to_draw: maximum number of boxes to visualize. If None, draw
        all boxes.
      min_score_thresh: minimum score threshold for a box to be visualized
      agnostic_mode: boolean (default: False) controlling whether to evaluate in
        class-agnostic mode or not. This mode will display scores but ignore
        classes.
      line_thickness: integer (default: 4) controlling line width of the boxes.

    Returns:
      uint8 numpy array with shape (img_height, img_width, 3) with overlaid
      boxes.
    """
    # Create a display string (and color) for every box location, group any
    # boxes that correspond to the same location.
    box_to_display_str_map = collections.defaultdict(list)
    box_to_color_map = collections.defaultdict(str)
    box_to_instance_masks_map = {}
    box_to_keypoints_map = collections.defaultdict(list)
    if not max_boxes_to_draw:
        max_boxes_to_draw = boxes.shape[0]
    for i in range(min(max_boxes_to_draw, boxes.shape[0])):
        if scores is None or scores[i] > min_score_thresh:
            box = tuple(boxes[i].tolist())
            if instance_masks is not None:
                box_to_instance_masks_map[box] = instance_masks[i]
            if keypoints is not None:
                box_to_keypoints_map[box].extend(keypoints[i])
            if scores is None:
                box_to_color_map[box] = 'black'
            else:
                if not agnostic_mode:
                    if classes[i] in category_index.keys():
                        class_name = category_index[classes[i]]['name']
                    else:
                        class_name = 'N/A'
                    display_str = '{}: {}%'.format(class_name, int(100 * scores[i]))
                else:
                    display_str = 'score: {}%'.format(int(100 * scores[i]))
                    # class_name is otherwise undefined in agnostic mode;
                    # default it so the OPC lookup below cannot raise.
                    class_name = 'N/A'
                # Append live process data for known OPC tags.
                if class_name in opc_client.opc_id.keys():
                    display_str = display_str + " " + opc_info.get_value(class_name)
                print(display_str)
                box_to_display_str_map[box].append(display_str)
                if agnostic_mode:
                    box_to_color_map[box] = 'DarkOrange'
                else:
                    box_to_color_map[box] = vis_util.STANDARD_COLORS[
                        classes[i] % len(vis_util.STANDARD_COLORS)]
    # Draw all boxes onto image.
    for box, color in box_to_color_map.items():
        ymin, xmin, ymax, xmax = box
        if instance_masks is not None:
            vis_util.draw_mask_on_image_array(image,
                                              box_to_instance_masks_map[box],
                                              color=color)
        vis_util.draw_bounding_box_on_image_array(
            image, ymin, xmin, ymax, xmax, color=color,
            thickness=line_thickness,
            display_str_list=box_to_display_str_map[box],
            use_normalized_coordinates=use_normalized_coordinates)
        if keypoints is not None:
            vis_util.draw_keypoints_on_image_array(
                image, box_to_keypoints_map[box], color=color,
                radius=line_thickness / 2,
                use_normalized_coordinates=use_normalized_coordinates)
    return image
def show_inference_perso(model, image_path, threshold, saveImg, espece):
    # The array-based representation of the image will be used later in order
    # to prepare the result image with boxes and labels on it.
    image = Image.open(image_path)
    image_np = load_image_into_numpy_array(image)  # numpy array with shape [height, width, 3]
    image_np_with_annotations = image_np.copy()
    # Load the ground truths: keep only the current image's name from the path
    image_name = os.path.basename(image_path)
    groundtruths = load_groundtruths_on_Test(image_name)
    # Actual detection.
    output_dict, elapsed_time = run_inference_for_single_image(
        model, image_np_with_annotations, [])
    # Visualization of the results of a detection.
    viz_utils.visualize_boxes_and_labels_on_image_array(
        image_np_with_annotations,
        output_dict['detection_boxes'],
        output_dict['detection_classes'],
        output_dict['detection_scores'],
        category_index,
        # we never have more than 3 or 4 birds at the same time
        # max_boxes_to_draw=4,
        instance_masks=output_dict.get('detection_masks_reframed', None),
        line_thickness=4,
        use_normalized_coordinates=True,
        min_score_thresh=threshold,
        agnostic_mode=False)
    # Display the ground truths (i.e. the annotations)
    for gt in groundtruths:
        # Extract the 4 coordinates of each box
        ymin, xmin, ymax, xmax, species = gt
        viz_utils.draw_bounding_box_on_image_array(
            image_np_with_annotations, ymin, xmin, ymax, xmax, color='red',
            thickness=2, use_normalized_coordinates=False)  # TODO
    img_annotee = Image.fromarray(np.uint8(image_np_with_annotations)).convert('RGB')
    draw = ImageDraw.Draw(img_annotee)
    for gt in groundtruths:
        ymin, xmin, ymax, xmax, species = gt
        (left, right, top, bottom) = (xmin, xmax, ymin, ymax)
        marginRight = 145
        marginBottom = 30
        draw.text((right - marginRight, bottom - marginBottom), species,
                  fill='white',
                  font=ImageFont.truetype(current_dir + "arial.ttf", 28))
    # Copy the ground truths onto the image with the predictions
    np.copyto(image_np_with_annotations, np.array(img_annotee))
    # Show the labels in the legend
    plt.legend(handles=handles)
    # Display or save the results
    if saveImg:
        # Create a folder for the current species
        if not os.path.exists(current_dir + espece):
            os.makedirs(current_dir + espece)
        # Save the image in the current species' folder; the folder is located
        # in the same place as this Python program.
        plt.subplots_adjust(right=0.7)
        plt.imsave(current_dir + espece + '/' + image_name,
                   image_np_with_annotations)
    else:
        # Show the image and the recognized birds with their scores
        plt.imshow(Image.fromarray(image_np_with_annotations))
        plt.show()
    return elapsed_time
def OnTimer(self, event):
    ret = self.capture.grab()
    if ret == True:
        pass
    else:
        self.tiempo2 = time.now()
        tiempoFinal = self.tiempo2 - self.tiempo1
        print("Response time: ", tiempoFinal)
        print("Timing metrics:")
        print("Min", self.tmin, "Max", self.tmax, "Average", (self.ttot / self.tcount))
        print("Detection metrics:")
        print("Min", self.dmin, "Max", self.dmax,
              "Average", (self.davgtot / self.dcount),
              "Average boxes", (self.dboxtot / self.dcount))
        print("Frames with detections:", self.dcount)
        print("Capture failed")
        exit(1)
    ret, self.image_np = self.capture.retrieve()
    if self.analisis == 'GRABANDO':
        self.image_np = cv2.rotate(self.image_np, rotateCode=cv2.ROTATE_90_CLOCKWISE)
        self.out.write(self.image_np)
    if self.FRECUENCIA_CNN == 0:
        cv2.imwrite('C:/Users/Administrador/Desktop/prototipo/tomas/imagen_'
                    + str('{0:0{width}}'.format(self.imagenes, width=10)) + '.jpg',
                    self.image_np)
        src_dir = ('C:/Users/Administrador/Desktop/prototipo/AnotationBase/AnotationBase'
                   + self.anotation + '.xml')
        dst_dir = ('C:/Users/Administrador/Desktop/prototipo/tomas/imagen_'
                   + str('{0:0{width}}'.format(self.imagenes, width=10)) + '.xml')
        shutil.copy(src_dir, dst_dir)
        self.imagenes += 1
        if self.RN == True:
            # Expand dimensions since the model expects images to have shape:
            # [1, None, None, 3]
            image_np_expanded = np.expand_dims(self.image_np, axis=0)
            # Detection and timing metrics
            t1 = time.now()
            # Actual detection.
            (self.boxes, self.scores, self.classes, self.num) = self.sess.run(
                [self.detection_boxes, self.detection_scores,
                 self.detection_classes, self.num_detections],
                feed_dict={self.image_tensor: image_np_expanded})
            # Timing metrics
            t2 = time.now()
            tdif = (t2 - t1).total_seconds()
            if tdif < self.tmin:
                self.tmin = tdif
            if tdif > self.tmax:
                self.tmax = tdif
            self.ttot += tdif
            self.tcount += 1
            box = np.squeeze(self.boxes)
            # Frame height in pixels
            height = np.size(self.image_np, 0)
            # Frame width in pixels
            width = np.size(self.image_np, 1)
            # Compare every rectangle from the XML with every box from the CNN.
            # If the overlap fraction exceeds PORC_INTERSECCION record "[OK] ",
            # otherwise record "[ ] ".
            self.locations_state = []
            personas = 0
            Rectangle = namedtuple('Rectangle', 'xmin ymin xmax ymax')
            PORC_INTERSECCION = 0.5
            # Detection metrics
            dtot = 0
            dcount = 0
            for index, value in enumerate(self.classes[0]):
                score = self.scores[0, index]
                if (score >= 0.5 and self.category_index.get(value).get('name') == "person"):
                    dtot += score
                    dcount += 1
                    if score < self.dmin:
                        self.dmin = score
                    if score > self.dmax:
                        self.dmax = score
            if dcount > 0:
                self.dboxtot += dtot
                davg = dtot / dcount
                self.davgtot += davg
                self.dcount += 1
            # Iterate over the positions from the XML
            for j in self.images_location:
                ymin = int(j[1])
                xmin = int(j[0])
                ymax = int(j[3])
                xmax = int(j[2])
                area_xml = (ymax - ymin) * (xmax - xmin)
                rxml = Rectangle(xmin, ymin, xmax, ymax)
                # For each position, scan the boxes looking for a match
                coincide = False
                for index, value in enumerate(self.classes[0]):
                    if self.scores[0, index] > self.THROBLESHOOT:
                        if self.category_index.get(value).get('name') == "person":
                            ymin = int(box[index, 0] * height)
                            xmin = int(box[index, 1] * width)
                            ymax = int(box[index, 2] * height)
                            xmax = int(box[index, 3] * width)
                            rbox = Rectangle(xmin, ymin, xmax, ymax)
                            area_interseccion = self.area(rxml, rbox)
                            if area_interseccion != None:
                                if area_interseccion > (PORC_INTERSECCION * area_xml):
                                    coincide = True
                if coincide == True:
                    self.locations_state.append("[OK]")
                    personas += 1
                else:
                    self.locations_state.append("[ ]")
            print("Detected " + str(personas) + " people\n")
            print(self.locations_state)
            print("\n")
            self.cantOcupadas.Label = str(personas)
            self.cantLibres.Label = str(len(self.images_location) - personas)
            # Visualization of the results of a detection.
            vis_util.visualize_boxes_and_labels_on_image_array(
                self.image_np,
                np.squeeze(self.boxes),
                np.squeeze(self.classes).astype(np.int32),
                np.squeeze(self.scores),
                self.category_index,
                use_normalized_coordinates=True,
                max_boxes_to_draw=100,
                line_thickness=6)
            for i in range(len(self.screen_list)):
                self.screen_list[i].staticBitmap.Refresh()
        self.FRECUENCIA_CNN = self.FREC
    else:
        self.FRECUENCIA_CNN -= 1
    ###############################################
    if self.RN == True and self.VisualBoxes == 0:
        # Visualization of the results of a detection.
        vis_util.visualize_boxes_and_labels_on_image_array(
            self.image_np,
            np.squeeze(self.boxes),
            np.squeeze(self.classes).astype(np.int32),
            np.squeeze(self.scores),
            self.category_index,
            use_normalized_coordinates=True,
            max_boxes_to_draw=100,
            line_thickness=6)
    self.VisualBoxes += 1
    if self.VisualBoxes == 5:
        self.VisualBoxes = 0
    if self.analisis == 'PAUSADO':
        # Iterate over the positions from the XML
        for j in self.images_location:
            xmin = int(j[1])
            ymin = int(self.CaptureWidth - j[0])
            xmax = int(j[3])
            ymax = int(self.CaptureWidth - j[2])
            nro = int(j[4])
            vis_util.draw_bounding_box_on_image_array(
                self.image_np, ymin, xmin, ymax, xmax,
                color='red',  # channels are inverted, so it renders blue
                thickness=3,
                display_str_list=[str(nro)],
                use_normalized_coordinates=False)
    cv2.putText(self.image_np, "Anotar: " + self.anotation, (5, 30),
                cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 255, 255), 1, cv2.LINE_AA)
    cv2.putText(self.image_np, "Frames: " + str(self.imagenes - 4000), (5, 60),
                cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 255, 255), 1, cv2.LINE_AA)
    if self.analisis == 'PAUSADO':
        cv2.putText(self.image_np, self.analisis, (5, 90),
                    cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 0, 0), 1, cv2.LINE_AA)
    else:
        cv2.putText(self.image_np, self.analisis, (5, 90),
                    cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
    # Convert to RGB ready to display on screen
    self.image_np = cv2.cvtColor(self.image_np, cv2.COLOR_BGR2RGB)
    # Resize to the display size
    self.image_np = cv2.resize(self.image_np, (self.Screen1Width, self.Screen1Height),
                               interpolation=cv2.INTER_AREA)
    # get the height and width of the source image for buffer construction
    h, w = self.image_np.shape[:2]
    # make a wx style bitmap using the buffer converter
    self.wxbmp = wx.Bitmap.FromBuffer(w, h, self.image_np)
    self.Screen1.Refresh()
imagenes_bancas["[ ]"] else: imageFile = self.imagenes_bancas_select["libre"] i.setImagen(imageFile) #Seteo posicion proporcional al tamaño del screen y al tamaño de la captura separador=0 xoffset=-100 xmin,ymin=i.getPosicionXML() xpos=int((xmin/(self.CaptureWidth+750-separador))*self.Screen2Width)-separador+xoffset ypos=int((ymin/(self.CaptureHeight+350))*self.Screen2Height)-50 x, y = self.sizer_3.GetPosition() i.setPosicionVentana(x+xpos,y+ypos) #if self.num!=-1: # # Visualization of the results of a detection. # vis_util.visualize_boxes_and_labels_on_image_array( # self.image_np, # np.squeeze(self.boxes), # np.squeeze(self.classes).astype(np.int32), # np.squeeze(self.scores), # self.category_index, # use_normalized_coordinates=True, # line_thickness=4) #Dibujo rectangulo azul en camara de video indicando que ubicacion tengo apuntada con el mouse if i.getMouseEncima()==True: vis_util.draw_bounding_box_on_image_array( self.image_np, i.yminXML, i.xminXML, i.ymaxXML, i.xmaxXML, color='red', #Color invertido, queda blue thickness=3, display_str_list= (str(i.nro)), use_normalized_coordinates=False) #Dibujo rectangulo rojo en camara de video indicando que ubicacion seleccione con el mouse if i.getSeleccionado()==True: vis_util.draw_bounding_box_on_image_array( self.image_np, i.yminXML, i.xminXML, i.ymaxXML, i.xmaxXML, color='Blue', #Color invertido, queda red thickness=3, display_str_list= (str(i.nro)), use_normalized_coordinates=False) estado+=1 } ''' self.timer.Start(1000./self.fps) event.Skip()