def __init__(self):
    logger.info('Loading Tensorflow Detection API')
    weights_path = get_file(config.SSD_INCEPTION_FILENAME,
                            config.SSD_INCEPTION_URL,
                            cache_dir=os.path.abspath(config.WEIGHT_PATH),
                            cache_subdir='models')
    extract_path = weights_path.replace('.tar.gz', '')
    if not os.path.exists(extract_path):
        tar = tarfile.open(weights_path, "r:gz")
        tar.extractall(path=os.path.join(config.WEIGHT_PATH, 'models'))
        tar.close()
    pb_path = os.path.join(extract_path, self.PB_NAME)

    self.graph = tf.Graph()
    with self.graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(pb_path, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    self.label_map = label_map_util.load_labelmap(self.PATH_TO_LABELS)
    self.categories = label_map_util.convert_label_map_to_categories(
        self.label_map, max_num_classes=self.NUM_CLASSES,
        use_display_name=True)
    self.category_index = label_map_util.create_category_index(self.categories)
def main(unused_argv):
    assert FLAGS.checkpoint_dir, '`checkpoint_dir` is missing.'
    assert FLAGS.eval_dir, '`eval_dir` is missing.'
    tf.gfile.MakeDirs(FLAGS.eval_dir)
    if FLAGS.pipeline_config_path:
        configs = config_util.get_configs_from_pipeline_file(
            FLAGS.pipeline_config_path)
        tf.gfile.Copy(
            FLAGS.pipeline_config_path,
            os.path.join(FLAGS.eval_dir, 'pipeline.config'),
            overwrite=True)
    else:
        configs = config_util.get_configs_from_multiple_files(
            model_config_path=FLAGS.model_config_path,
            eval_config_path=FLAGS.eval_config_path,
            eval_input_config_path=FLAGS.input_config_path)
        for name, config in [('model.config', FLAGS.model_config_path),
                             ('eval.config', FLAGS.eval_config_path),
                             ('input.config', FLAGS.input_config_path)]:
            tf.gfile.Copy(config, os.path.join(FLAGS.eval_dir, name),
                          overwrite=True)

    model_config = configs['model']
    eval_config = configs['eval_config']
    input_config = configs['eval_input_config']
    if FLAGS.eval_training_data:
        input_config = configs['train_input_config']

    model_fn = functools.partial(
        model_builder.build, model_config=model_config, is_training=False)

    def get_next(config):
        return dataset_util.make_initializable_iterator(
            dataset_builder.build(config)).get_next()

    create_input_dict_fn = functools.partial(get_next, input_config)

    label_map = label_map_util.load_labelmap(input_config.label_map_path)
    max_num_classes = max([item.id for item in label_map.item])
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes)

    if FLAGS.run_once:
        eval_config.max_evals = 1

    graph_rewriter_fn = None
    if 'graph_rewriter_config' in configs:
        graph_rewriter_fn = graph_rewriter_builder.build(
            configs['graph_rewriter_config'], is_training=False)

    evaluator.evaluate(
        create_input_dict_fn,
        model_fn,
        eval_config,
        categories,
        FLAGS.checkpoint_dir,
        FLAGS.eval_dir,
        graph_hook_fn=graph_rewriter_fn)
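# Hedged sketch of the usual entry point for eval scripts like the one above;
# it assumes FLAGS is defined with tf.app.flags elsewhere in the file, as in
# the TF Object Detection API's eval.py.
if __name__ == '__main__':
    tf.app.run()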
def test_keep_categories_with_unique_id(self):
    label_map_proto = string_int_label_map_pb2.StringIntLabelMap()
    label_map_string = """
      item {
        id: 2
        name: 'cat'
      }
      item {
        id: 1
        name: 'child'
      }
      item {
        id: 1
        name: 'person'
      }
      item {
        id: 1
        name: 'n00007846'
      }
    """
    text_format.Merge(label_map_string, label_map_proto)
    categories = label_map_util.convert_label_map_to_categories(
        label_map_proto, max_num_classes=3)
    self.assertListEqual([{
        'id': 2,
        'name': u'cat'
    }, {
        'id': 1,
        'name': u'child'
    }], categories)
def read_data_and_evaluate(input_config, eval_config):
    """Reads pre-computed object detections and groundtruth from tf_record.

    Args:
      input_config: input config proto of type
        object_detection.protos.InputReader.
      eval_config: evaluation config proto of type
        object_detection.protos.EvalConfig.

    Returns:
      Evaluated detections metrics.

    Raises:
      ValueError: if input_reader type is not supported or metric type is
        unknown.
    """
    if input_config.WhichOneof('input_reader') == 'tf_record_input_reader':
        input_paths = input_config.tf_record_input_reader.input_path

        label_map = label_map_util.load_labelmap(input_config.label_map_path)
        max_num_classes = max([item.id for item in label_map.item])
        categories = label_map_util.convert_label_map_to_categories(
            label_map, max_num_classes)

        object_detection_evaluators = evaluator.get_evaluators(
            eval_config, categories)
        # Support a single evaluator
        object_detection_evaluator = object_detection_evaluators[0]

        skipped_images = 0
        processed_images = 0
        for input_path in _generate_filenames(input_paths):
            tf.logging.info('Processing file: {0}'.format(input_path))

            record_iterator = tf.python_io.tf_record_iterator(path=input_path)
            data_parser = tf_example_parser.TfExampleDetectionAndGTParser()

            for string_record in record_iterator:
                tf.logging.log_every_n(tf.logging.INFO,
                                       'Processed %d images...', 1000,
                                       processed_images)
                processed_images += 1

                example = tf.train.Example()
                example.ParseFromString(string_record)
                decoded_dict = data_parser.parse(example)

                if decoded_dict:
                    object_detection_evaluator.add_single_ground_truth_image_info(
                        decoded_dict[standard_fields.DetectionResultFields.key],
                        decoded_dict)
                    object_detection_evaluator.add_single_detected_image_info(
                        decoded_dict[standard_fields.DetectionResultFields.key],
                        decoded_dict)
                else:
                    skipped_images += 1
                    tf.logging.info('Skipped images: {0}'.format(skipped_images))

        return object_detection_evaluator.evaluate()

    raise ValueError('Unsupported input_reader_config.')
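# Hedged usage sketch for read_data_and_evaluate(); 'pipeline.config' is a
# hypothetical path, and the config keys follow the eval mains elsewhere in
# this collection.
configs = config_util.get_configs_from_pipeline_file('pipeline.config')
metrics = read_data_and_evaluate(configs['eval_input_config'],
                                 configs['eval_config'])
print(metrics)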
def main(unused_argv):
    assert FLAGS.checkpoint_dir, '`checkpoint_dir` is missing.'
    assert FLAGS.eval_dir, '`eval_dir` is missing.'
    if FLAGS.pipeline_config_path:
        configs = config_util.get_configs_from_pipeline_file(
            FLAGS.pipeline_config_path)
    else:
        configs = config_util.get_configs_from_multiple_files(
            model_config_path=FLAGS.model_config_path,
            eval_config_path=FLAGS.eval_config_path,
            eval_input_config_path=FLAGS.input_config_path)

    pipeline_proto = config_util.create_pipeline_proto_from_configs(configs)
    config_text = text_format.MessageToString(pipeline_proto)
    tf.gfile.MakeDirs(FLAGS.eval_dir)
    with tf.gfile.Open(os.path.join(FLAGS.eval_dir, 'pipeline.config'),
                       'wb') as f:
        f.write(config_text)

    model_config = configs['model']
    lstm_config = configs['lstm_model']
    eval_config = configs['eval_config']
    input_config = configs['eval_input_config']
    if FLAGS.eval_training_data:
        input_config.external_input_reader.CopyFrom(
            configs['train_input_config'].external_input_reader)
        lstm_config.eval_unroll_length = lstm_config.train_unroll_length

    model_fn = functools.partial(
        model_builder.build,
        model_config=model_config,
        lstm_config=lstm_config,
        is_training=False)

    def get_next(config, model_config, lstm_config, unroll_length):
        return seq_dataset_builder.build(config, model_config, lstm_config,
                                         unroll_length)

    create_input_dict_fn = functools.partial(get_next, input_config,
                                             model_config, lstm_config,
                                             lstm_config.eval_unroll_length)

    label_map = label_map_util.load_labelmap(input_config.label_map_path)
    max_num_classes = max([item.id for item in label_map.item])
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes)

    if FLAGS.run_once:
        eval_config.max_evals = 1

    evaluator.evaluate(create_input_dict_fn, model_fn, eval_config, categories,
                       FLAGS.checkpoint_dir, FLAGS.eval_dir)
def test_convert_label_map_to_coco_categories_with_few_classes(self):
    label_map_proto = self._generate_label_map(num_classes=4)
    cat_no_offset = label_map_util.convert_label_map_to_categories(
        label_map_proto, max_num_classes=2)
    expected_categories_list = [{
        'name': u'1',
        'id': 1
    }, {
        'name': u'2',
        'id': 2
    }]
    self.assertListEqual(expected_categories_list, cat_no_offset)
def test_convert_label_map_to_categories_no_label_map(self):
    categories = label_map_util.convert_label_map_to_categories(
        None, max_num_classes=3)
    expected_categories_list = [{
        'name': u'category_1',
        'id': 1
    }, {
        'name': u'category_2',
        'id': 2
    }, {
        'name': u'category_3',
        'id': 3
    }]
    self.assertListEqual(expected_categories_list, categories)
def test_convert_label_map_to_categories(self):
    label_map_proto = self._generate_label_map(num_classes=4)
    categories = label_map_util.convert_label_map_to_categories(
        label_map_proto, max_num_classes=3)
    expected_categories_list = [{
        'name': u'1',
        'id': 1
    }, {
        'name': u'2',
        'id': 2
    }, {
        'name': u'3',
        'id': 3
    }]
    self.assertListEqual(expected_categories_list, categories)
def __init__(self):
    self.detection_graph = tf.Graph()
    with self.detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    with self.detection_graph.as_default():
        # Get handles to input and output tensors
        ops = tf.get_default_graph().get_operations()
        all_tensor_names = {output.name for op in ops for output in op.outputs}
        tensor_dict = {}
        for key in [
                'num_detections', 'detection_boxes', 'detection_scores',
                'detection_classes', 'detection_masks'
        ]:
            tensor_name = key + ':0'
            if tensor_name in all_tensor_names:
                tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
                    tensor_name)
        image_tensor = tf.get_default_graph().get_tensor_by_name(
            'image_tensor:0')
        if 'detection_masks' in tensor_dict:
            # The following processing is only for a single image
            detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
            detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
            # Reframing is required to translate the masks from box
            # coordinates to image coordinates and fit the image size.
            real_num_detection = tf.cast(tensor_dict['num_detections'][0],
                                         tf.int32)
            detection_boxes = tf.slice(detection_boxes, [0, 0],
                                       [real_num_detection, -1])
            detection_masks = tf.slice(detection_masks, [0, 0, 0],
                                       [real_num_detection, -1, -1])
            # The original referenced an undefined `image` for the output
            # size; use the dynamic shape of the input tensor instead.
            image_shape = tf.shape(image_tensor)
            detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                detection_masks, detection_boxes, image_shape[1],
                image_shape[2])
            detection_masks_reframed = tf.cast(
                tf.greater(detection_masks_reframed, 0.5), tf.uint8)
            # Follow the convention by adding back the batch dimension
            tensor_dict['detection_masks'] = tf.expand_dims(
                detection_masks_reframed, 0)

    self.tensor_dict = tensor_dict
    self.image_tensor = image_tensor
    self.label_map = label_map
    self.category_index = category_index
    self.session = tf.Session(graph=self.detection_graph)
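# Hedged sketch of a matching inference method for the class above; the name
# `detect` and the return format are assumptions, and numpy is assumed to be
# imported as np, as elsewhere in these snippets.
def detect(self, image_np):
    # The model expects a batched uint8 image: [1, height, width, 3].
    output_dict = self.session.run(
        self.tensor_dict,
        feed_dict={self.image_tensor: np.expand_dims(image_np, 0)})
    # Strip the batch dimension and keep only the valid detections.
    num = int(output_dict['num_detections'][0])
    boxes = output_dict['detection_boxes'][0][:num]
    classes = output_dict['detection_classes'][0][:num].astype(np.int32)
    scores = output_dict['detection_scores'][0][:num]
    return boxes, classes, scores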
def main(unused_argv):
    assert FLAGS.checkpoint_dir, '`checkpoint_dir` is missing.'
    assert FLAGS.eval_dir, '`eval_dir` is missing.'
    if FLAGS.pipeline_config_path:
        model_config, eval_config, input_config = get_configs_from_pipeline_file()
    else:
        model_config, eval_config, input_config = get_configs_from_multiple_files()

    model_fn = functools.partial(
        model_builder.build, model_config=model_config, is_training=False)

    create_input_dict_fn = functools.partial(input_reader_builder.build,
                                             input_config)

    label_map = label_map_util.load_labelmap(input_config.label_map_path)
    max_num_classes = max([item.id for item in label_map.item])
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes)

    evaluator.evaluate(create_input_dict_fn, model_fn, eval_config, categories,
                       FLAGS.checkpoint_dir, FLAGS.eval_dir)
def get_label_index(label_path, num_classes):
    label_map = label_map_util.load_labelmap(label_path)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=num_classes, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)
    return category_index
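# Hedged usage example for get_label_index(); the label map path is
# hypothetical. category_index maps class ids to dicts such as
# {1: {'id': 1, 'name': 'person'}}.
category_index = get_label_index('data/mscoco_label_map.pbtxt', 90)
print(category_index[1]['name'])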
def main(args):
    my_flag = False
    svo_filepath = None
    if len(args) > 1:
        svo_filepath = args[1]

    rospy.init_node('Human')
    human_pub = rospy.Publisher('human_dis', String, queue_size=1)
    rate = rospy.Rate(10)

    # This main thread will run the object detection; the capture thread is
    # loaded later.

    # What model to download and load
    # MODEL_NAME = 'ssd_mobilenet_v1_coco_2018_01_28'
    MODEL_NAME = 'ssd_mobilenet_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03'
    # MODEL_NAME = 'ssd_resnet50_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03'
    # MODEL_NAME = 'faster_rcnn_nas_coco_2018_01_28'  # Accurate but heavy

    # Path to the frozen detection graph. This is the actual model that is
    # used for the object detection.
    PATH_TO_FROZEN_GRAPH = 'data/' + MODEL_NAME + '/frozen_inference_graph.pb'

    # Check if the model is already present
    if not os.path.isfile(PATH_TO_FROZEN_GRAPH):
        print("Downloading model " + MODEL_NAME + "...")
        MODEL_FILE = MODEL_NAME + '.tar.gz'
        MODEL_PATH = 'data/' + MODEL_NAME + '.tar.gz'
        DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'
        opener = urllib.request.URLopener()
        opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_PATH)
        tar_file = tarfile.open(MODEL_PATH)
        for file in tar_file.getmembers():
            file_name = os.path.basename(file.name)
            if 'frozen_inference_graph.pb' in file_name:
                tar_file.extract(file, 'data/')

    # List of the strings that are used to add the correct label to each box.
    PATH_TO_LABELS = os.path.join('data', 'mscoco_label_map.pbtxt')
    NUM_CLASSES = 90

    # Start the capture thread with the ZED input
    print("Starting the ZED")
    capture_thread = Thread(target=capture_thread_func,
                            kwargs={'svo_filepath': svo_filepath})
    capture_thread.start()
    # Shared resources
    global image_np_global, depth_np_global, new_data, exit_signal

    # Load a (frozen) Tensorflow model into memory.
    print("Loading model " + MODEL_NAME)
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    # Limit TF to at most 50% of the GPU memory
    # https://www.tensorflow.org/guide/using_gpu
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.5

    # Loading label map
    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    # Detection
    with detection_graph.as_default():
        with tf.Session(config=config, graph=detection_graph) as sess:
            while not exit_signal:
                if new_data:
                    lock.acquire()
                    image_np = np.copy(image_np_global)
                    depth_np = np.copy(depth_np_global)
                    new_data = False
                    lock.release()

                    # Expand dimensions since the model expects images to
                    # have shape: [1, None, None, 3]
                    image_np_expanded = np.expand_dims(image_np, axis=0)
                    image_tensor = detection_graph.get_tensor_by_name(
                        'image_tensor:0')
                    # Each box represents a part of the image where a
                    # particular object was detected.
                    boxes = detection_graph.get_tensor_by_name(
                        'detection_boxes:0')
                    # Each score represents the level of confidence for each
                    # of the objects. The score is shown on the result image,
                    # together with the class label.
                    scores = detection_graph.get_tensor_by_name(
                        'detection_scores:0')
                    classes = detection_graph.get_tensor_by_name(
                        'detection_classes:0')
                    num_detections = detection_graph.get_tensor_by_name(
                        'num_detections:0')

                    # Actual detection.
                    (boxes, scores, classes, num_detections) = sess.run(
                        [boxes, scores, classes, num_detections],
                        feed_dict={image_tensor: image_np_expanded})

                    num_detections_ = num_detections.astype(int)[0]
                    # print(np.squeeze(scores))

                    # Visualization of the results of a detection.
                    image_np = display_objects_distances(
                        image_np, depth_np, num_detections_,
                        np.squeeze(boxes),
                        np.squeeze(classes).astype(np.int32),
                        np.squeeze(scores), category_index, human_pub)

                    cv2.imshow('ZED object detection',
                               cv2.resize(image_np, (width, height)))
                    if cv2.waitKey(10) & 0xFF == ord('q'):
                        cv2.destroyAllWindows()
                        exit_signal = True
                else:
                    sleep(0.01)

            sess.close()

    exit_signal = True
    capture_thread.join()
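# Hedged sketch of the entry point for the ZED script above; assumes `sys` is
# imported by the surrounding file.
if __name__ == '__main__':
    main(sys.argv)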
if __name__ == "__main__": interpreter = interpreter_wrapper.Interpreter(model_path=model_file) interpreter.allocate_tensors() input_details = interpreter.get_input_details() output_details = interpreter.get_output_details() # check the type of the input tensor if input_details[0]['dtype'] == type(np.float32(1.0)): floating_model = True labels = load_labels(label_file) label_map = label_map_util.load_labelmap(label_map_path) max_num_classes = max([item.id for item in label_map.item]) categories = label_map_util.convert_label_map_to_categories( label_map, max_num_classes) evaluators = get_evaluators(categories) counters = {'skipped': 0, 'success': 0} images_paths, annnots_paths = get_all_examples(test_data_file_path) num_examples = len(images_paths) #try: for batch in range(num_examples): file_name = images_paths[batch] annot_name = annnots_paths[batch] img = cv2.imread(file_name) img_shape = img.shape if (batch + 1) % 100 == 0:
def run_inference_graph_images(PATH_TO_FROZEN_GRAPH, PATH_TO_LABELS,
                               NUM_CLASSES, TEST_IMAGE_PATHS, min_threshold,
                               bb_outpath, PATH_TO_BB_HASHMAP):
    """Takes a list of image local-paths and runs them through the trained
    graph. Further, using the visualization function, it draws bounding boxes
    on each of the images and saves them to a local path.

    Arguments:
        PATH_TO_FROZEN_GRAPH - local path of the trained frozen graph,
            '/frozen_inference_graph.pb'
        PATH_TO_LABELS - local path of the labels (a mapping from class number
            to class name), 'label_map_focus.pbtxt'
        NUM_CLASSES - number of detection classes
        TEST_IMAGE_PATHS - list of test image local paths
        min_threshold - minimum score threshold for a bounding box to be
            considered
        bb_outpath - local path where the images with bounding boxes are
            saved, /home/ubuntu/data/tensorflow/my_workspace/camera-trap-detection/snapshot-safari/snapshot-serengeti/subject_set_upload/
        PATH_TO_BB_HASHMAP - path where bounding box information for the
            subjects is saved
    """
    # Loading the frozen graph
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    bb_hashmap = {}
    for image_path in TEST_IMAGE_PATHS:
        image = Image.open(image_path)
        if len(np.array(image).shape) == 3:
            # The array-based representation of the image will be used later
            # to prepare the result image with boxes and labels on it.
            image_np = load_image_into_numpy_array(image)
            # Given matplotlib's default dpi, calculate the figure size to save
            y0, x0, c = image_np.shape
            h = y0 / 72  # matplotlib's default dpi is 72
            w = x0 / 72  # matplotlib's default dpi is 72
            IMAGE_SIZE = (w, h)
            # Expand dimensions since the model expects images to have
            # shape: [1, None, None, 3]
            image_np_expanded = np.expand_dims(image_np, axis=0)
            # Actual detection.
            output_dict = run_inference_for_single_image(
                image_np, detection_graph)
            # Visualization of the results of a detection.
            vis_util.visualize_boxes_and_labels_on_image_array(
                image_np,
                output_dict['detection_boxes'],
                output_dict['detection_classes'],
                output_dict['detection_scores'],
                category_index,
                instance_masks=output_dict.get('detection_masks'),
                use_normalized_coordinates=True,
                line_thickness=8,
                min_score_thresh=min_threshold,
                skip_labels=True,
                skip_scores=True,
                agnostic_mode=True)
            # Make a figure without the frame
            fig = plt.figure(frameon=False, figsize=IMAGE_SIZE)
            # Make the content fill the whole figure
            ax = plt.Axes(fig, [0., 0., 1., 1.])
            ax.set_axis_off()
            fig.add_axes(ax)
            # Draw the image
            ax.imshow(image_np)
            # Saving the image with boxes to disk
            plt.savefig(
                os.path.join(bb_outpath, '{0}'.format(image_path[-14:])))
            plt.gcf().clear()
            bb_hashmap[image_path[-14:]] = {
                'detection_boxes':
                    output_dict['detection_boxes'][0:sum(
                        output_dict['detection_scores'] >= min_threshold)],
                'detection_scores':
                    output_dict['detection_scores'][0:sum(
                        output_dict['detection_scores'] >= min_threshold)]
            }

    with open(PATH_TO_BB_HASHMAP, 'w') as f:
        for key in bb_hashmap.keys():
            f.write("%s,%s\n" % (key, bb_hashmap[key]))

    return bb_hashmap
PATH_TO_LABELS = os.path.join(CWD_PATH, 'saved_inference_graph_models',
                              'labelmap.pbtxt')

# Path to image
PATH_TO_IMAGE = os.path.join(CWD_PATH, IMAGE_NAME)

# Number of classes the object detector can identify
NUM_CLASSES = 1

# Load the label map.
# Label maps map indices to category names, so that when our convolutional
# network predicts `5`, we know that this corresponds to `king`.
# Here we use internal utility functions, but anything that returns a
# dictionary mapping integers to appropriate string labels would be fine.
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(
    label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)

# Load the Tensorflow model into memory.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')

    sess = tf.Session(graph=detection_graph)

# Define input and output tensors (i.e. data) for the object detection classifier
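# The snippet stops where the tensor handles would be defined; a hedged
# sketch of the usual continuation, mirroring the handle names used by the
# other snippets in this collection.
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
num_detections = detection_graph.get_tensor_by_name('num_detections:0')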
# create a context manager that makes this model the default one for
# execution
with model.as_default():
    # initialize the graph definition
    graphDef = tf.GraphDef()

    # load the graph from disk
    with tf.gfile.GFile(args["model"], "rb") as f:
        serializedGraph = f.read()
        graphDef.ParseFromString(serializedGraph)
        tf.import_graph_def(graphDef, name="")

# load the class labels from disk
labelMap = label_map_util.load_labelmap(args["labels"])
categories = label_map_util.convert_label_map_to_categories(
    labelMap, max_num_classes=args["num_classes"], use_display_name=True)
categoryIdx = label_map_util.create_category_index(categories)

# create a session to perform inference
with model.as_default():
    with tf.Session(graph=model) as sess:
        # initialize the pointer to the video file stream
        stream = cv2.VideoCapture(args["input"])
        writer = None

        # loop over frames from the video file stream
        while True:
            # grab the next frame
            (grabbed, image) = stream.read()

            # if the frame was not grabbed, then we have reached the end of
            # the stream
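            # hedged sketch of the next step in the loop: stop when the
            # stream is exhausted
            if not grabbed:
                break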
def main():
    # current camera frame
    global frame, annotatedFrame, frameQueue, currentFps, selectedIdx, \
        selectedClassName, objectDistance, boxes, scores, stats
    global currentMode, M_AUTOMANEUVER, M_AUTONAV, M_MANUAL

    # print(cv2.getBuildInformation())
    print("Loading model")
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(CHKPT_PATH, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    label_map = label_map_util.load_labelmap(LABELS_PATH)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=2, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    print("Starting main python module")
    if not DEBUG_DISABLE_FLIGHT:
        flightData = Drone(updateFlightInfo)
        process = Thread(target=flight.flightMain, args=(flightData, ))
        process.start()

    ip = '0.0.0.0'
    server = ThreadedHTTPServer((ip, 9090), CamHandler)
    target = Thread(target=server.serve_forever, args=())

    i = 0
    # To flip the image, modify the flip_method parameter
    # (0 and 2 are the most common)
    # print(gstreamer_pipeline(flip_method=0))
    cap = cv2.VideoCapture(gstreamer_pipeline(flip_method=2),
                           cv2.CAP_GSTREAMER)
    fpsSmoothing = 70
    lastUpdate = time.time()
    try:
        if cap.isOpened():
            print("CSI Camera opened")
            graph_options = tf.GraphOptions(
                optimizer_options=tf.OptimizerOptions(
                    opt_level=tf.OptimizerOptions.L1, ))
            OptConfig = tf.ConfigProto(graph_options=graph_options)
            with detection_graph.as_default():
                with tf.Session(graph=detection_graph,
                                config=OptConfig) as sess:
                    # Definite input and output Tensors for detection_graph
                    image_tensor = detection_graph.get_tensor_by_name(
                        'image_tensor:0')
                    # Each box represents a part of the image where a
                    # particular object was detected.
                    detection_boxes = detection_graph.get_tensor_by_name(
                        'detection_boxes:0')
                    # Each score represents the level of confidence for each
                    # of the objects. The score is shown on the result image,
                    # together with the class label.
                    detection_scores = detection_graph.get_tensor_by_name(
                        'detection_scores:0')
                    detection_classes = detection_graph.get_tensor_by_name(
                        'detection_classes:0')
                    num_detections = detection_graph.get_tensor_by_name(
                        'num_detections:0')
                    i = 0
                    print("TensorFlow session loaded.")
                    while mainThreadRunning:
                        ret_val, img = cap.read()
                        frame = img
                        # Convert OpenCV's BGR to RGB, as the model was
                        # trained on RGB images
                        color_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                        # Resize the image to the model size of 360x270
                        color_frame = cv2.resize(
                            color_frame, (360, 270),
                            interpolation=cv2.INTER_CUBIC)
                        image_np_expanded = np.expand_dims(color_frame,
                                                           axis=0)

                        # Actual detection
                        (boxes, scores, classes, num) = sess.run(
                            [
                                detection_boxes, detection_scores,
                                detection_classes, num_detections
                            ],
                            feed_dict={image_tensor: image_np_expanded})

                        # Draw boxes using the TF library; should be off
                        # during competition
                        if useBoxVisualization:
                            vis_util.visualize_boxes_and_labels_on_image_array(
                                frame,
                                np.squeeze(boxes),
                                np.squeeze(classes).astype(np.int32),
                                np.squeeze(scores),
                                category_index,
                                use_normalized_coordinates=True,
                                line_thickness=4,
                                min_score_thresh=MIN_CONFIDENCE)

                        # Now that we have the detected BBoxes, determine the
                        # current obstacle. First, gather stats about the
                        # bounding boxes. Squeezing lets us access box[i]
                        # directly instead of box[0][i].
                        boxes = np.squeeze(boxes)
                        classes = np.squeeze(classes)
                        scores = np.squeeze(scores)
                        stats = []
                        j = 0
                        # This is 15 ft; any object farther than that is a
                        # misidentification
                        lowestDistance = 15
                        if DEBUG_DUMP_DETECTIONS:
                            print("Boxes // Classes // Scores")
                            print(boxes)
                            print(classes)
                            print(scores)

                        # Reset selections
                        selectedIdx = None
                        if len(boxes) > 0:
                            for j in range(0, len(boxes)):
                                if scores[j] >= MIN_CONFIDENCE:
                                    stats.insert(
                                        j, getBoxStats(boxes[j], classes[j]))
                                    # print("box[%d] distance is %f" % (j, stats[j]['distance']))
                                    if stats[j]['distance'] < lowestDistance:
                                        selectedIdx = j
                                        selectedClassName = classToString(
                                            classes[j])
                                        objectDistance = stats[j]['distance']
                                        lowestDistance = objectDistance
                                        # print("Selected box[%d]: distance %f class %s conf %f" % (j, objectDistance, selectedClassName, scores[j]))
                                else:
                                    # Skip calculations on this box if it
                                    # does not meet the confidence threshold
                                    stats.insert(j, 0)

                        if not DEBUG_DISABLE_FLIGHT:
                            if selectedIdx is not None:
                                flightData.upData(stats[selectedIdx],
                                                  selectedClassName)
                            else:
                                flightData.upData(None, "None")

                        # Add the HUD to the current image
                        annotatedFrame = applyHud()
                        # currentFrameTime = time.time()
                        # if frameQueue.full():
                        #     with frameQueue.mutex:
                        #         frameQueue.queue.clear()
                        frameQueue.put(annotatedFrame.copy())
                        if i == 0:
                            target.start()
                            print("Starting MJPEG stream")
                        i += 1

                        # FPS smoothing algorithm
                        frameTime = time.time() - lastUpdate
                        frameFps = 1 / frameTime
                        currentFps += (frameFps - currentFps) / fpsSmoothing
                        lastUpdate = time.time()

            cap.release()
        else:
            print("FATAL: Unable to open camera")
    except KeyboardInterrupt:
        sys.exit()
def main():
    # This main thread will run the object detection; the capture thread is
    # loaded later

    # Some values standing for useful files
    PATH_TO_FROZEN_GRAPH = 'model/frozen_inference_graph.pb'
    PATH_TO_LABELS = 'model/labelmap.pbtxt'
    NUM_CLASSES = 1

    # Starting the ZED capture
    print("Starting the ZED")
    capture_thread = Thread(target=capture_thread_func)
    capture_thread.start()
    # Sharing variables used by threads
    global image_np_global, depth_np_global, new_data, exit_signal

    # Load a (frozen) Tensorflow model into memory.
    print("Loading model")
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.8

    # Loading label map
    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    # Detection
    with detection_graph.as_default(), tf.Session(
            config=config, graph=detection_graph) as sess:
        while not exit_signal:
            if new_data:
                lock.acquire()
                image_np = np.copy(image_np_global)
                depth_np = np.copy(depth_np_global)
                new_data = False
                lock.release()

                # Expand dimensions since the model expects images to have
                # shape: [1, None, None, 3]
                image_np_expanded = np.expand_dims(image_np, axis=0)

                image_tensor = detection_graph.get_tensor_by_name(
                    'image_tensor:0')
                boxes = detection_graph.get_tensor_by_name(
                    'detection_boxes:0')
                scores = detection_graph.get_tensor_by_name(
                    'detection_scores:0')
                classes = detection_graph.get_tensor_by_name(
                    'detection_classes:0')
                num_detections = detection_graph.get_tensor_by_name(
                    'num_detections:0')

                # Actual detection.
                (boxes, scores, classes, num_detections) = sess.run(
                    [boxes, scores, classes, num_detections],
                    feed_dict={image_tensor: image_np_expanded})

                num_detections_ = num_detections.astype(int)[0]

                # Visualization of the results of a detection and storing
                # target positions
                image_np, voi.target_list = display_objects_distances(
                    image_np, depth_np, num_detections_,
                    np.squeeze(boxes),
                    np.squeeze(classes).astype(np.int32),
                    np.squeeze(scores), category_index)

                # Triggering robot
                zed_robot.set_ang_and_vel(voi.target_list, voi.coord[:2],
                                          voi.rotation[2] + 90)

                # Read lidar
                # lidar_points = lidar.read()
                # print(depth_np_global[50][50])
                # print(lidar_points)

                # Displaying image through OpenCV
                cv2.imshow('ZED object detection',
                           cv2.resize(image_np, (width, height)))
                if cv2.waitKey(10) & 0xFF == ord('q'):
                    cv2.destroyAllWindows()
                    exit_signal = True
            else:
                sleep(0.01)

        sess.close()

    exit_signal = True
    capture_thread.join()
def create_category_index(path_labels_map):
    num_classes = 6
    label_map = label_map_util.load_labelmap(path_labels_map)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=num_classes, use_display_name=True)
    return label_map_util.create_category_index(categories)
parser.add_argument(
    "-max-b",
    "--max-boxes",
    dest='max_boxes',
    type=int,
    default=DETECTION_CONFIG["max_boxes_to_draw"],
    help="Max number of boxes to draw at a time, default is {default}.".format(
        default=DETECTION_CONFIG["max_boxes_to_draw"]))

args = parser.parse_args()

# Load labelmap file.
label_map = label_map_util.load_labelmap(DETECTION_CONFIG["labelmap_path"])
categories = label_map_util.convert_label_map_to_categories(
    label_map,
    max_num_classes=DETECTION_CONFIG["num_classes"],
    use_display_name=True)
category_index = label_map_util.create_category_index(categories)

# Loads a frozen Tensorflow model in memory.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(FROZEN_MODEL_PATH, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')


def detect_items(image_path, session):
    # Read input image.
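    # Hedged sketch of the truncated body: read and batch the image, then run
    # the standard detection tensors through the supplied session; the return
    # format is an assumption, not part of the original snippet.
    image = cv2.imread(image_path)
    image_np_expanded = np.expand_dims(
        cv2.cvtColor(image, cv2.COLOR_BGR2RGB), axis=0)
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
    scores = detection_graph.get_tensor_by_name('detection_scores:0')
    classes = detection_graph.get_tensor_by_name('detection_classes:0')
    (boxes, scores, classes) = session.run(
        [boxes, scores, classes],
        feed_dict={image_tensor: image_np_expanded})
    return np.squeeze(boxes), np.squeeze(scores), np.squeeze(classes)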
def test(pipeline_config_path, model_dir, label_map_path, test_data_dir,
         inference_dir):
    Path(inference_dir).mkdir(parents=True, exist_ok=True)

    configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
    model_config = configs['model']
    detection_model = model_builder.build(model_config=model_config,
                                          is_training=False)

    ckpt = tf.compat.v2.train.Checkpoint(model=detection_model)
    ckpt.restore(model_dir)

    # detect_fn = get_model_detection_function(detection_model)

    label_map = label_map_util.load_labelmap(label_map_path)
    categories = label_map_util.convert_label_map_to_categories(
        label_map,
        max_num_classes=label_map_util.get_max_label_map_index(label_map),
        use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    all_image = get_all_image_files(test_data_dir)
    for image_path in all_image:
        file_name = os.path.basename(image_path)
        name, image_format = os.path.splitext(image_path)
        try:
            image_np = load_image_into_numpy_array(image_path)
            input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0),
                                                dtype=tf.float32)
            detections, predictions_dict, shapes = detect_fn(
                detection_model, input_tensor)

            width = shapes.numpy()[1]
            height = shapes.numpy()[0]
            boxes = detections['detection_boxes'][0].numpy()
            classes = (detections['detection_classes'][0].numpy() +
                       1).astype(int)
            scores = detections['detection_scores'][0].numpy()

            display_str = f'image : {file_name}'
            for i in range(boxes.shape[0]):
                score = round(100 * scores[i])
                if score >= 25:
                    # print(boxes[i])
                    display_str = (
                        f'{display_str} / {category_index[classes[i]]["name"]}: '
                        f'{str(round(100 * scores[i]))}% '
                        f'({str(boxes[i])})')  # xmin, ymin, xmax, ymax
            print(display_str)

            label_id_offset = 1
            image_np_with_detections = image_np.copy()
            viz_utils.visualize_boxes_and_labels_on_image_array(
                image_np_with_detections,
                detections['detection_boxes'][0].numpy(),
                (detections['detection_classes'][0].numpy() +
                 label_id_offset).astype(int),
                detections['detection_scores'][0].numpy(),
                category_index,
                use_normalized_coordinates=True,
                max_boxes_to_draw=200,
                min_score_thresh=.25,
                agnostic_mode=False,
            )

            save_img = cv2.cvtColor(image_np_with_detections,
                                    cv2.COLOR_BGR2RGB)
            class_dir = os.path.dirname(image_path).split('/')[-1]
            Path(os.path.join(inference_dir, class_dir)).mkdir(parents=True,
                                                               exist_ok=True)
            shutil.copy(
                f'{name}-o.csv',
                os.path.join(inference_dir, class_dir,
                             f'{os.path.basename(name)}-o.csv'))
            cv2.imwrite(os.path.join(inference_dir, class_dir, file_name),
                        save_img)
        except Exception as e:
            print(f'### Exception : {file_name} - {str(e)}')
def detect_in_video(video_path):
    # VideoWriter is responsible for creating a copy of the video used for
    # the detections, but with the detection overlays. Keep in mind the
    # frame size has to be the same as the original video.
    # out = cv2.VideoWriter('../temp/' + 'WIN_20191218_11_03_57_Pro.mp4',
    #                       cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 10,
    #                       (1280, 720))

    if is_yolo:
        print('yolo!')
        configuration = tf.ConfigProto(device_count={"GPU": 0})
        sess = tf.Session(config=configuration)
        input_data = tf.placeholder(tf.float32,
                                    [1, new_size[1], new_size[0], 3],
                                    name='input_data')
        yolo_model = yolov3(num_class, anchors)
        with tf.variable_scope('yolov3'):
            pred_feature_maps = yolo_model.forward(input_data, False)
        pred_boxes, pred_confs, pred_probs = yolo_model.predict(
            pred_feature_maps)
        pred_scores = pred_confs * pred_probs
        boxes, scores, labels = gpu_nms(pred_boxes, pred_scores, num_class,
                                        max_boxes=1, score_thresh=0.2,
                                        nms_thresh=0.45)
        saver = tf.train.Saver()
        saver.restore(sess, restore_path)
    else:
        detection_graph = tf.Graph()
        with detection_graph.as_default():
            od_graph_def = tf.GraphDef()
            with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
                serialized_graph = fid.read()
                od_graph_def.ParseFromString(serialized_graph)
                tf.import_graph_def(od_graph_def, name='')

        configuration = tf.ConfigProto(device_count={"GPU": 0})
        sess = tf.Session(config=configuration, graph=detection_graph)

        # Definite input and output Tensors for detection_graph
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        # Each box represents a part of the image where a particular object
        # was detected.
        detection_boxes = detection_graph.get_tensor_by_name(
            'detection_boxes:0')
        # Each score represents the level of confidence for each of the
        # objects. The score is shown on the result image, together with the
        # class label.
        detection_scores = detection_graph.get_tensor_by_name(
            'detection_scores:0')
        detection_classes = detection_graph.get_tensor_by_name(
            'detection_classes:0')
        num_detections = detection_graph.get_tensor_by_name(
            'num_detections:0')

    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    frame_statistics = []
    frame_id = 1
    is_skip_frame = True
    frame_skip_count = 0

    # Create a directory of frames for the given video
    video_base_name = os.path.basename(video_path)
    video_name = os.path.splitext(video_base_name)[0]
    video_dir = join(os.path.dirname(video_path), video_name)
    images_dir = "images"
    video_images_dir = join(video_dir, images_dir)
    if not os.path.exists(video_images_dir):
        os.makedirs(video_images_dir)
    else:
        # Remove all frames from the target directory
        remove_files_in_dir(video_images_dir)
    video_images_dir_rat = join(video_images_dir, 'rat')
    video_images_dir_mouse = join(video_images_dir, 'mouse')
    os.makedirs(video_images_dir_rat, exist_ok=True)
    os.makedirs(video_images_dir_mouse, exist_ok=True)
    remove_files_in_dir(video_images_dir_rat)
    remove_files_in_dir(video_images_dir_mouse)

    # Load the video
    cap = cv2.VideoCapture(video_path)
    video_frame_cnt = int(cap.get(7))
    video_width = int(cap.get(3))
    video_height = int(cap.get(4))
    video_fps = int(cap.get(5))

    # Get the video resolution
    video_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    video_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))

    # Save frames relative to the images directory
    cur_dir = os.getcwd()
    os.chdir(video_images_dir)

    while cap.isOpened():
        # Read the frame
        ret, frame = cap.read()
        if frame is not None:
            # Recolor the frame. By default, OpenCV uses the BGR color space.
            # This short blog post explains this better:
            # https://www.learnopencv.com/why-does-opencv-use-bgr-color-format/
            # color_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            if not is_skip_frame:
                if is_yolo:
                    print('yoloo!!')
                    if is_letterbox_resize:
                        img, resize_ratio, dw, dh = letterbox_resize(
                            frame, new_size[0], new_size[1])
                    else:
                        height_ori, width_ori = frame.shape[:2]
                        img = cv2.resize(frame, tuple(new_size))
                    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                    img = np.asarray(img, np.float32)
                    img = img[np.newaxis, :] / 255.

                    start_time = time.time()
                    boxes_, scores_, labels_ = sess.run(
                        [boxes, scores, labels], feed_dict={input_data: img})
                    end_time = time.time()

                    # Rescale the coordinates to the original image
                    if is_letterbox_resize:
                        boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] -
                                             dw) / resize_ratio
                        boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] -
                                             dh) / resize_ratio
                    else:
                        boxes_[:, [0, 2]] *= (width_ori / float(new_size[0]))
                        boxes_[:, [1, 3]] *= (height_ori / float(new_size[1]))

                    for i in range(len(boxes_)):
                        if scores_[i] == max(scores_):
                            x0, y0, x1, y1 = boxes_[i]
                            plot_one_box(
                                frame, [x0, y0, x1, y1],
                                label=classes_yolo[labels_[i]] +
                                ', {:.2f}%'.format(scores_[i] * 100),
                                color=color_table[labels_[i]])

                            rodent_confidence = scores_[i]
                            rodent_class_id = labels_[i] + 1
                            rodent_class_name = classes_yolo[labels_[i]]
                            if rodent_confidence >= .20:
                                frame_statistics.append({
                                    'frame_id': frame_id,
                                    'confidence': rodent_confidence,
                                    'rodent_class_id': rodent_class_id,
                                    'rodent_class_name': rodent_class_name,
                                })

                                # Save the frame
                                frame_name = (rodent_class_name + '/image' +
                                              str(frame_id) + '.jpg')
                                cv2.imwrite(frame_name, frame)

                                # Save the xml file
                                # scores = np.squeeze(scores[0])
                                # bbox_coords = boxes[0]
                                # writer = Writer('.', video_width, video_height)
                                # writer.addObject(rodent_class_name,
                                #                  bbox_coords[1] * video_width,
                                #                  bbox_coords[0] * video_height,
                                #                  bbox_coords[3] * video_width,
                                #                  bbox_coords[2] * video_height)
                                # writer.save('image' + str(frame_id) + '.xml')
                            # else:
                            #     # Save the frame
                            #     frame_name = 'image' + str(frame_id) + '.jpg'
                            #     cv2.imwrite(frame_name, frame)

                    cv2.putText(frame,
                                '{:.2f}ms'.format(
                                    (end_time - start_time) * 1000), (40, 40),
                                0,
                                fontScale=1,
                                color=(0, 255, 0),
                                thickness=2)
                else:
                    image_np_expanded = np.expand_dims(frame, axis=0)
                    # Actual detection.
                    (boxes, scores, classes, num) = sess.run(
                        [
                            detection_boxes, detection_scores,
                            detection_classes, num_detections
                        ],
                        feed_dict={image_tensor: image_np_expanded})

                    # Visualization of the results of a detection.
                    # Note: perform the detections using a higher threshold.
                    vis_util.visualize_boxes_and_labels_on_image_array(
                        frame,
                        np.squeeze(boxes[0]),
                        np.squeeze(classes[0]).astype(np.int32),
                        np.squeeze(scores[0]),
                        category_index,
                        use_normalized_coordinates=True,
                        line_thickness=8,
                        max_boxes_to_draw=1,
                        min_score_thresh=.20)

                    # rodent_confidence = np.squeeze(scores[0])[0]
                    # rodent_class_id = np.squeeze(classes[0]).astype(np.int32)[0]
                    # rodent_class_name = category_index[rodent_class_id]['name']
                    # if rodent_confidence > .20:
                    #     frame_statistics.append({
                    #         'frame_id': frame_id,
                    #         'confidence': rodent_confidence,
                    #         'rodent_class_id': rodent_class_id,
                    #         'rodent_class_name': rodent_class_name,
                    #     })
                    #
                    #     # Save the frame
                    #     frame_name = (rodent_class_name + '/image' +
                    #                   str(frame_id) + '.jpg')
                    #     cv2.imwrite(frame_name, frame)
                    #
                    #     # Save the xml file
                    #     scores = np.squeeze(scores[0])
                    #     for i in range(min(1, np.squeeze(boxes[0]).shape[0])):
                    #         if scores is None or scores[i] > .20:
                    #             boxes = tuple(boxes[i].tolist())
                    #
                    #     bbox_coords = boxes[0]
                    #     writer = Writer('.', video_width, video_height)
                    #     writer.addObject(rodent_class_name,
                    #                      bbox_coords[1] * video_width,
                    #                      bbox_coords[0] * video_height,
                    #                      bbox_coords[3] * video_width,
                    #                      bbox_coords[2] * video_height)
                    #     writer.save('image' + str(frame_id) + '.xml')
                    # else:
                    #     # Save the frame
                    #     frame_name = 'image' + str(frame_id) + '.jpg'
                    #     cv2.imwrite(frame_name, frame)

            cv2.imshow('frame', cv2.resize(frame, (800, 600)))
            output_rgb = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            # out.write(output_rgb)

            # Skip the frame if necessary
            if is_skip_frame:
                while 1:
                    key = cv2.waitKey(1)
                    if key == 32:  # The "space" key was pressed
                        frame_skip_count += 1
                        print("You have skipped " + str(frame_skip_count) +
                              " frames")
                        break
                    elif key == 113 or key == 233:  # 'q' ('й') was pressed
                        is_skip_frame = False
                        break

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

            frame_id += 1

    # out.release()
    os.chdir(cur_dir)
    cap.release()
    cv2.destroyAllWindows()

    statistics = {
        'frame_count': frame_id,  # Number of frames
        'frame_skip_count': frame_skip_count,  # Number of skipped frames
        'frame_rodent_count': 0,  # Number of frames with a rodent
        'frame_rat_count': 0,  # Number of frames with a rat
        'frame_mouse_count': 0,  # Number of frames with a mouse
        'sum_confidence_rat': 0,  # Sum of rat confidences over the video
        'sum_confidence_mouse': 0,  # Sum of mouse confidences over the video
        'mean_confidence_rat': 0,  # Mean rat confidence over the video
        'mean_confidence_mouse': 0  # Mean mouse confidence over the video
    }
    for frame_statistic in frame_statistics:
        if frame_statistic['rodent_class_name'] == 'rat':
            statistics['frame_rodent_count'] += 1
            statistics['frame_rat_count'] += 1
            statistics['sum_confidence_rat'] += frame_statistic['confidence']
            statistics['mean_confidence_rat'] = (
                statistics['sum_confidence_rat'] /
                statistics['frame_rat_count'])
        elif frame_statistic['rodent_class_name'] == 'mouse':
            statistics['frame_rodent_count'] += 1
            statistics['frame_mouse_count'] += 1
            statistics['sum_confidence_mouse'] += frame_statistic['confidence']
            statistics['mean_confidence_mouse'] = (
                statistics['sum_confidence_mouse'] /
                statistics['frame_mouse_count'])

    print('----->>> Detection results <<<-----')
    print('Number of frames: ' + str(statistics['frame_count']))
    print('Number of skipped frames: ' + str(statistics['frame_skip_count']))
    print('Number of frames with a rodent: ' +
          str(statistics['frame_rodent_count']))
    print('Number of frames with a rat: ' + str(statistics['frame_rat_count']))
    print('Number of frames with a mouse: ' +
          str(statistics['frame_mouse_count']))
    print('Mean rat confidence over the video: ' +
          str(statistics['mean_confidence_rat']))
    print('Mean mouse confidence over the video: ' +
          str(statistics['mean_confidence_mouse']))
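# Hedged usage sketch for detect_in_video(); the video path is hypothetical
# and the module-level globals (is_yolo, PATH_TO_CKPT, PATH_TO_LABELS,
# NUM_CLASSES, new_size, ...) must be defined by the surrounding script.
detect_in_video('videos/rodents_test.mp4')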
def load_label_map():
    global category_index, PATH_TO_LABELS, NUM_CLASSES
    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)
def run(self):
    time1 = time.time()
    MIN_ratio = 0.9

    # MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17'
    MODEL_NAME = 'faster_rcnn_inception_v2_coco_2018_01_28'
    GRAPH_FILE_NAME = 'frozen_inference_graph.pb'
    LABEL_FILE = 'data/mscoco_label_map.pbtxt'
    NUM_CLASSES = 90
    # end define

    label_map = lmu.load_labelmap(LABEL_FILE)
    categories = lmu.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    categories_index = lmu.create_category_index(categories)
    print("call label_map & categories : %0.5f" % (time.time() - time1))

    graph_file = MODEL_NAME + '/' + GRAPH_FILE_NAME

    # thread function
    def find_detection_target(categories_index, classes, scores):
        time1_1 = time.time()  # thread function start time
        print("Thread started")
        objects = []  # create a list
        for index, value in enumerate(classes[0]):
            object_dict = {}  # dictionary
            if scores[0][index] > MIN_ratio:
                object_dict[(categories_index.get(value)).get('name')
                            .encode('utf8')] = scores[0][index]
                objects.append(object_dict)  # append to the list
        print(objects)
        print("Thread function processing time %0.5f" %
              (time.time() - time1_1))
    # end thread function

    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(graph_file, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    sess = tf.Session(graph=detection_graph)
    print("store in memory time : %0.5f" % (time.time() - time1))

    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
    detection_scores = detection_graph.get_tensor_by_name(
        'detection_scores:0')
    detection_classes = detection_graph.get_tensor_by_name(
        'detection_classes:0')
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')
    print("make tensor time : %0.5f" % (time.time() - time1))

    prevtime = 0
    # thread_1 = Process(target=find_detection_target,
    #                    args=(categories_index, classes, scores))  # create a thread
    print("load Video time : %0.5f" % (time.time() - time1))

    while True:
        ret, frame = capture.read()
        frame_expanded = np.expand_dims(frame, axis=0)
        height, width, channel = frame.shape

        (boxes, scores, classes, nums) = sess.run(  # np.ndarray
            [
                detection_boxes, detection_scores, detection_classes,
                num_detections
            ],
            feed_dict={image_tensor: frame_expanded})
        # end sess.run()

        # objects = []  # create a list
        for index, value in enumerate(classes[0]):
            object_dict = {}  # dictionary
            if scores[0][index] > MIN_ratio:
                object_dict[(categories_index.get(value)).get('name')
                            .encode('utf8')] = scores[0][index]
                # objects.append(object_dict)  # append to the list

                # visualize_boxes_and_labels_on_image_array box_size_info
                # (image info)
                # for box, color in box_to_color_map.items():
                #     ymin, xmin, ymax, xmax = box
                # [index][0] [1] [2] [3]
                ymin = int(boxes[0][index][0] * height)
                xmin = int(boxes[0][index][1] * width)
                ymax = int(boxes[0][index][2] * height)
                xmax = int(boxes[0][index][3] * width)

                Result = frame[ymin:ymax, xmin:xmax]
                cv2.imwrite('car.jpg', Result)
                print('b')
                try:
                    result_chars = NP.number_recognition('car.jpg')
                    ui.label_6.setText(result_chars)
                    # print(NP.check())
                except:
                    print("Recognition failed")
        # print(objects)

        key = cv2.waitKey(1) & 0xFF
        if key == ord("q"):
            break
BASE_DIR = os.path.dirname(os.path.dirname(__file__))

# Path to frozen detection graph. This is the actual model that is used for
# the object detection.
MODEL_NAME = 'ssd_mobilenet_v1_coco_11_06_2017'
LABEL_MAP_FILE = 'mscoco_label_map.pbtxt'
PATH_TO_CKPT = os.path.join(BASE_DIR, 'object_detection', MODEL_NAME,
                            'frozen_inference_graph.pb')

# List of the strings that are used to add the correct label to each box.
PATH_TO_LABELS = os.path.join(BASE_DIR, 'object_detection', 'data',
                              LABEL_MAP_FILE)

# Loading label map (mobilenet can handle 90 classes)
LABEL_MAP = label_map_util.load_labelmap(PATH_TO_LABELS)
CATEGORIES = label_map_util.convert_label_map_to_categories(
    LABEL_MAP, max_num_classes=90, use_display_name=True)
CATEGORY_INDEX = label_map_util.create_category_index(CATEGORIES)

LABEL_KEYS = 'category instance confidence'.split()
COLOR_KEYS = 'black white red orange yellow green cyan blue purple pink'.split()
BB_KEYS = 'x y z width height depth'.split()
OBJECT_VECTOR_KEYS = LABEL_KEYS + BB_KEYS + COLOR_KEYS


class ObjectSeries(pd.Series):
    LABEL_KEYS = LABEL_KEYS
    COLOR_KEYS = COLOR_KEYS
    BB_KEYS = BB_KEYS
    OBJECT_VECTOR_KEYS = OBJECT_VECTOR_KEYS
def __init__(self, graph_path, label_path, num_classes):
    import _init_paths

    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(graph_path, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    with detection_graph.as_default():
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)

        # Predefine image size as required by SSD
        self.image_shape = [365, 640, 3]
        # Predefine confidence threshold
        self.thresh = 0.3

        ops = tf.get_default_graph().get_operations()
        all_tensor_names = {
            output.name for op in ops for output in op.outputs
        }
        tensor_dict = {}
        for key in [
                'num_detections', 'detection_boxes', 'detection_scores',
                'detection_classes', 'detection_masks'
        ]:
            tensor_name = key + ':0'
            if tensor_name in all_tensor_names:
                tensor_dict[key] = tf.get_default_graph(
                ).get_tensor_by_name(tensor_name)
        if 'detection_masks' in tensor_dict:
            # The following processing is only for a single image
            detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
            detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
            # Reframing is required to translate the masks from box
            # coordinates to image coordinates and fit the image size.
            real_num_detection = tf.cast(tensor_dict['num_detections'][0],
                                         tf.int32)
            detection_boxes = tf.slice(detection_boxes, [0, 0],
                                       [real_num_detection, -1])
            detection_masks = tf.slice(detection_masks, [0, 0, 0],
                                       [real_num_detection, -1, -1])
            detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                detection_masks, detection_boxes, self.image_shape[0],
                self.image_shape[1])
            detection_masks_reframed = tf.cast(
                tf.greater(detection_masks_reframed, 0.5), tf.uint8)
            # Follow the convention by adding back the batch dimension
            tensor_dict['detection_masks'] = tf.expand_dims(
                detection_masks_reframed, 0)
        image_tensor = tf.get_default_graph().get_tensor_by_name(
            'image_tensor:0')

        self.image_tensor = image_tensor
        self.tensor_dict = tensor_dict

    label_map = label_map_util.load_labelmap(label_path)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=num_classes, use_display_name=True)
    self.category_index = label_map_util.create_category_index(categories)
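# Hedged sketch of a matching inference method; `detect` is an assumed name.
# It applies the self.thresh confidence threshold that the constructor above
# defines but never uses; numpy is assumed to be imported as np.
def detect(self, image_np):
    output_dict = self.sess.run(
        self.tensor_dict,
        feed_dict={self.image_tensor: np.expand_dims(image_np, 0)})
    scores = output_dict['detection_scores'][0]
    keep = scores >= self.thresh
    return (output_dict['detection_boxes'][0][keep],
            output_dict['detection_classes'][0][keep].astype(np.int32),
            scores[keep])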
def _load_label_map(self):
    label_map = label_map_util.load_labelmap(self.PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=self.NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)
    return category_index
def setup_platform(hass, config, add_entities, discovery_info=None):
    """Set up the TensorFlow image processing platform."""
    model_config = config.get(CONF_MODEL)
    model_dir = model_config.get(CONF_MODEL_DIR) \
        or hass.config.path('tensorflow')
    labels = model_config.get(CONF_LABELS) \
        or hass.config.path('tensorflow', 'object_detection',
                            'data', 'mscoco_label_map.pbtxt')

    # Make sure locations exist
    if not os.path.isdir(model_dir) or not os.path.exists(labels):
        _LOGGER.error("Unable to locate tensorflow models or label map")
        return

    # append custom model path to sys.path
    sys.path.append(model_dir)

    try:
        # Verify that the TensorFlow Object Detection API is pre-installed
        # pylint: disable=unused-import,unused-variable
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
        import tensorflow as tf  # noqa
        from object_detection.utils import label_map_util  # noqa
    except ImportError:
        # pylint: disable=line-too-long
        _LOGGER.error(
            "No TensorFlow Object Detection library found! Install or compile "
            "for your system following instructions here: "
            "https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md")  # noqa
        return

    try:
        # Display warning that PIL will be used if no OpenCV is found.
        # pylint: disable=unused-import,unused-variable
        import cv2  # noqa
    except ImportError:
        _LOGGER.warning(
            "No OpenCV library found. TensorFlow will process image with "
            "PIL at reduced resolution")

    # Set up Tensorflow graph, session, and label map to pass to processor
    # pylint: disable=no-member
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(model_config.get(CONF_GRAPH), 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    session = tf.Session(graph=detection_graph)
    label_map = label_map_util.load_labelmap(labels)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=90, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    entities = []
    for camera in config[CONF_SOURCE]:
        entities.append(TensorFlowImageProcessor(
            hass, camera[CONF_ENTITY_ID], camera.get(CONF_NAME),
            session, detection_graph, category_index, config))
    add_entities(entities)
def main():
    parser = argparse.ArgumentParser(
        description="run inference by using specified model",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('model_name', help="specify the model name")
    parser.add_argument('work_dir', help="specify the work space directory")
    parser.add_argument('--model_dir', default=None,
                        help="specify the dir storing models.")
    args = parser.parse_args()

    model_dir = args.model_dir
    if model_dir is None:
        assert os.getenv('MODEL_INPUT_DIR') is not None
        model_dir = os.path.join(os.getenv('MODEL_INPUT_DIR'),
                                 'object_detection')

    model_name = args.model_name
    model_file = model_name + '.tar.gz'

    tar_file = tarfile.open(os.path.join(model_dir, model_file))
    recorded_name = model_name
    for file in tar_file.getmembers():
        file_name = os.path.basename(file.name)
        if 'frozen_inference_graph.pb' in file_name:
            recorded_name = file.name
            tar_file.extract(file, args.work_dir)

    PATH_TO_LABELS = os.path.join('data', 'mscoco_label_map.pbtxt')
    PATH_TO_CKPT = os.path.join(args.work_dir, recorded_name)
    NUM_CLASSES = 90

    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name=model_name)

    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    PATH_TO_TEST_IMAGES_DIR = 'test_images'
    TEST_IMAGE_PATHS = [
        os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i))
        for i in range(1, 2)
    ]

    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:
            image_tensor = detection_graph.get_tensor_by_name(
                '{}/image_tensor:0'.format(model_name))
            detection_boxes = detection_graph.get_tensor_by_name(
                '{}/detection_boxes:0'.format(model_name))
            detection_scores = detection_graph.get_tensor_by_name(
                '{}/detection_scores:0'.format(model_name))
            detection_classes = detection_graph.get_tensor_by_name(
                '{}/detection_classes:0'.format(model_name))
            num_detections = detection_graph.get_tensor_by_name(
                '{}/num_detections:0'.format(model_name))

            for image_path in TEST_IMAGE_PATHS:
                image = Image.open(image_path)
                image_np = load_image_into_numpy_array(image)
                image_np_expanded = np.expand_dims(image_np, axis=0)

                options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()
                results = sess.run(
                    [detection_boxes, detection_scores, detection_classes,
                     num_detections],
                    feed_dict={image_tensor: image_np_expanded},
                    options=options,
                    run_metadata=run_metadata)

                cg = CompGraph(model_name, run_metadata, detection_graph)

                cg_tensor_dict = cg.get_tensors()
                cg_sorted_keys = sorted(cg_tensor_dict.keys())
                # cg_sorted_shape = []
                # for cg_key in cg_sorted_keys:
                #     print(cg_key)
                #     t = tf.shape(cg_tensor_dict[cg_key])
                #     cg_sorted_shape.append(
                #         t.eval(feed_dict={image_tensor: image_np_expanded},
                #                session=sess))
                cg_sorted_items = []
                for cg_key in cg_sorted_keys:
                    cg_sorted_items.append(tf.shape(cg_tensor_dict[cg_key]))

                cg_sorted_shape = sess.run(
                    cg_sorted_items,
                    feed_dict={image_tensor: image_np_expanded})
                cg.op_analysis(dict(zip(cg_sorted_keys, cg_sorted_shape)),
                               '{}.pickle'.format(model_name))

                print('Image: {}, number of detected: {}'.format(
                    image_path, len(results[3])))
def pipeline(cap):
    # The default frame resolutions are system dependent; convert them from
    # float to integer.
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    print('-------SIZES-----')
    print(frame_width, frame_height)

    # Define the codec and create a VideoWriter object; the annotated output
    # is stored in the FILE_OUTPUT file.
    out = cv2.VideoWriter(FILE_OUTPUT,
                          cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
                          10, (frame_width, frame_height))

    sys.path.append("..")

    # Object detection imports from the object detection module.
    from object_detection.utils import label_map_util
    from object_detection.utils import visualization_utils as vis_util

    # Model preparation
    MODEL_NAME = 'ssd_mobilenet_v2_quantized_300x300_coco'
    PATH_TO_CKPT = 'trained-inference-graphs/output_inference_graph_v2/frozen_inference_graph.pb'
    # PATH_TO_LABELS = os.path.join('data', '<LABEL_NAME>.pbtxt')
    PATH_TO_LABELS = 'annotations/label_map.pbtxt'
    NUM_CLASSES = 3
    TEST_IMAGE_PATHS = 'image_frames_'

    # Load a (frozen) TensorFlow model into memory.
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    # Loading label map
    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:
            # Define input and output tensors for detection_graph
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            # Each box represents a part of the image where a particular
            # object was detected.
            detection_boxes = detection_graph.get_tensor_by_name(
                'detection_boxes:0')
            # Each score represents the level of confidence for each of the
            # objects. The score is shown on the result image, together with
            # the class label.
            detection_scores = detection_graph.get_tensor_by_name(
                'detection_scores:0')
            detection_classes = detection_graph.get_tensor_by_name(
                'detection_classes:0')
            num_detections = detection_graph.get_tensor_by_name(
                'num_detections:0')
            print('------------PRE----------')
            while cap.isOpened():
                # Capture frame-by-frame
                ret, frame = cap.read()
                if not ret:
                    break
                # Expand dimensions since the model expects images to have
                # shape: [1, None, None, 3]
                image_np_expanded = np.expand_dims(frame, axis=0)
                # Actual detection.
                start = time()
                (boxes, scores, classes, num) = sess.run(
                    [detection_boxes, detection_scores, detection_classes,
                     num_detections],
                    feed_dict={image_tensor: image_np_expanded})
                end = time()
                inference_time = end - start
                print('------ACTUAL DETECTION-----')
                # Visualization of the results of a detection.
                vis_util.visualize_boxes_and_labels_on_image_array(
                    frame,
                    np.squeeze(boxes),
                    np.squeeze(classes).astype(np.int32),
                    np.squeeze(scores),
                    category_index,
                    use_normalized_coordinates=True,
                    line_thickness=8)
                objects = []
                # Lower this threshold to include more low-confidence
                # detections in the report.
                threshold = 0.3
                for index, value in enumerate(classes[0]):
                    if scores[0, index] > threshold:
                        object_dict = {}
                        object_dict['start'] = start
                        object_dict['end'] = end
                        object_dict['prediction'] = (
                            category_index.get(value)).get('name')
                        object_dict['probability'] = scores[0, index]
                        object_dict['inference_time'] = inference_time
                        objects.append(object_dict)
                print('Objects>>', objects)
                # Save the annotated frame and allow quitting with "q"
                out.write(frame)
                cv2.imshow('Charving Detection', frame)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

    # When everything is done, release the video capture and writer objects
    cap.release()
    out.release()
    # Close all the frames
    cv2.destroyAllWindows()
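# Sketch only: if the per-frame `objects` dictionaries built in pipeline()
# need to be persisted, one option is appending them to a JSON-lines file.
# The file name and the numpy-to-float cast are assumptions, not part of the
# original pipeline.
import json

import numpy as np


def append_detections(objects, path='detections.jsonl'):
    with open(path, 'a') as f:
        for obj in objects:
            # numpy floats are not JSON serializable; cast them explicitly
            obj = {k: (float(v) if isinstance(v, np.floating) else v)
                   for k, v in obj.items()}
            f.write(json.dumps(obj) + '\n')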
def main(): print("Creating eval directory") os.makedirs(OUT_PATH_EVAL_IMAGES, exist_ok=True) # load frozen graph in memory detection_graph = tf.Graph() with detection_graph.as_default(): od_graph_def = tf.GraphDef() with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid: serialized_graph = fid.read() od_graph_def.ParseFromString(serialized_graph) tf.import_graph_def(od_graph_def, name='') # load label map label_map = label_map_util.load_labelmap(PATH_TO_LABELS) categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True) category_index = label_map_util.create_category_index(categories) TEST_IMAGE_PATHS = [] for im_file in os.listdir(PATH_TO_TEST_IMAGES_DIR): # print(im_file) if im_file.endswith(".jpeg") and not os.path.isfile( os.path.join(PATH_TO_TEST_IMAGES_DIR, im_file.replace(".jpeg", ".xml"))): TEST_IMAGE_PATHS.append(os.path.join(PATH_TO_TEST_IMAGES_DIR, im_file)) # TEST_IMAGE_PATHS = [ os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i)) for i in range(1, 8) ] print(len(TEST_IMAGE_PATHS)) # Size, in inches, of the output images. IMAGE_SIZE = (192, 128) subset_test = TEST_IMAGE_PATHS[:] print("We are going to run the inference for {} images".format(len(subset_test))) shuffle(subset_test) for image_path in subset_test: out_debug_image_path = os.path.join(OUT_PATH_EVAL_IMAGES, os.path.basename(image_path)) if os.path.isfile(out_debug_image_path): continue image = Image.open(image_path) # the array based representation of the image will be used later in order to prepare the # result image with boxes and labels on it. image_np = load_image_into_numpy_array(image) # Expand dimensions since the model expects images to have shape: [1, None, None, 3] image_np_expanded = np.expand_dims(image_np, axis=0) # Actual detection. output_dict = run_inference_for_single_image(image_np, detection_graph) # Visualization of the results of a detection. # draw only poles vis_util.visualize_boxes_and_labels_on_image_array( image_np, output_dict['detection_boxes'], output_dict['detection_classes'], output_dict['detection_scores'], category_index, instance_masks=output_dict.get('detection_masks'), use_normalized_coordinates=True, line_thickness=4, exclude_classes=["player"]) plt.figure(figsize=IMAGE_SIZE) plt.imshow(image_np) #draw_court_lines_from_detections(image_np, output_dict['detection_boxes'], # output_dict['detection_classes'], # output_dict['detection_scores']) Image.fromarray(image_np).save(out_debug_image_path)
def initialize_labels(self): path_to_label = os.path.join(self.model_name, 'label.pbtxt') label_map = label_map_util.load_labelmap(path=path_to_label) categories = label_map_util.convert_label_map_to_categories( label_map, max_num_classes=self.num_class, use_display_name=True) self.category_index = label_map_util.create_category_index(categories)
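# Usage sketch (assumed, not part of the original class): category_index maps
# a numeric class id to its metadata, so a detected class id can be turned
# into a display name with a small hypothetical helper like this:
def class_name(category_index, class_id, default='unknown'):
    # Detected class ids come back as floats; normalize before the lookup
    return category_index.get(int(class_id), {}).get('name', default)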
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(args['model'], 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')

# Cap the amount of GPU memory this session may claim
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.2
sess = tf.Session(graph=detection_graph, config=config)

label_map = label_map_util.load_labelmap(args['pbtxt'])
categories = label_map_util.convert_label_map_to_categories(
    label_map, max_num_classes=args['number_of_classes'],
    use_display_name=True)
category_index = label_map_util.create_category_index(categories)

# Read frames from the webcam (CAP_DSHOW selects the DirectShow backend on
# Windows)
print("[INFO] starting video stream...")
vs = cv2.VideoCapture(0 + cv2.CAP_DSHOW)
vs.set(cv2.CAP_PROP_SETTINGS, 1)
while True:
    _, frame = vs.read()
    rows = frame.shape[0]
    cols = frame.shape[1]
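    # The excerpt ends here; a sketch of how the capture loop might continue,
    # assuming the standard exported-graph tensor names and drawing boxes
    # with plain OpenCV (the 0.5 score threshold is an assumption):
    image_np_expanded = np.expand_dims(frame, axis=0)
    (boxes, scores, classes, num) = sess.run(
        [detection_graph.get_tensor_by_name('detection_boxes:0'),
         detection_graph.get_tensor_by_name('detection_scores:0'),
         detection_graph.get_tensor_by_name('detection_classes:0'),
         detection_graph.get_tensor_by_name('num_detections:0')],
        feed_dict={detection_graph.get_tensor_by_name('image_tensor:0'):
                   image_np_expanded})
    for i in range(int(num[0])):
        if scores[0][i] > 0.5:
            # detection_boxes are normalized [ymin, xmin, ymax, xmax];
            # scale them with the rows/cols computed above
            ymin, xmin, ymax, xmax = boxes[0][i]
            cv2.rectangle(frame,
                          (int(xmin * cols), int(ymin * rows)),
                          (int(xmax * cols), int(ymax * rows)),
                          (0, 255, 0), 2)
    cv2.imshow('detections', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break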
from object_detection.utils import visualization_utils as vis_util

CWD_PATH = os.getcwd()

# Path to the frozen detection graph. This is the actual model that is used
# for the object detection.
MODEL_NAME = 'ssd_mobilenet_v1_coco_11_06_2017'
PATH_TO_CKPT = os.path.join(CWD_PATH, 'object_detection', MODEL_NAME,
                            'frozen_inference_graph.pb')

# List of the strings that are used to add the correct label to each box.
PATH_TO_LABELS = os.path.join(CWD_PATH, 'object_detection', 'data',
                              'mscoco_label_map.pbtxt')

NUM_CLASSES = 90

# Loading label map
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(
    label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)


def detect_objects(image_np, sess, detection_graph):
    # Expand dimensions since the model expects images to have shape:
    # [1, None, None, 3]
    image_np_expanded = np.expand_dims(image_np, axis=0)
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    # Each box represents a part of the image where a particular object was
    # detected.
    boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
    # Each score represents the level of confidence for each of the objects.
    # The score is shown on the result image, together with the class label.
    scores = detection_graph.get_tensor_by_name('detection_scores:0')
    classes = detection_graph.get_tensor_by_name('detection_classes:0')
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')
    # Run the detection and return the raw outputs
    (boxes, scores, classes, num_detections) = sess.run(
        [boxes, scores, classes, num_detections],
        feed_dict={image_tensor: image_np_expanded})
    return boxes, scores, classes, num_detections
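# Usage sketch (assumed, not part of the original module): build the frozen
# graph, open a session, and run detect_objects() on a placeholder image.
if __name__ == '__main__':
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            od_graph_def.ParseFromString(fid.read())
        tf.import_graph_def(od_graph_def, name='')
        with tf.Session(graph=detection_graph) as sess:
            # placeholder frame; any HxWx3 uint8 array works
            image_np = np.zeros((300, 300, 3), dtype=np.uint8)
            boxes, scores, classes, num = detect_objects(
                image_np, sess, detection_graph)
            print('detections:', int(num[0]))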
def recognize_person(known_face_encodings, known_face_names): """ Function for recognize person's body on video @parameter known_face_encodings: List of encoding vectors of faces from current database @type known_face_encodings: C{list} @parameter known_face_names: path to the new photo @type known_face_names: C{list} """ # Initialize model for body detection detection_graph = tf.Graph() with detection_graph.as_default(): od_graph_def = tf.GraphDef() with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid: serialized_graph = fid.read() od_graph_def.ParseFromString(serialized_graph) tf.import_graph_def(od_graph_def, name='') label_map = label_map_util.load_labelmap(PATH_TO_LABELS) categories = label_map_util.convert_label_map_to_categories( label_map, max_num_classes=NUM_CLASSES, use_display_name=True) category_index = label_map_util.create_category_index(categories) # Initialize connect with server credentials = pika.PlainCredentials(USER, PASSWORD) parameters = pika.ConnectionParameters(IP, PORT, credentials=credentials) connection = pika.BlockingConnection(parameters) channel = connection.channel() # Initialize parameters for logging last_visible = np.array([False for _ in range(0, len(known_face_names))], dtype=np.bool) last_visible_time = [ datetime.datetime.min for _ in range(0, len(known_face_names)) ] last_no_face = False last_no_face_time = datetime.datetime.min last_unknown = False last_unknown_time = datetime.datetime.min last_update_face_base = datetime.datetime(1, 1, 1, 0, 0, 0) update_time = time.time() + TIMEOUT_UPDATE process_this_frame = True # Get video stream and processed frame camera = cv2.VideoCapture(CAMERA_ID) with detection_graph.as_default(): with tf.Session(graph=detection_graph) as sess: while True: # Check for timeout for updating database if time.time() > update_time: update_time = time.time() + TIMEOUT_UPDATE if (datetime.datetime.now() - last_update_face_base).days >= TIME_TO_UPDATE: known_face_encodings, known_face_names = read_known_faces( ) last_update_face_base = datetime.datetime.now() # Get picture from stream ret, frame = camera.read() small_frame = cv2.resize(frame, (0, 0), fx=1 / DECREASING_LEVEL, fy=1 / DECREASING_LEVEL) rgb_small_frame = small_frame[:, :, ::-1] if process_this_frame: # Get detected objects (bodies and faces) image_np_expanded = np.expand_dims(frame, axis=0) image_tensor = detection_graph.get_tensor_by_name( 'image_tensor:0') boxes = detection_graph.get_tensor_by_name( 'detection_boxes:0') scores = detection_graph.get_tensor_by_name( 'detection_scores:0') classes = detection_graph.get_tensor_by_name( 'detection_classes:0') num_detections = detection_graph.get_tensor_by_name( 'num_detections:0') (boxes, scores, classes, num_detections) = sess.run( [boxes, scores, classes, num_detections], feed_dict={image_tensor: image_np_expanded}) n_body = 0 for i in range(0, scores.shape[1]): if scores[0][i] > 0.5: n_body += 1 else: break # Get coordinates of box around faces face_locations = face_recognition.face_locations( rgb_small_frame) now_no_face = False # Check number of detected faces and bodies n_faces = len(face_locations) if n_body > n_faces: # Send alarm if anybody try to hide face now_no_face = True now = datetime.datetime.now() if not last_no_face: last_no_face_time = now else: if last_no_face_time != datetime.datetime.min: delta = now - last_no_face_time if delta.seconds > TIMEOUT: with open("logging.txt", "a+") as log_file: user_id = None send_data = { "userId": user_id, "cameraId": str(CAMERA_ID) } json_send_data = json.dumps(send_data) 
channel.basic_publish( exchange='', routing_key='users', body=json_send_data) log_file.write( "\nALARM NO FACE at " + now.strftime("%H:%M:%S %d-%m-%Y")) last_no_face_time = datetime.datetime.min # Get identified faces embeddings face_encodings = face_recognition.face_encodings( rgb_small_frame, face_locations) face_names = [] now_visible = np.array( [False for _ in range(0, len(known_face_names))], dtype=np.bool) now_unknown = False # Find similar face from database for face_encoding in face_encodings: name = "Unknown" matches = face_recognition.compare_faces( known_face_encodings, face_encoding) face_distances = face_recognition.face_distance( known_face_encodings, face_encoding) best_match_index = np.argmin(face_distances) if matches[best_match_index]: # Current face was recognized - send record about it name = known_face_names[best_match_index] now_visible[best_match_index] = True now = datetime.datetime.now() if not last_visible[best_match_index]: last_visible_time[best_match_index] = now else: if last_visible_time[ best_match_index] != datetime.datetime.min: delta = now - last_visible_time[ best_match_index] if delta.seconds > TIMEOUT: with open("logging.txt", "a+") as log_file: user_id = name.split('_')[0] send_data = { "userId": user_id, "cameraId": CAMERA_ID } json_send_data = json.dumps( send_data) channel.basic_publish( exchange='', routing_key='users', body=json_send_data) log_file.write( "\nRecognize " + name + " at " + now.strftime( "%H:%M:%S %d-%m-%Y")) last_visible_time[ best_match_index] = datetime.datetime.min else: # Current face was NOT recognized - send alarm about it now_unknown = True now = datetime.datetime.now() if not last_unknown: last_unknown_time = now else: if last_unknown_time != datetime.datetime.min: delta = now - last_unknown_time if delta.seconds > TIMEOUT: with open("logging.txt", "a+") as log_file: user_id = None send_data = { "userId": user_id, "cameraId": CAMERA_ID } json_send_data = json.dumps( send_data) channel.basic_publish( exchange='', routing_key='users', body=json_send_data) log_file.write( "\nALARM at " + now.strftime( "%H:%M:%S %d-%m-%Y")) last_unknown_time = datetime.datetime.min face_names.append(name) last_visible = copy.deepcopy(now_visible) last_no_face = now_no_face last_unknown = now_unknown process_this_frame = not process_this_frame # Visualize box around person vis_util.visualize_boxes_and_labels_on_image_array( frame, np.squeeze(boxes), np.squeeze(classes).astype(np.int32), np.squeeze(scores), category_index, use_normalized_coordinates=True, line_thickness=8, skip_labels=True, skip_scores=True) # Visualize box around face with name for (face_top, face_right, face_bottom, face_left), name in zip(face_locations, face_names): face_coordinates = { "top": face_top * DECREASING_LEVEL, "right": face_right * DECREASING_LEVEL, "bottom": face_bottom * DECREASING_LEVEL, "left": face_left * DECREASING_LEVEL } if name == "Unknown": color = RED_COLOR else: color = BLUE_COLOR # Get face's coordinates cv2.rectangle( frame, (face_coordinates["left"], face_coordinates["top"]), (face_coordinates["right"], face_coordinates["bottom"]), color, 2) # Visualize person's name if he was recognized text_coordinates = get_text_coordinates( name, face_coordinates) cv2.rectangle(frame, (text_coordinates["left"] - 5, face_coordinates["bottom"]), (text_coordinates["right"] + 5, text_coordinates["bottom"] + 8), color, cv2.FILLED) cv2.putText(frame, name, (text_coordinates["left"], text_coordinates["bottom"] + 4), TEXT_FONT, 1.0, WHITE_COLOR, 1) cv2.imshow('Video', 
frame) # Press 'q' to quit if cv2.waitKey(1) & 0xFF == ord('q'): break process_this_frame = not process_this_frame connection.close() camera.release() cv2.destroyAllWindows() return known_face_encodings, known_face_names
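# get_text_coordinates() is used in recognize_person() but not defined in
# this excerpt. A hypothetical implementation that centers the name under the
# face box, assuming TEXT_FONT is a cv2 font constant as in the drawing code
# above:
def get_text_coordinates(name, face_coordinates):
    (text_width, text_height), _ = cv2.getTextSize(name, TEXT_FONT, 1.0, 1)
    center = (face_coordinates["left"] + face_coordinates["right"]) // 2
    return {
        "left": center - text_width // 2,
        "right": center + text_width // 2,
        "bottom": face_coordinates["bottom"] + text_height,
    }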
def inference(self):
    PATH_TO_CKPT = "D:/GitHub/traffic_sign_object_detection/fine_tuned_model/ssd_1st/frozen_inference_graph.pb"
    PATH_TO_LABELS = "D:/GitHub/traffic_sign_object_detection/data/annotations/label_map.pbtxt"

    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=self.NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')
    sess = tf.Session(graph=detection_graph)

    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
    detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
    detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')

    # Map the displayed label to the integer this method returns; note that
    # both "child" and "bump" map to 2.
    a_dict = {"bicycle": 1, "child": 2, "const": 3, "bump": 2, "cross": 4,
              "": 0}
    result_list = []
    for i in range(5):
        # prepare image
        self.ret, frame = self.video.read()
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_expanded = np.expand_dims(frame_rgb, axis=0)
        # inference
        (boxes, scores, classes, num) = sess.run(
            [detection_boxes, detection_scores, detection_classes,
             num_detections],
            feed_dict={image_tensor: frame_expanded})
        disp_name = vis_util.visualize_boxes_and_labels_on_image_array(
            frame,
            np.squeeze(boxes),
            np.squeeze(classes).astype(np.int32),
            np.squeeze(scores),
            category_index,
            use_normalized_coordinates=True,
            line_thickness=8,
            min_score_thresh=0.6)
        # keep only the label name before the first colon
        disp_name = disp_name.split(":")[0]
        # switch to a number (names outside a_dict count as 0 instead of
        # raising a KeyError)
        result = a_dict.get(disp_name, 0)
        # append to the voting list
        result_list.append(result)

    # A class wins if it was seen in at least 3 of the 5 frames
    num_1 = result_list.count(1)
    num_2 = result_list.count(2)
    num_3 = result_list.count(3)
    num_4 = result_list.count(4)
    return_last = 0
    if num_1 >= 3:
        return_last = 1
    elif num_2 >= 3:
        return_last = 2
    elif num_3 >= 3:
        return_last = 3
    elif num_4 >= 3:
        return_last = 4
    # return the resulting integer
    return return_last
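# The vote-counting block in inference() can be written more compactly with
# collections.Counter; a sketch implementing the same 3-of-5 majority rule,
# ignoring class 0 (the helper name and min_votes parameter are assumptions):
from collections import Counter


def majority_class(result_list, min_votes=3):
    counts = Counter(r for r in result_list if r != 0)
    if counts:
        cls, votes = counts.most_common(1)[0]
        if votes >= min_votes:
            return cls
    return 0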
def __init__(self):
    # ROS initialize
    rospy.init_node('ros_tensorflow_ObjectDetection')
    rospy.on_shutdown(self.shutdown)

    # Set model path and image topic
    model_path = rospy.get_param("~model_path", "")
    image_topic = rospy.get_param("~image_topic", "")
    self._cv_bridge = CvBridge()
    rospy.loginfo("finding model path...")

    # Select the model path, label map and model name: 'MODEL_NAME',
    # 'PATH_TO_CKPT' and 'PATH_TO_LABELS'
    MODEL_NAME = '/outputing'
    PATH_TO_CKPT = model_path + MODEL_NAME + '/frozen_inference_graph.pb'
    PATH_TO_LABELS = os.path.join(model_path + '/data',
                                  'frame_label_map.pbtxt')

    # Alternatively, use a stock COCO model:
    # MODEL_NAME = 'ssd_mobilenet_v1_coco_11_06_2017'
    # MODEL_FILE = MODEL_NAME + '.tar.gz'
    # DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'
    # PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'
    # PATH_TO_LABELS = os.path.join(model_path + '/data', 'mscoco_label_map.pbtxt')
    # NUM_CLASSES = 1
    NUM_CLASSES = 90

    # Download Model
    # rospy.loginfo("Downloading models...")
    # opener = urllib.request.URLopener()
    # opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)
    # tar_file = tarfile.open(MODEL_FILE)
    # for file in tar_file.getmembers():
    #     file_name = os.path.basename(file.name)
    #     if 'frozen_inference_graph.pb' in file_name:
    #         tar_file.extract(file, os.getcwd())

    # Load a (frozen) TensorFlow model into memory.
    self.detection_graph = tf.Graph()
    with self.detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    rospy.loginfo("loading model labels...")
    rospy.loginfo("please wait")

    # Loading label map
    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    self.category_index = label_map_util.create_category_index(categories)

    # Initialize ROS subscriber and publisher
    self._sub = rospy.Subscriber(image_topic, ROSImage, self.callback,
                                 queue_size=100)
    self._pub = rospy.Publisher('object_detection', ROSImage, queue_size=1)
    rospy.loginfo("Starting object detector...")

    # Let the TF session grow GPU memory on demand
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
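# The subscriber above binds to self.callback, which is not shown in this
# excerpt. A minimal sketch of what such a callback might look like on the
# same class; self._session is an assumption (a persistent tf.Session built
# from self.detection_graph), as are the np and vis_util imports:
def callback(self, image_msg):
    cv_image = self._cv_bridge.imgmsg_to_cv2(image_msg, "bgr8")
    image_np_expanded = np.expand_dims(cv_image, axis=0)
    graph = self.detection_graph
    (boxes, scores, classes, num) = self._session.run(
        [graph.get_tensor_by_name('detection_boxes:0'),
         graph.get_tensor_by_name('detection_scores:0'),
         graph.get_tensor_by_name('detection_classes:0'),
         graph.get_tensor_by_name('num_detections:0')],
        feed_dict={graph.get_tensor_by_name('image_tensor:0'):
                   image_np_expanded})
    # Draw the detections and republish the annotated frame
    vis_util.visualize_boxes_and_labels_on_image_array(
        cv_image,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        self.category_index,
        use_normalized_coordinates=True,
        line_thickness=8)
    self._pub.publish(self._cv_bridge.cv2_to_imgmsg(cv_image, "bgr8"))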
def object_detect(file1, file2):
    import numpy as np
    import tensorflow as tf
    import scipy.misc
    from PIL import Image
    from distutils.version import StrictVersion

    # Require at least TF 1.4 (newer versions are fine)
    if StrictVersion(tf.__version__) < StrictVersion('1.4.0'):
        raise ImportError(
            'Please upgrade your tensorflow installation to v1.4.0 or later!')
    from object_detection.utils import label_map_util
    from object_detection.utils import visualization_utils as vis_util

    MODEL_NAME = 'training'
    PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'
    PATH_TO_LABELS = MODEL_NAME + '/object-detection.pbtxt'
    NUM_CLASSES = 1

    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    def load_image_into_numpy_array(image):
        (im_width, im_height) = image.size
        return np.array(image.getdata()).reshape(
            (im_height, im_width, 3)).astype(np.uint8)

    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            detection_boxes = detection_graph.get_tensor_by_name(
                'detection_boxes:0')
            detection_scores = detection_graph.get_tensor_by_name(
                'detection_scores:0')
            detection_classes = detection_graph.get_tensor_by_name(
                'detection_classes:0')
            num_detections = detection_graph.get_tensor_by_name(
                'num_detections:0')
            image = Image.open(file1)
            image_np = load_image_into_numpy_array(image)
            image_np_expanded = np.expand_dims(image_np, axis=0)
            (boxes, scores, classes, num) = sess.run(
                [detection_boxes, detection_scores, detection_classes,
                 num_detections],
                feed_dict={image_tensor: image_np_expanded})
            vis_util.visualize_boxes_and_labels_on_image_array(
                image_np,
                np.squeeze(boxes),
                np.squeeze(classes).astype(np.int32),
                np.squeeze(scores),
                category_index,
                use_normalized_coordinates=True,
                line_thickness=4)
            scipy.misc.imsave(file2, image_np)
def main(): print("starting program . . .") if not checkIfNecessaryPathsAndFilesExist(): return # end if # this next comment line is necessary to avoid a false PyCharm warning # noinspection PyUnresolvedReferences if StrictVersion(tf.__version__) < StrictVersion('1.5.0'): raise ImportError( 'Please upgrade your tensorflow installation to v1.5.* or later!') # end if # load a (frozen) TensorFlow model into memory detection_graph = tf.Graph() with detection_graph.as_default(): od_graph_def = tf.GraphDef() with tf.gfile.GFile(FROZEN_INFERENCE_GRAPH_LOC, 'rb') as fid: serialized_graph = fid.read() od_graph_def.ParseFromString(serialized_graph) tf.import_graph_def(od_graph_def, name='') # end with # end with # Loading label map # Label maps map indices to category names, so that when our convolution network predicts `5`, # we know that this corresponds to `airplane`. Here we use internal utility functions, # but anything that returns a dictionary mapping integers to appropriate string labels would be fine label_map = label_map_util.load_labelmap(LABELS_LOC) categories = label_map_util.convert_label_map_to_categories( label_map, max_num_classes=NUM_CLASSES, use_display_name=True) category_index = label_map_util.create_category_index(categories) imageFilePaths = [] for child_dir in [ f.path for f in os.scandir(TEST_IMAGE_DIR) if f.is_dir() ]: for imageFileName in os.listdir(child_dir): if imageFileName.endswith(".jpg"): imageFilePaths.append(child_dir + "/" + imageFileName) with detection_graph.as_default(): with tf.Session(graph=detection_graph) as sess: for image_path in imageFilePaths: image_np = cv2.imread(image_path) if image_np is None: print("error reading file " + image_path) continue # end if # Definite input and output Tensors for detection_graph image_tensor = detection_graph.get_tensor_by_name( 'image_tensor:0') # Each box represents a part of the image where a particular object was detected. detection_boxes = detection_graph.get_tensor_by_name( 'detection_boxes:0') # Each score represent how level of confidence for each of the objects. # Score is shown on the result image, together with the class label. detection_scores = detection_graph.get_tensor_by_name( 'detection_scores:0') detection_classes = detection_graph.get_tensor_by_name( 'detection_classes:0') num_detections = detection_graph.get_tensor_by_name( 'num_detections:0') # Expand dimensions since the model expects images to have shape: [1, None, None, 3] image_np_expanded = np.expand_dims(image_np, axis=0) # Actual detection. (boxes, scores, classes, num) = sess.run([ detection_boxes, detection_scores, detection_classes, num_detections ], feed_dict={image_tensor: image_np_expanded}) # print out, what was predicted objects = [] threshold = 0.2 # in order to get higher percentages you need to lower this number; usually at 0.01 you get 100% predicted objects for index, value in enumerate(classes[0]): object_dict = {} if scores[0, index] > threshold: object_dict[(category_index.get(value)).get( 'name').encode('utf8')] = scores[0, index] objects.append(object_dict) # objects: [{b'mouse': 0.971244}] # print(objects) # we assume there is only one object found: try: classification = list(objects[0].keys())[0] score = round(objects[0][classification] * 100, 2) classification = classification.decode("utf-8") except: classification = "-" score = "-" print("%s : %s : %r " % (image_path, classification, score)) # Visualization of the results of a detection. 
vis_util.visualize_boxes_and_labels_on_image_array( image_np, np.squeeze(boxes), np.squeeze(classes).astype(np.int32), np.squeeze(scores), category_index, use_normalized_coordinates=True, line_thickness=8) resized_image = cv2.resize(image_np, (0, 0), fx=0.8, fy=0.8) cv2.imshow("image_np", resized_image) cv2.waitKey()
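# A sketch of turning the normalized boxes used above into pixel coordinates,
# e.g. to crop the highest-scoring detection. The helper name and the 0.5
# threshold are assumptions; detections from these graphs come back sorted by
# score, so index 0 is the best one.
def crop_best_detection(image_np, boxes, scores, min_score=0.5):
    if scores[0][0] < min_score:
        return None
    height, width = image_np.shape[:2]
    # boxes are normalized [ymin, xmin, ymax, xmax]
    ymin, xmin, ymax, xmax = boxes[0][0]
    return image_np[int(ymin * height):int(ymax * height),
                    int(xmin * width):int(xmax * width)]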
def main(_):
    assert FLAGS.train_dir, '`train_dir` is missing.'
    assert FLAGS.pipeline_config_path, '`pipeline_config_path` is missing'
    assert FLAGS.eval_dir, '`eval_dir` is missing.'

    configs = config_util.get_configs_from_pipeline_file(
        FLAGS.pipeline_config_path)
    if FLAGS.task == 0:
        tf.gfile.MakeDirs(FLAGS.train_dir)
        tf.gfile.Copy(FLAGS.pipeline_config_path,
                      os.path.join(FLAGS.train_dir, 'pipeline.config'),
                      overwrite=True)
        tf.gfile.MakeDirs(FLAGS.eval_dir)
        tf.gfile.Copy(FLAGS.pipeline_config_path,
                      os.path.join(FLAGS.eval_dir, 'pipeline.config'),
                      overwrite=True)

    model_config = configs['model']
    train_config = configs['train_config']
    train_input_config = configs['train_input_config']
    eval_config = configs['eval_config']
    if FLAGS.eval_training_data:
        eval_input_config = configs['train_input_config']
    else:
        eval_input_config = configs['eval_input_config']

    # Run evaluation after every FLAGS.epochs_between_evals training steps;
    # the total number of steps is taken from the config when provided.
    if train_config.num_steps:
        total_num_epochs = train_config.num_steps
        train_config.num_steps = FLAGS.epochs_between_evals
        total_training_cycle = total_num_epochs // train_config.num_steps
    else:
        # TODO(mehdi): make it run indefinitely
        total_num_epochs = 20000000
        train_config.num_steps = FLAGS.epochs_between_evals
        total_training_cycle = total_num_epochs // train_config.num_steps

    train_model_fn = functools.partial(model_builder.build,
                                       model_config=model_config,
                                       is_training=True)
    eval_model_fn = functools.partial(model_builder.build,
                                      model_config=model_config,
                                      is_training=False)

    def get_next(config):
        return dataset_util.make_initializable_iterator(
            dataset_builder.build(config)).get_next()

    # functions to create a tensor input dictionary for both training and
    # evaluation
    train_input_dict_fn = functools.partial(get_next, train_input_config)
    eval_input_dict_fn = functools.partial(get_next, eval_input_config)

    # If not explicitly specified in the constructor and the TF_CONFIG
    # environment variable is present, load cluster_spec from TF_CONFIG.
    env = json.loads(os.environ.get('TF_CONFIG', '{}'))
    cluster_data = env.get('cluster', None)
    cluster = tf.train.ClusterSpec(cluster_data) if cluster_data else None
    task_data = env.get('task', {'type': 'master', 'index': 0})
    task_info = type('TaskSpec', (object,), task_data)

    # Parameters for a single worker.
    parameter_server_tasks = 0
    worker_replicas = 1
    worker_job_name = 'lonely_worker'
    task = 0
    is_chief = True
    master = ''

    if cluster_data and 'worker' in cluster_data:
        # The total number of worker replicas includes "worker"s and the
        # "master".
        worker_replicas = len(cluster_data['worker']) + 1
    if cluster_data and 'ps' in cluster_data:
        parameter_server_tasks = len(cluster_data['ps'])

    if worker_replicas > 1 and parameter_server_tasks < 1:
        raise ValueError(
            'At least 1 ps task is needed for distributed training.')

    if worker_replicas >= 1 and parameter_server_tasks > 0:
        # Set up distributed training; `cluster` is already a ClusterSpec.
        server = tf.train.Server(cluster, protocol='grpc',
                                 job_name=task_info.type,
                                 task_index=task_info.index)
        if task_info.type == 'ps':
            server.join()
            return

        worker_job_name = '%s/task:%d' % (task_info.type, task_info.index)
        task = task_info.index
        is_chief = (task_info.type == 'master')
        master = server.target

    label_map = label_map_util.load_labelmap(eval_input_config.label_map_path)
    max_num_classes = max([item.id for item in label_map.item])
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes)

    if FLAGS.run_once:
        eval_config.max_evals = 1

    train_graph_rewriter_fn = eval_graph_rewriter_fn = None
    if 'graph_rewriter_config' in configs:
        train_graph_rewriter_fn = graph_rewriter_builder.build(
            configs['graph_rewriter_config'], is_training=True)
        # the same rewriter config drives evaluation, with is_training=False
        eval_graph_rewriter_fn = graph_rewriter_builder.build(
            configs['graph_rewriter_config'], is_training=False)

    def train():
        return trainer.train(create_tensor_dict_fn=train_input_dict_fn,
                             create_model_fn=train_model_fn,
                             train_config=train_config,
                             master=master,
                             task=task,
                             num_clones=FLAGS.num_clones,
                             worker_replicas=worker_replicas,
                             clone_on_cpu=FLAGS.clone_on_cpu,
                             ps_tasks=parameter_server_tasks,
                             worker_job_name=worker_job_name,
                             is_chief=is_chief,
                             train_dir=FLAGS.train_dir,
                             graph_hook_fn=train_graph_rewriter_fn)

    def evaluate():
        return evaluator.evaluate(eval_input_dict_fn,
                                  eval_model_fn,
                                  eval_config,
                                  categories,
                                  FLAGS.train_dir,
                                  FLAGS.eval_dir,
                                  graph_hook_fn=eval_graph_rewriter_fn)

    for cycle_index in range(total_training_cycle):
        tf.logging.info('Starting a training cycle: %d/%d',
                        cycle_index, total_training_cycle)
        train()
        tf.logging.info('Starting to evaluate.')
        eval_metrics = evaluate()
        if stopping_criteria_met(eval_metrics, FLAGS.mask_min_ap,
                                 FLAGS.box_min_ap):
            tf.logging.info('Stopping criteria met. Training stopped')
            break
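# stopping_criteria_met() is called in the training loop above but not
# defined in this excerpt. A hypothetical implementation, assuming
# eval_metrics is the dict returned by evaluator.evaluate() and uses the
# standard COCO metric keys:
def stopping_criteria_met(eval_metrics, mask_min_ap, box_min_ap):
    box_ap = eval_metrics.get('DetectionBoxes_Precision/mAP', 0.0)
    mask_ap = eval_metrics.get('DetectionMasks_Precision/mAP', 0.0)
    # Stop once both box and mask AP reach their configured floors
    return box_ap >= box_min_ap and mask_ap >= mask_min_ap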
def load_label_map(self, label_map_path): label_map = label_map_util.load_labelmap(label_map_path) categories = label_map_util.convert_label_map_to_categories( label_map, max_num_classes=NUM_CLASSES, use_display_name=True) category_index = label_map_util.create_category_index(categories) return categories, category_index