def main(unused_argv):
    """Evaluate a detection checkpoint and archive the configs that were used.

    Creates ``FLAGS.eval_dir``, loads the configuration either from a single
    pipeline file or from separate model/eval/input files, copies whichever
    file(s) were read into the eval directory, and then calls
    ``evaluator.evaluate`` with the model/input factories and the categories
    derived from the label map.

    Args:
        unused_argv: Ignored.
    """
    assert FLAGS.checkpoint_dir, '`checkpoint_dir` is missing.'
    assert FLAGS.eval_dir, '`eval_dir` is missing.'

    eval_dir = FLAGS.eval_dir
    tf.gfile.MakeDirs(eval_dir)

    pipeline_path = FLAGS.pipeline_config_path
    if pipeline_path:
        configs = config_util.get_configs_from_pipeline_file(pipeline_path)
        tf.gfile.Copy(pipeline_path,
                      os.path.join(eval_dir, 'pipeline.config'),
                      overwrite=True)
    else:
        configs = config_util.get_configs_from_multiple_files(
            model_config_path=FLAGS.model_config_path,
            eval_config_path=FLAGS.eval_config_path,
            eval_input_config_path=FLAGS.input_config_path)
        # Keep a copy of every individual config next to the eval results.
        archived = (
            ('model.config', FLAGS.model_config_path),
            ('eval.config', FLAGS.eval_config_path),
            ('input.config', FLAGS.input_config_path),
        )
        for target_name, source_path in archived:
            tf.gfile.Copy(source_path,
                          os.path.join(eval_dir, target_name),
                          overwrite=True)

    model_config = configs['model']
    eval_config = configs['eval_config']
    input_config = configs['eval_input_config']
    if FLAGS.eval_training_data:
        # Evaluate on the training split instead of the eval split.
        input_config = configs['train_input_config']

    model_fn = functools.partial(
        model_builder.build, model_config=model_config, is_training=False)

    def _next_element(reader_config):
        dataset = dataset_builder.build(reader_config)
        iterator = dataset_util.make_initializable_iterator(dataset)
        return iterator.get_next()

    create_input_dict_fn = functools.partial(_next_element, input_config)

    label_map = label_map_util.load_labelmap(input_config.label_map_path)
    highest_class_id = max(item.id for item in label_map.item)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, highest_class_id)

    if FLAGS.run_once:
        eval_config.max_evals = 1

    if 'graph_rewriter_config' in configs:
        graph_rewriter_fn = graph_rewriter_builder.build(
            configs['graph_rewriter_config'], is_training=False)
    else:
        graph_rewriter_fn = None

    evaluator.evaluate(
        create_input_dict_fn,
        model_fn,
        eval_config,
        categories,
        FLAGS.checkpoint_dir,
        eval_dir,
        graph_hook_fn=graph_rewriter_fn)
def __init__(self):
    """Fetch the SSD-Inception archive, unpack it and load the frozen graph.

    Sets ``self.graph`` to a graph holding the imported ``self.PB_NAME``
    model, and ``self.label_map``, ``self.categories`` and
    ``self.category_index`` from ``self.PATH_TO_LABELS``.
    """
    logger.info('Loading Tensorflow Detection API')
    weights_path = get_file(config.SSD_INCEPTION_FILENAME,
                            config.SSD_INCEPTION_URL,
                            cache_dir=os.path.abspath(config.WEIGHT_PATH),
                            cache_subdir='models')
    extract_path = weights_path.replace('.tar.gz', '')
    if not os.path.exists(extract_path):
        # Unpack next to the archive itself: `extract_path` is derived from
        # `weights_path`, so this is the directory that must end up holding
        # the extracted model. In the normal case this is the same
        # `<WEIGHT_PATH>/models` directory as before.
        # The context manager closes the archive even if extraction fails.
        with tarfile.open(weights_path, "r:gz") as tar:
            tar.extractall(path=os.path.dirname(weights_path))

    pb_path = os.path.join(extract_path, self.PB_NAME)

    self.graph = tf.Graph()
    with self.graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(pb_path, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    self.label_map = label_map_util.load_labelmap(self.PATH_TO_LABELS)
    self.categories = label_map_util.convert_label_map_to_categories(
        self.label_map,
        max_num_classes=self.NUM_CLASSES,
        use_display_name=True)
    self.category_index = label_map_util.create_category_index(
        self.categories)
def read_data_and_evaluate(input_config, eval_config):
    """Evaluate pre-computed detections against groundtruth stored in tf_records.

    Every record of every input file is parsed into a dictionary holding both
    the groundtruth and the detections for one image; parsed images are fed
    to the first evaluator built from `eval_config`, unparsable ones are
    counted and logged as skipped.

    Args:
      input_config: input config proto of type
        object_detection.protos.InputReader.
      eval_config: evaluation config proto of type
        object_detection.protos.EvalConfig.

    Returns:
      Evaluated detections metrics.

    Raises:
      ValueError: if input_reader type is not supported or metric type is
        unknown.
    """
    if input_config.WhichOneof('input_reader') != 'tf_record_input_reader':
        raise ValueError('Unsupported input_reader_config.')

    record_paths = input_config.tf_record_input_reader.input_path

    label_map = label_map_util.load_labelmap(input_config.label_map_path)
    highest_class_id = max(item.id for item in label_map.item)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, highest_class_id)

    # Only a single evaluator is supported: use the first one.
    detection_evaluator = evaluator.get_evaluators(eval_config, categories)[0]

    num_skipped = 0
    num_processed = 0
    for record_path in _generate_filenames(record_paths):
        tf.logging.info('Processing file: {0}'.format(record_path))

        records = tf.python_io.tf_record_iterator(path=record_path)
        parser = tf_example_parser.TfExampleDetectionAndGTParser()

        for serialized in records:
            tf.logging.log_every_n(tf.logging.INFO, 'Processed %d images...',
                                   1000, num_processed)
            num_processed += 1

            example = tf.train.Example()
            example.ParseFromString(serialized)
            parsed = parser.parse(example)

            if not parsed:
                num_skipped += 1
                tf.logging.info('Skipped images: {0}'.format(num_skipped))
                continue

            # The same dictionary carries groundtruth and detection fields.
            image_key = parsed[standard_fields.DetectionResultFields.key]
            detection_evaluator.add_single_ground_truth_image_info(
                image_key, parsed)
            detection_evaluator.add_single_detected_image_info(
                image_key, parsed)

    return detection_evaluator.evaluate()
def main(unused_argv):
    """Evaluate an LSTM detection model checkpoint.

    Loads the configuration (single pipeline file or separate files), writes
    the merged pipeline proto as text to ``<eval_dir>/pipeline.config``,
    builds the model and sequence-input factories and calls
    ``evaluator.evaluate``.

    Args:
        unused_argv: Ignored.
    """
    assert FLAGS.checkpoint_dir, '`checkpoint_dir` is missing.'
    assert FLAGS.eval_dir, '`eval_dir` is missing.'

    pipeline_path = FLAGS.pipeline_config_path
    if pipeline_path:
        configs = config_util.get_configs_from_pipeline_file(pipeline_path)
    else:
        configs = config_util.get_configs_from_multiple_files(
            model_config_path=FLAGS.model_config_path,
            eval_config_path=FLAGS.eval_config_path,
            eval_input_config_path=FLAGS.input_config_path)

    # Snapshot the effective configuration alongside the eval results.
    serialized_pipeline = text_format.MessageToString(
        config_util.create_pipeline_proto_from_configs(configs))
    tf.gfile.MakeDirs(FLAGS.eval_dir)
    snapshot_path = os.path.join(FLAGS.eval_dir, 'pipeline.config')
    with tf.gfile.Open(snapshot_path, 'wb') as snapshot:
        snapshot.write(serialized_pipeline)

    model_config = configs['model']
    lstm_config = configs['lstm_model']
    eval_config = configs['eval_config']
    input_config = configs['eval_input_config']

    if FLAGS.eval_training_data:
        # Read the training data with the training unroll length.
        training_reader = configs['train_input_config'].external_input_reader
        input_config.external_input_reader.CopyFrom(training_reader)
        lstm_config.eval_unroll_length = lstm_config.train_unroll_length

    model_fn = functools.partial(
        model_builder.build,
        model_config=model_config,
        lstm_config=lstm_config,
        is_training=False)

    def _build_inputs(reader_config, detection_config, recurrent_config,
                      unroll_length):
        return seq_dataset_builder.build(
            reader_config, detection_config, recurrent_config, unroll_length)

    create_input_dict_fn = functools.partial(
        _build_inputs,
        input_config,
        model_config,
        lstm_config,
        lstm_config.eval_unroll_length)

    label_map = label_map_util.load_labelmap(input_config.label_map_path)
    highest_class_id = max(item.id for item in label_map.item)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, highest_class_id)

    if FLAGS.run_once:
        eval_config.max_evals = 1

    evaluator.evaluate(
        create_input_dict_fn,
        model_fn,
        eval_config,
        categories,
        FLAGS.checkpoint_dir,
        FLAGS.eval_dir)
def test_load_bad_label_map(self):
    """Loading a label map that contains an item with id 0 raises ValueError."""
    bad_label_map_text = """ item { id:0 name:'class that should not be indexed at zero' } item { id:2 name:'cat' } item { id:1 name:'dog' } """
    temp_dir = self.get_temp_dir()
    pbtxt_path = os.path.join(temp_dir, 'label_map.pbtxt')
    with tf.gfile.Open(pbtxt_path, 'wb') as handle:
        handle.write(bad_label_map_text)

    self.assertRaises(ValueError, label_map_util.load_labelmap, pbtxt_path)
def __init__(self):
    """Load the frozen detection graph and prepare tensors for inference.

    Imports the graph stored at ``PATH_TO_CKPT``, builds the category index
    from ``PATH_TO_LABELS`` and collects whichever of the standard output
    tensors the graph provides. The results are stored on the instance as
    ``detection_graph``, ``tensor_dict``, ``image_tensor``, ``label_map``,
    ``category_index`` and ``session``.
    """
    self.detection_graph = tf.Graph()
    with self.detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    with self.detection_graph.as_default():
        graph = tf.get_default_graph()

        # Get handles to input and output tensors.
        all_tensor_names = {
            output.name
            for op in graph.get_operations()
            for output in op.outputs
        }
        tensor_dict = {}
        for key in [
                'num_detections', 'detection_boxes', 'detection_scores',
                'detection_classes', 'detection_masks'
        ]:
            tensor_name = key + ':0'
            if tensor_name in all_tensor_names:
                tensor_dict[key] = graph.get_tensor_by_name(tensor_name)

        image_tensor = graph.get_tensor_by_name('image_tensor:0')

        if 'detection_masks' in tensor_dict:
            # The following processing is only for a single image.
            detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
            detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
            # Reframe is required to translate masks from box coordinates to
            # image coordinates and fit the image size.
            real_num_detection = tf.cast(
                tensor_dict['num_detections'][0], tf.int32)
            detection_boxes = tf.slice(
                detection_boxes, [0, 0], [real_num_detection, -1])
            detection_masks = tf.slice(
                detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
            # No concrete image exists at construction time (the previous
            # code read `image.shape` here and raised NameError), so the
            # height and width are taken from the fed batch at run time.
            # Assumes `image_tensor` is laid out as
            # [batch, height, width, channels] - TODO confirm for the model.
            image_shape = tf.shape(image_tensor)
            detection_masks_reframed = (
                utils_ops.reframe_box_masks_to_image_masks(
                    detection_masks, detection_boxes,
                    image_shape[1], image_shape[2]))
            detection_masks_reframed = tf.cast(
                tf.greater(detection_masks_reframed, 0.5), tf.uint8)
            # Follow the convention by adding back the batch dimension.
            tensor_dict['detection_masks'] = tf.expand_dims(
                detection_masks_reframed, 0)

    self.tensor_dict = tensor_dict
    self.image_tensor = image_tensor
    self.label_map = label_map
    self.category_index = category_index
    self.session = tf.Session(graph=self.detection_graph)
def main(unused_argv):
    """Evaluate a detection model checkpoint.

    Picks the config loader according to whether ``FLAGS.pipeline_config_path``
    is set, builds the model and input factories, derives the categories from
    the label map and calls ``evaluator.evaluate``.

    Args:
        unused_argv: Ignored.
    """
    assert FLAGS.checkpoint_dir, '`checkpoint_dir` is missing.'
    assert FLAGS.eval_dir, '`eval_dir` is missing.'

    if FLAGS.pipeline_config_path:
        load_configs = get_configs_from_pipeline_file
    else:
        load_configs = get_configs_from_multiple_files
    model_config, eval_config, input_config = load_configs()

    model_fn = functools.partial(
        model_builder.build, model_config=model_config, is_training=False)
    create_input_dict_fn = functools.partial(
        input_reader_builder.build, input_config)

    label_map = label_map_util.load_labelmap(input_config.label_map_path)
    highest_class_id = max(item.id for item in label_map.item)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, highest_class_id)

    evaluator.evaluate(
        create_input_dict_fn,
        model_fn,
        eval_config,
        categories,
        FLAGS.checkpoint_dir,
        FLAGS.eval_dir)
# python infer1.py --model_file /media/htic/NewVolume1/murali/Object_detection/models/research/models/model_glaucoma/graph/frozen_inference_graph.pb --input_path /media/htic/Balamurali/Sharath/Gl_challenge/REFUGE-Validation400/360_572 --inp_img_ext jpg --output_path /media/htic/Balamurali/Sharath/Gl_challenge/REFUGE-Validation400/360_572_out/ --label_file /media/htic/NewVolume1/murali/Object_detection/models/research/data/glaucoma_label_map.pbtxt # Argument parsed and assigned opt = parser.parse_args() model_file = opt.model_file inp_img_ext = opt.inp_img_ext label_file = opt.label_file detection_out_path = opt.output_path input_path = opt.input_path NUM_CLASSES = 1 print (vars(opt)) val_img_path = os.path.join(input_path ,'*.' + inp_img_ext) img_paths = glob.glob(val_img_path) label_map = label_map_util.load_labelmap(label_file) categories = label_map_util.convert_label_map_to_categories(label_map,max_num_classes=NUM_CLASSES,use_display_name=True) category_index = label_map_util.create_category_index(categories) # Initializing the graph detection_graph = tf.Graph() with detection_graph.as_default(): od_graph_def = tf.GraphDef() with tf.gfile.GFile(model_file,'rb') as fid: serialized_graph = fid.read() od_graph_def.ParseFromString(serialized_graph) tf.import_graph_def(od_graph_def,name='') with detection_graph.as_default(): with tf.Session() as sess: ops = tf.get_default_graph().get_operations()
def create_category_index(labels_path, max_classes):
    """Build the category index for the label map stored at `labels_path`.

    Args:
        labels_path: Path of the label map file to load.
        max_classes: Passed as `max_num_classes` when converting the label map
            to categories.

    Returns:
        The result of `label_map_util.create_category_index` for the
        categories of that label map (display names are used).
    """
    categories = label_map_util.convert_label_map_to_categories(
        label_map_util.load_labelmap(labels_path),
        max_num_classes=max_classes,
        use_display_name=True)
    return label_map_util.create_category_index(categories)
# Location of the frozen detection graph that is imported below.
PATH_TO_FROZEN_GRAPH = "E:/SOFTWARES/Object Detection/ssd_mobilenet_v1_coco_2017_11_17/frozen_inference_graph.pb"

# Import the serialized GraphDef into a dedicated graph.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')

# Loading the label map.
# Label maps map indices to category names, so that when our convolution
# network predicts `5`, we know that this corresponds to `airplane`. Here we
# use internal utility functions, but anything that returns a dictionary
# mapping integers to appropriate string labels would be fine.
NUM_CLASSES = 90
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(
    label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)

# Start live-camera object detection.
with detection_graph.as_default():
    with tf.Session(graph=detection_graph) as sess:
        while True:
            # NOTE(review): `ret` is not checked in the visible code; confirm
            # that a failed read is handled further down.
            ret, image_np = cap.read()
            # Expand dimensions since the model expects images to have shape:
            # [1, None, None, 3]
            image_np_expanded = np.expand_dims(image_np, axis=0)
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            # Each box represents a part of the image where a particular
            # object was detected.
            boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
            # Each score represents the level of confidence for each of the
            # objects.
def facedet_objdet_as_service():
    """Serve training, object/face detection and face recognition over TCP.

    Binds the module-level ``sock`` to ``(HOST, PORT)``, loads the face
    detection, object detection and facenet graphs plus the pickled face
    classifier, and then handles one request per accepted connection for as
    long as ``server_flag`` is true.

    Each connection starts with an operation name (at most 32 bytes):

    * ``"train"``: receives "<aliases> <images>", then for every alias its
      name followed by its images; the images are stored below
      ``PATH_TO_LFW_IMAGES``, embeddings are computed for the whole dataset
      and a linear SVC is fitted and pickled to
      ``PATH_TO_FACENET_CLASSIFIER``.
    * ``"objdet"``: runs the face detector and the object detector on every
      received image and reports each detection scoring at least
      ``FACEDET_THRESH``.
    * ``"facerec"``: classifies every received image with the face
      classifier.

    The reply is a sequence of ``:``-terminated records followed by ``END``.
    Returns early (after logging) when the socket cannot be bound.
    """
    # networking
    global sock

    import shutil  # local import: only needed to reset alias folders

    Logger.info("Running face and object detection as a service...")
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        sock.bind((HOST, PORT))
    except socket.error as msg:
        # `errno`/`strerror` exist on both Python 2 and 3; indexing the
        # exception (`msg[0]`) raises TypeError on Python 3.
        Logger.error("Socket bind failed. Error code : " + str(msg.errno) +
                     ", message " + str(msg.strerror))
        return
    sock.listen(10)

    # load models
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    g_facedet = tf.Graph()
    g_objdet = tf.Graph()
    g_facenet = tf.Graph()

    with g_facedet.as_default():
        Logger.debug("Loading face detection model: " + PATH_TO_FACEDET_MODEL)
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_FACEDET_MODEL, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')
        image_tensor_face = g_facedet.get_tensor_by_name('image_tensor:0')
        # Each box represents a part of the image where a particular object
        # was detected.
        boxes_tensor_face = g_facedet.get_tensor_by_name('detection_boxes:0')
        # Each score represents the level of confidence for each of the
        # objects.
        scores_tensor_face = g_facedet.get_tensor_by_name('detection_scores:0')
        classes_tensor_face = g_facedet.get_tensor_by_name(
            'detection_classes:0')
        num_detections_tensor_face = g_facedet.get_tensor_by_name(
            'num_detections:0')

    with g_objdet.as_default():
        Logger.debug("Loading object detection model: " + PATH_TO_OBJDET_MODEL)
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_OBJDET_MODEL, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')
        # Input and output tensors of the object detection graph.
        image_tensor_obj = g_objdet.get_tensor_by_name('image_tensor:0')
        detection_boxes_obj = g_objdet.get_tensor_by_name('detection_boxes:0')
        detection_scores_obj = g_objdet.get_tensor_by_name(
            'detection_scores:0')
        detection_classes_obj = g_objdet.get_tensor_by_name(
            'detection_classes:0')
        num_detections_obj = g_objdet.get_tensor_by_name('num_detections:0')

    with g_facenet.as_default():
        Logger.debug("Loading feature extraction model: " +
                     PATH_TO_FACENET_MODEL)
        facenet.load_model(PATH_TO_FACENET_MODEL)
        faces_placeholder = g_facenet.get_tensor_by_name('input:0')
        embeddings = g_facenet.get_tensor_by_name('embeddings:0')
        phase_train_placeholder = g_facenet.get_tensor_by_name('phase_train:0')
        embedding_size = embeddings.get_shape()[1]

    Logger.debug("Loading face classifier: " + PATH_TO_FACENET_CLASSIFIER)
    # NOTE(review): pickle executes arbitrary code on load; this file must
    # only ever be written by this service (see the "train" operation).
    with open(PATH_TO_FACENET_CLASSIFIER, 'rb') as infile:
        (facenet_model, facenet_class_names) = pickle.load(infile)

    label_map = label_map_util.load_labelmap(PATH_TO_OBJDET_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES_OBJDET, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    # Server loop
    while server_flag:
        Logger.info('Waiting for connections...')
        try:
            conn, addr = sock.accept()
        except socket.error:
            break

        start_time = time.time()
        img_data = []
        img_ids = []
        # Reset per request so a failed `recv` can neither leave the name
        # unbound nor replay the previous request's operation.
        operation = None
        try:
            operation = conn.recv(32).decode()
            if operation == "train":
                conn.send("OK".encode())
                try:
                    tokens = conn.recv(32).decode().split(' ')
                    no_objects = int(tokens[0])
                    no_images = int(tokens[1])
                except Exception as e:
                    # Sockets carry bytes: encode like every other reply.
                    conn.send(
                        ("Error with integer values: " + str(e)).encode())
                    continue
                conn.send("OK".encode())
                Logger.info(
                    "Request to perform training on {} aliases with {} images each."
                    .format(no_objects, no_images))

                for alias_idx in range(no_objects):
                    alias = conn.recv(32).decode().replace(" ", "_")
                    # The alias comes from the network and names a folder
                    # that is deleted below, so it must not be able to point
                    # outside PATH_TO_LFW_IMAGES.
                    alias = os.path.basename(alias)
                    if alias in ("", ".", ".."):
                        raise ValueError("Invalid alias received")
                    Logger.debug("Alias: " + alias)
                    folder = PATH_TO_LFW_IMAGES + "/" + alias
                    if os.path.exists(folder):
                        # `os.rmdir` only removes empty directories, so a
                        # previously trained alias could never be replaced.
                        shutil.rmtree(folder)
                    os.makedirs(folder)
                    conn.send("OK".encode())
                    # NOTE(review): two values are unpacked here but three in
                    # the objdet/facerec branch for the same keyword
                    # arguments - confirm against Connection.download_images.
                    img_data, _ = Connection.download_images(
                        conn, in_mem=True, close_conn=False)
                    # Number the files; the counter used to stay at 0, so
                    # every image overwrote "<alias>0.png".
                    for n, img_buf in enumerate(img_data):
                        img = cv2.imdecode(
                            np.asarray(bytearray(img_buf), dtype=np.uint8), 0)
                        cv2.imwrite(
                            folder + "/" + alias + str(n) + ".png", img)

                dataset = facenet.get_dataset(PATH_TO_LFW_IMAGES)
                paths, labels = facenet.get_image_paths_and_labels(dataset)
                Logger.debug('Number of classes: %d' % len(dataset))
                Logger.debug('Number of images: %d' % len(paths))

                nrof_images = len(paths)
                nrof_batches_per_epoch = int(
                    math.ceil(1.0 * nrof_images / TRAIN_BATCH_SIZE))
                emb_array = np.zeros((nrof_images, embedding_size))
                with tf.Session(config=config, graph=g_facenet) as sess:
                    for batch_idx in range(nrof_batches_per_epoch):
                        start_index = batch_idx * TRAIN_BATCH_SIZE
                        end_index = min((batch_idx + 1) * TRAIN_BATCH_SIZE,
                                        nrof_images)
                        paths_batch = paths[start_index:end_index]
                        images = facenet.load_data(paths_batch, False, False,
                                                   TRAIN_IMG_SIZE)
                        feed_dict = {
                            faces_placeholder: images,
                            phase_train_placeholder: False
                        }
                        emb_array[start_index:end_index, :] = sess.run(
                            embeddings, feed_dict=feed_dict)

                model = SVC(kernel='linear', probability=True)
                model.fit(emb_array, labels)
                class_names = [cls.name.replace('_', ' ') for cls in dataset]
                with open(PATH_TO_FACENET_CLASSIFIER, 'wb') as outfile:
                    pickle.dump((model, class_names), outfile)
                Logger.debug('Saved classifier model to file "%s"' %
                             PATH_TO_FACENET_CLASSIFIER)
                # Serve later "facerec" requests with the classifier that was
                # just trained instead of the one loaded at start-up.
                facenet_model, facenet_class_names = model, class_names
            elif operation == "objdet" or operation == "facerec":
                conn.send("OK".encode())
                img_data, _, img_ids = Connection.download_images(
                    conn, in_mem=True, close_conn=False)
                pprint(img_ids)
            else:
                conn.send("No such operation".encode())
        except Exception as e:
            Logger.error(str(e))

        Logger.info('Images download took {} s'.format(time.time() -
                                                       start_time))
        Logger.info('Received {} images'.format(len(img_data)))

        reply_parts = []
        start_time = time.time()

        # Decode the received buffers. `image_ids` is kept parallel to
        # `images` so that skipped buffers cannot shift the ids.
        images = []
        image_ids = []
        for idx, img_buf in enumerate(img_data):
            if img_buf is None or len(img_buf) == 0:
                continue
            img = cv2.imdecode(
                np.asarray(bytearray(img_buf), dtype=np.uint8), 0)
            # imdecode returns None for data it cannot decode.
            if img is None or img.ndim < 2:
                Logger.error('Unable to align image')
                continue
            elif img.ndim == 2:
                img = facenet.to_rgb(img)
            elif len(img.shape) > 2 and img.shape[2] == 4:
                img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR)

            if operation == "objdet":
                images.append(np.expand_dims(img, axis=0))
                image_ids.append(img_ids[idx])
            elif operation == "facerec":
                scaled = misc.imresize(img, (160, 160), interp='bilinear')
                images.append(facenet.prewhiten(scaled))

        if operation == "objdet":
            for image_np_expanded, img_id in zip(images, image_ids):
                # Face detection
                with tf.Session(config=config, graph=g_facedet) as sess:
                    np.random.seed(777)
                    (bounding_boxes, scores, classes,
                     num_detections) = sess.run(
                         [
                             boxes_tensor_face, scores_tensor_face,
                             classes_tensor_face, num_detections_tensor_face
                         ],
                         feed_dict={image_tensor_face: image_np_expanded})
                    # Drop only the batch axis so that a single detection
                    # still yields a 2-D box array.
                    bounding_boxes = np.squeeze(bounding_boxes, axis=0)
                    scores = np.squeeze(scores, axis=0)
                    for det in range(bounding_boxes.shape[0]):
                        if scores[det] < FACEDET_THRESH:
                            continue
                        reply_parts.append("face;{};{};{};{};{}:".format(
                            img_id, bounding_boxes[det, 1],
                            bounding_boxes[det, 0], bounding_boxes[det, 3],
                            bounding_boxes[det, 2]))

                # Object detection
                with tf.Session(config=config, graph=g_objdet) as sess:
                    np.random.seed(777)
                    (bounding_boxes, scores, classes,
                     num_detections) = sess.run(
                         [
                             detection_boxes_obj, detection_scores_obj,
                             detection_classes_obj, num_detections_obj
                         ],
                         feed_dict={image_tensor_obj: image_np_expanded})
                    bounding_boxes = np.squeeze(bounding_boxes, axis=0)
                    scores = np.squeeze(scores, axis=0)
                    classes = np.squeeze(classes, axis=0)
                    for det in range(bounding_boxes.shape[0]):
                        # NOTE(review): object detections are filtered with
                        # the face threshold - confirm this is intended.
                        if scores[det] < FACEDET_THRESH:
                            continue
                        reply_parts.append("{};{};{};{};{};{}:".format(
                            category_index[int(classes[det])]['name'], img_id,
                            bounding_boxes[det, 1], bounding_boxes[det, 0],
                            bounding_boxes[det, 3], bounding_boxes[det, 2]))
        elif operation == "facerec" and images:
            with tf.Session(config=config, graph=g_facenet) as sess:
                feed_dict = {
                    faces_placeholder: images,
                    phase_train_placeholder: False
                }
                # Sized by the images actually decoded; sizing by `img_data`
                # failed whenever a buffer had been skipped.
                emb_array = np.zeros((len(images), embedding_size))
                emb_array[:] = sess.run(embeddings, feed_dict=feed_dict)
            predictions = facenet_model.predict_proba(emb_array)
            best_class_indices = np.argmax(predictions, axis=1)
            best_class_probabilities = predictions[
                np.arange(len(best_class_indices)), best_class_indices]
            for idx, best in enumerate(best_class_indices):
                reply_parts.append("%d;%s;%.3f:" % (
                    idx, facenet_class_names[best],
                    best_class_probabilities[idx]))

        send_buf = "".join(reply_parts) + "END"
        Logger.info('Operation {} took {} s'.format(operation,
                                                    time.time() - start_time))
        Logger.debug('Sending: ' + send_buf)
        conn.send(str.encode(send_buf))

    sock.close()
    Logger.info("Service stopped.")
# NOTE(review): the statements down to `return` are the tail of a helper whose
# `def` line lies outside this view. They scale `img` so that its longer side
# becomes 640 and paste it centred onto a 640x640 RGB canvas.
    size = img.size
    ratio = float(640) / max(size)
    new_image_size = tuple([int(x*ratio) for x in size])
    img = img.resize(new_image_size, Image.ANTIALIAS)
    new_im = Image.new("RGB", (640, 640))
    new_im.paste(img, ((640-new_image_size[0])//2, (640-new_image_size[1])//2))
    return np.asarray(new_im)

# Select the GPU or CPU to use. You can first check which devices are
# available with sess.list_devices().
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'
title = "MURAT_EREN"

# Build the category index from the label map.
label_map = label_map_util.load_labelmap('v3/label_map.pbtxt')
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=14, use_display_name=True)
category_index = label_map_util.create_category_index(categories)

# Import the frozen detection graph.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile('v3/201725.pb', 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')

# Open the video source named by the first command-line argument and read its
# frame dimensions.
cap = cv2.VideoCapture(str(sys.argv[1]))
xwidth = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
xheight = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
def _load_label_map(self):
    """Return the category index built from ``self.PATH_TO_LABELS``.

    At most ``self.NUM_CLASSES`` categories are converted and display names
    are used.
    """
    categories = label_map_util.convert_label_map_to_categories(
        label_map_util.load_labelmap(self.PATH_TO_LABELS),
        max_num_classes=self.NUM_CLASSES,
        use_display_name=True)
    return label_map_util.create_category_index(categories)
def function1():
    """Detect objects and text in the fixed test image and speak the results.

    Stage 1 extracts the frozen SSD-MobileNet graph from the local model
    archive, runs it on the test image, prints and speaks the names of the
    classes scoring above 0.5, and shows/saves the annotated image.

    Stage 2 thresholds the same image, keeps the plausible contour boxes,
    paints every kept box as black-on-white (or inverted) text on a blank
    canvas, writes that canvas to disk, runs Tesseract OCR on it and speaks
    the recognised text. Finally waits for a key press in the OpenCV window.

    Sets the module-level globals ``img_x`` and ``img_y`` to the width and
    height of the test image.
    """
    global img_x, img_y

    import pyttsx3
    import pytesseract
    import numpy as np
    import os
    import sys
    import tarfile
    import tensorflow as tf
    from PIL import Image

    sys.path.append("..")
    pytesseract.pytesseract.tesseract_cmd = 'C:/Program Files (x86)/Tesseract-OCR/tesseract'

    from object_detection.utils import ops as utils_ops

    # Compare the version numerically: as strings, '1.10.0' < '1.4.0' is
    # true, which rejected perfectly good TensorFlow releases.
    tf_major_minor = tuple(
        int(part) for part in tf.__version__.split('.')[:2])
    if tf_major_minor < (1, 4):
        raise ImportError('Please upgrade your tensorflow installation to v1.4.* or later!')

    from object_detection.utils import label_map_util
    from object_detection.utils import visualization_utils as vis_util
    import cv2

    MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17'
    MODEL_FILE = MODEL_NAME + '.tar.gz'
    PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'
    PATH_TO_LABELS = os.path.join('C:/Users/hi/AppData/Local/Programs/Python/Python36/models/object_detection/data', 'mscoco_label_map.pbtxt')
    NUM_CLASSES = 90

    # Extract only the frozen graph from the archive; the context manager
    # closes the archive, which used to be left open.
    with tarfile.open(MODEL_FILE) as tar_file:
        for member in tar_file.getmembers():
            file_name = os.path.basename(member.name)
            if 'frozen_inference_graph.pb' in file_name:
                tar_file.extract(member, os.getcwd())

    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    def load_image_into_numpy_array(image):
        """Convert a 3-channel PIL image into a (height, width, 3) uint8 array."""
        (im_width, im_height) = image.size
        return np.array(image.getdata()).reshape(
            (im_height, im_width, 3)).astype(np.uint8)

    PATH_TO_TEST_IMAGES_DIR = 'C:/Users/hi/AppData/Local/Programs/Python/Python36/models/object_detection/test_images'
    TEST_IMAGE_PATHS = [
        os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i))
        for i in range(90, 91)
    ]

    def run_inference_for_single_image(image, graph):
        """Run `graph` on one image array and return its outputs, batch axis removed."""
        with graph.as_default():
            with tf.Session() as sess:
                # Get handles to input and output tensors.
                ops = tf.get_default_graph().get_operations()
                all_tensor_names = {
                    output.name for op in ops for output in op.outputs}
                tensor_dict = {}
                for key in [
                        'num_detections', 'detection_boxes',
                        'detection_scores', 'detection_classes',
                        'detection_masks'
                ]:
                    tensor_name = key + ':0'
                    if tensor_name in all_tensor_names:
                        tensor_dict[key] = (
                            tf.get_default_graph().get_tensor_by_name(
                                tensor_name))
                if 'detection_masks' in tensor_dict:
                    # The following processing is only for a single image.
                    detection_boxes = tf.squeeze(
                        tensor_dict['detection_boxes'], [0])
                    detection_masks = tf.squeeze(
                        tensor_dict['detection_masks'], [0])
                    # Reframe is required to translate masks from box
                    # coordinates to image coordinates and fit the image size.
                    real_num_detection = tf.cast(
                        tensor_dict['num_detections'][0], tf.int32)
                    detection_boxes = tf.slice(
                        detection_boxes, [0, 0], [real_num_detection, -1])
                    detection_masks = tf.slice(
                        detection_masks, [0, 0, 0],
                        [real_num_detection, -1, -1])
                    detection_masks_reframed = (
                        utils_ops.reframe_box_masks_to_image_masks(
                            detection_masks, detection_boxes,
                            image.shape[0], image.shape[1]))
                    detection_masks_reframed = tf.cast(
                        tf.greater(detection_masks_reframed, 0.5), tf.uint8)
                    # Follow the convention by adding back the batch dimension.
                    tensor_dict['detection_masks'] = tf.expand_dims(
                        detection_masks_reframed, 0)
                image_tensor = tf.get_default_graph().get_tensor_by_name(
                    'image_tensor:0')

                # Run inference.
                output_dict = sess.run(
                    tensor_dict,
                    feed_dict={image_tensor: np.expand_dims(image, 0)})

                # All outputs are float32 numpy arrays, so convert types as
                # appropriate.
                output_dict['num_detections'] = int(
                    output_dict['num_detections'][0])
                output_dict['detection_classes'] = output_dict[
                    'detection_classes'][0].astype(np.uint8)
                output_dict['detection_boxes'] = output_dict[
                    'detection_boxes'][0]
                output_dict['detection_scores'] = output_dict[
                    'detection_scores'][0]
                if 'detection_masks' in output_dict:
                    output_dict['detection_masks'] = output_dict[
                        'detection_masks'][0]
        return output_dict

    # ---- Stage 1: object detection -------------------------------------
    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:
            # Input and output tensors of the detection graph.
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            # Each box represents a part of the image where a particular
            # object was detected.
            detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
            # Each score represents the level of confidence for each of the
            # objects.
            detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
            detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
            num_detections = detection_graph.get_tensor_by_name('num_detections:0')

            for image_path in TEST_IMAGE_PATHS:
                image = Image.open(image_path)
                # The array representation is used later to draw the boxes
                # and labels onto the image.
                image_np = load_image_into_numpy_array(image)
                image_np_expanded = np.expand_dims(image_np, axis=0)
                (boxes, scores, classes, num) = sess.run(
                    [detection_boxes, detection_scores, detection_classes,
                     num_detections],
                    feed_dict={image_tensor: image_np_expanded})

                # Names of all classes scoring above 0.5, de-duplicated in
                # detection order.
                li = [category_index.get(value).get('name')
                      for index, value in enumerate(classes[0])
                      if scores[0, index] > 0.5]
                li1 = []
                for name in li:
                    if name not in li1:
                        li1.append(name)
                str1 = " ".join(li1)
                print("Object Detected - ", str1)

                engine = pyttsx3.init()
                engine.say(str1)
                engine.runAndWait()

                output_dict = run_inference_for_single_image(
                    image_np, detection_graph)
                vis_util.visualize_boxes_and_labels_on_image_array(
                    image_np,
                    output_dict['detection_boxes'],
                    output_dict['detection_classes'],
                    output_dict['detection_scores'],
                    category_index,
                    instance_masks=output_dict.get('detection_masks'),
                    use_normalized_coordinates=True,
                    line_thickness=8)
                # `image_np` comes from PIL and is RGB, while OpenCV displays
                # and writes BGR; convert so the colours are not swapped.
                annotated_bgr = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
                cv2.imshow('image', annotated_bgr)
                cv2.imwrite('image.jpg', annotated_bgr)

    # ---- Stage 2: text extraction --------------------------------------
    DEBUG = 0
    image = cv2.imread('C:/Users/hi/AppData/Local/Programs/Python/Python36/models/object_detection/test_images/image90.jpg')
    img_y = len(image)
    img_x = len(image[0])
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    def ii(xx, yy):
        """Return the luminance of pixel (xx, yy), or 0 outside the image."""
        # Negative coordinates are out of bounds as well; without this check
        # they silently wrapped around to the opposite image edge.
        if xx < 0 or yy < 0 or yy >= img_y or xx >= img_x:
            return 0
        pixel = image[yy][xx]
        return 0.30 * pixel[2] + 0.59 * pixel[1] + 0.11 * pixel[0]

    # Binarise (inverted, so dark text becomes foreground).
    ret, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)

    # Dilation with a 1x1 kernel (leaves the mask unchanged).
    kernel = np.ones((1, 1), np.uint8)
    img_dilation = cv2.dilate(thresh, kernel, iterations=1)

    # Find contours. OpenCV 3 returns (image, contours, hierarchy) and
    # OpenCV 4 returns (contours, hierarchy); the contours are second from
    # the end in both.
    ctrs = cv2.findContours(img_dilation.copy(), cv2.RETR_EXTERNAL,
                            cv2.CHAIN_APPROX_SIMPLE)[-2]

    # Sort contours from left to right.
    sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0])

    keepers = []
    for i, ctr in enumerate(sorted_ctrs):
        # Get the bounding box.
        x, y, w, h = cv2.boundingRect(ctr)
        # Keep boxes that are neither tiny nor (nearly) the whole image.
        if ((w < img_x - 8) and (h < img_y - 8)) and (w > 5 and h > 5):
            keepers.append([ctr, [x, y, w, h]])
        roi = image[y:y + h, x:x + w]
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
        if w > 5 and h > 5:
            cv2.imwrite('C:/Users/hi/AppData/Local/Programs/Python/Python36/models/pics/{}.png'.format(i), roi)

    # Blank (white) canvas with the size and dtype of the binarised image.
    new_image = np.full_like(img_dilation, 255)

    # For each box, find the foreground and background intensities.
    for index_, (contour_, box) in enumerate(keepers):
        # The average intensity of the contour pixels gives the foreground
        # intensity.
        fg_int = 0.0
        for p in contour_:
            fg_int += ii(p[0][0], p[0][1])
        fg_int /= len(contour_)
        if DEBUG:
            print("FG Intensity for #%d = %d" % (index_, fg_int))

        # The intensity of three pixels around the outside of each corner of
        # the bounding box gives the background intensity.
        x_, y_, width, height = box
        bg_int = [
            # corner at (x_, y_)
            ii(x_ - 1, y_ - 1),
            ii(x_ - 1, y_),
            ii(x_, y_ - 1),
            # corner at (x_ + width, y_)
            ii(x_ + width + 1, y_ - 1),
            ii(x_ + width, y_ - 1),
            ii(x_ + width + 1, y_),
            # corner at (x_, y_ + height)
            ii(x_ - 1, y_ + height + 1),
            ii(x_ - 1, y_ + height),
            ii(x_, y_ + height + 1),
            # corner at (x_ + width, y_ + height)
            ii(x_ + width + 1, y_ + height + 1),
            ii(x_ + width, y_ + height + 1),
            ii(x_ + width + 1, y_ + height),
        ]
        # Use the median of the background pixels determined above.
        bg_int = np.median(bg_int)
        if DEBUG:
            print("BG Intensity for #%d = %s" % (index_, repr(bg_int)))

        # Determine if the box should be inverted.
        if fg_int >= bg_int:
            fg = 255
            bg = 0
        else:
            fg = 0
            bg = 255

        # Colour every pixel of the box accordingly.
        for x in range(x_, x_ + width):
            for y in range(y_, y_ + height):
                if y >= img_y or x >= img_x:
                    if DEBUG:
                        print("pixel out of bounds (%d,%d)" % (y, x))
                    continue
                if ii(x, y) > fg_int:
                    new_image[y][x] = bg
                else:
                    new_image[y][x] = fg

    cv2.imwrite('C:/Users/hi/AppData/Local/Programs/Python/Python36/models/text.jpg', new_image)

    engine = pyttsx3.init()
    img11 = Image.open('C:/Users/hi/AppData/Local/Programs/Python/Python36/models/text.jpg')
    # Run OCR once and reuse the result for printing and speaking.
    detected_text = pytesseract.image_to_string(img11)
    print("Text Detected - ", detected_text)
    engine.say(detected_text)
    engine.runAndWait()

    cv2.imshow('marked areas', image)
    cv2.waitKey(0)
def main(args):
    """Run a frozen detection graph over a directory of JPEG images.

    Options are parsed from the process command line with ``argparse``; the
    ``args`` parameter is kept for interface compatibility and is replaced by
    the parsed options. Every image found is annotated with the detected
    boxes and saved as ``<ROOT_DIR>/outputs/<index>.jpg``.

    Any of the model inputs that is not given on the command line is derived:
    the frozen graph from ``get_last_frozen_inference_graph()``, and the
    class-count file / label map from the directory two levels above the
    frozen graph.

    Args:
        args: Ignored; overwritten by the parsed command-line options.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("-f",
                    "--frozen_inference_graph",
                    help='Path to frozen_inference_graph .pb file')
    ap.add_argument(
        "-l",
        "--label_map",
        help=
        "A .pbtxt file that contains all unique classes and their int map by given format"
    )
    ap.add_argument("-n",
                    "--num_classes",
                    help="Path to number of classes txt file")
    ap.add_argument("-i",
                    "--images",
                    default=os.path.join(DATASET_DIR, 'test', 'images'),
                    help="Path to images")
    args = vars(ap.parse_args())

    PATH_TO_TEST_IMAGES_DIR = args['images']
    PATH_TO_CKPT = args['frozen_inference_graph']
    num_classes = args['num_classes']
    PATH_TO_LABELS = args['label_map']

    if PATH_TO_CKPT is None:
        PATH_TO_CKPT = get_last_frozen_inference_graph()
        print('frozen_inference_graph:', PATH_TO_CKPT)

    # Resolve each missing input on its own so that explicitly passed
    # options are honoured and NUM_CLASSES is always defined.
    if num_classes is None:
        num_classes = os.path.abspath(
            os.path.join(PATH_TO_CKPT, '../', '../', 'ssd_num_classes.txt'))
    if PATH_TO_LABELS is None:
        PATH_TO_LABELS = os.path.abspath(
            os.path.join(PATH_TO_CKPT, '../', '../', 'ssd_label_map.pbtxt'))
    NUM_CLASSES = get_num_class(num_classes)

    print('frozen_inference_graph:', PATH_TO_CKPT)
    print('NUM_CLASSES:', NUM_CLASSES)
    print('label_map:', PATH_TO_LABELS)
    print('PATH_TO_TEST_IMAGES_DIR:', PATH_TO_TEST_IMAGES_DIR)

    # Load the frozen graph into its own tf.Graph.
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    # "**" only recurses when it is a path component of its own, so the
    # pattern has to be joined rather than string-concatenated.
    TEST_IMAGE_PATHS = sorted(
        glob.glob(os.path.join(PATH_TO_TEST_IMAGES_DIR, "**", "*.jpg"),
                  recursive=True))

    outputs = os.path.join(ROOT_DIR, 'outputs')
    os.makedirs(outputs, exist_ok=True)

    for counter, image_path in enumerate(TEST_IMAGE_PATHS):
        image = Image.open(image_path)
        # The array representation is drawn on in place by the visualizer
        # and then written out as the result image.
        image_np = load_image_into_numpy_array(image)
        # Actual detection.
        output_dict = run_inference_for_single_image(image_np,
                                                     detection_graph)
        # Visualization of the results of a detection.
        vis_util.visualize_boxes_and_labels_on_image_array(
            image_np,
            output_dict['detection_boxes'],
            output_dict['detection_classes'],
            output_dict['detection_scores'],
            category_index,
            instance_masks=output_dict.get('detection_masks'),
            use_normalized_coordinates=True,
            line_thickness=8)
        plt.imsave(fname=(outputs + '/' + str(counter) + '.jpg'),
                   arr=image_np)
def get_category_index(path, number_of_classes):
    """Return the category index for the label map file at ``path``.

    Args:
        path: Location of the label map file handed to
            ``label_map_util.load_labelmap``.
        number_of_classes: Passed as ``max_num_classes`` when the label map
            is converted to categories.

    Returns:
        The value produced by ``label_map_util.create_category_index``.
    """
    return label_map_util.create_category_index(
        label_map_util.convert_label_map_to_categories(
            label_map_util.load_labelmap(path),
            max_num_classes=number_of_classes,
            use_display_name=True))
def main(images_dir=None, output_dir=None):
    """Copy images that show a person together with a bag-like object.

    Runs the frozen SSD ResNet50 FPN COCO graph found under
    ``./object_detection`` on every ``*.jpg`` in ``images_dir``. An image is
    copied to ``output_dir`` when class 1 is detected with score >= 0.75 and
    class 31 or 27 is detected with score >= 0.30.

    Args:
        images_dir: Directory with the input images. Defaults to the path
            that was previously hard-coded.
        output_dir: Destination for the selected images. Defaults to the
            path that was previously hard-coded.
    """
    if images_dir is None:
        images_dir = '/Users/chinmayiprasad/Documents/DeepLearning/Project/object_detection/image_no_bags'
    if output_dir is None:
        output_dir = '/Users/chinmayiprasad/Documents/DeepLearning/Project/object_detection/proc_humanNoBag'

    CWD_PATH = os.getcwd()

    # Path to frozen detection graph. This is the actual model that is used
    # for the object detection.
    MODEL_NAME = 'ssd_resnet50_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03'
    PATH_TO_CKPT = os.path.join(CWD_PATH, 'object_detection', MODEL_NAME,
                                'frozen_inference_graph.pb')
    PATH_TO_LABELS = os.path.join(CWD_PATH, 'object_detection', 'data',
                                  'mscoco_label_map.pbtxt')
    NUM_CLASSES = 90

    # Loading label map
    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    TEST_IMAGE_PATHS = glob.glob(os.path.join(images_dir, "*.jpg"))
    IMAGE_SIZE = (12, 8)

    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    count = 0
    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:
            for image_path in TEST_IMAGE_PATHS:
                try:
                    # Close the file handle as soon as the pixels are read.
                    with Image.open(image_path) as image:
                        image_np = load_image_into_numpy_array(image)
                    image_process, classes, scores, boxes = detect_objects(
                        image_np, sess, detection_graph, category_index)

                    # Bag-like classes use a looser threshold than persons.
                    classesBag = classes[np.where(scores >= 0.30)]
                    classesPerson = classes[np.where(scores >= 0.75)]
                    if 1 in set(classesPerson) and (31 in set(classesBag) or
                                                    27 in set(classesBag)):
                        shutil.copy(image_path, output_dir)
                        count += 1
                        print(count)
                    if count % 100 == 0:
                        print(count)
                    # NOTE(review): a figure is created per image and never
                    # shown or closed here; kept as in the original.
                    plt.figure(figsize=IMAGE_SIZE)
                    plt.imshow(image_process)
                except Exception as e:
                    # Best effort: report the failing image and carry on.
                    print("Skipping {} and {}".format(image_path, e))
                    time.sleep(1.5)
def main(input_path, output_path, config_path, ckpt_path):
    """Run a TF2 Object Detection API model on a video stream and show it.

    Frames are resized to a width of 450 pixels, passed through the
    detector, annotated and displayed until the stream ends or ``q`` is
    pressed.

    Args:
        input_path: Camera index (``int``) to read from a webcam, or the
            path of a video file.
        output_path: Path of the MJPG video to write, or ``None`` to write
            nothing.
        config_path: Pipeline config path, resolved against the working
            directory.
        ckpt_path: Checkpoint directory, resolved against the working
            directory.
    """
    # we recover our saved model here
    cwd = os.path.abspath(os.getcwd())

    # NOTE(review): this takes the second entry of the sorted directory
    # listing and strips its extension; confirm that it is really the last
    # checkpoint when several are present.
    ckpt_name = sorted(os.listdir(ckpt_path))[1].split(".")[0]
    model_dir = os.path.join(cwd, ckpt_path, ckpt_name)
    config_path = os.path.join(cwd, config_path)
    print("[INFO]: Last checkpoint is:", model_dir)
    print()
    print("[INFO]: Config path is:", config_path)
    print()

    configs = config_util.get_configs_from_pipeline_file(config_path)
    print(configs)
    print()
    model_config = configs["model"]
    detection_model = model_builder.build(model_config=model_config,
                                          is_training=False)

    # Restore checkpoint
    ckpt = tf.compat.v2.train.Checkpoint(model=detection_model)
    ckpt.restore(model_dir)
    print("[INFO]: Done restoring model...")
    detect_fn = get_model_detection_function(detection_model)

    # map labels for inference decoding
    label_map_path = configs['eval_input_config'].label_map_path
    label_map = label_map_util.load_labelmap(label_map_path)
    print("[INFO]: Done")
    categories = label_map_util.convert_label_map_to_categories(
        label_map,
        max_num_classes=label_map_util.get_max_label_map_index(label_map),
        use_display_name=True)
    category_index = label_map_util.create_category_index(categories)
    print("[INFO]: Loaded labels...")
    print()

    # An integer selects a camera; anything else is treated as a file path.
    from_camera = isinstance(input_path, int)
    if from_camera:
        vid = WebcamVideoStream(src=input_path).start()
    else:
        vid = FileVideoStream(input_path).start()
    time.sleep(1.0)

    # The writer is created lazily because the frame size is only known
    # once the first (resized) frame has been read.
    fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
    videoOut = None

    print("[INFO] loading model...")
    print("[INFO] starting video play...")
    fps = FPS().start()
    label_id_offset = 1
    try:
        while True:
            # A file stream reports exhaustion through more(); reading past
            # the end would block.
            if not from_camera and not vid.more():
                break
            frame = vid.read()
            if frame is None:
                break

            frame = imutils.resize(frame, width=450)
            (im_width, im_height) = (frame.shape[1], frame.shape[0])
            image_np = np.array(frame).reshape(
                (im_height, im_width, 3)).astype(np.uint8)
            input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0),
                                                dtype=tf.float32)
            detections, predictions_dict, shapes = detect_fn(input_tensor)

            image_np_with_detections = image_np.copy()
            viz_utils.visualize_boxes_and_labels_on_image_array(
                image_np_with_detections,
                detections['detection_boxes'][0].numpy(),
                (detections['detection_classes'][0].numpy() +
                 label_id_offset).astype(int),
                detections['detection_scores'][0].numpy(),
                category_index,
                use_normalized_coordinates=True,
                max_boxes_to_draw=100,
                min_score_thresh=.5,
                agnostic_mode=False,
            )

            cv2.imshow("frame", image_np_with_detections)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

            if output_path is not None:
                if videoOut is None:
                    out_height, out_width = image_np_with_detections.shape[:2]
                    videoOut = cv2.VideoWriter(output_path, fourcc, 30.0,
                                               (out_width, out_height))
                videoOut.write(image_np_with_detections)
            fps.update()

        fps.stop()
        print("[INFO] elapsed time: {:.2f}".format(fps.elapsed()))
        print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))
    finally:
        # Release the window, the stream and the writer even on errors.
        cv2.destroyAllWindows()
        vid.stop()
        if videoOut is not None:
            videoOut.release()
class PersonLoB:
    """Line-of-bearing estimator built on an SSD MobileNet COCO detector.

    Everything in the class body runs once, when the class is defined: the
    model archive is downloaded and unpacked if missing, the frozen graph
    and the label map are loaded, and a shared TensorFlow session is
    created. ``lob`` then reuses the class-level graph and session.
    """

    # ## Env setup

    # What model to download.
    MODEL_NAME = 'ssd_mobilenet_v1_coco_11_06_2017'
    MODEL_FILE = MODEL_NAME + '.tar.gz'
    DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'

    # Path to frozen detection graph. This is the actual model that is used
    # for the object detection.
    PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'

    # List of the strings that is used to add correct label for each box.
    PATH_TO_LABELS = os.path.join(os.environ["TENSORFLOW_MODELS"], 'research',
                                  'object_detection', 'data',
                                  'mscoco_label_map.pbtxt')

    NUM_CLASSES = 90

    # ## Download Model
    if not os.path.isdir(MODEL_NAME):
        logger.info("The model directory %s does not yet exist", MODEL_NAME)
        if not os.path.isfile(MODEL_FILE):
            logger.info("The model file %s does not yet exist", MODEL_FILE)
            opener = urllib.request.URLopener()
            opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)
            logger.info("The model file %s has been downloaded", MODEL_FILE)
        # The context manager closes the archive even if extraction fails.
        with tarfile.open(MODEL_FILE) as tar_file:
            for file in tar_file.getmembers():
                file_name = os.path.basename(file.name)
                if 'frozen_inference_graph.pb' in file_name:
                    tar_file.extract(file, os.getcwd())
                    logger.info("The model %s has been extracted", file)

    # ## Load a (frozen) Tensorflow model into memory.
    logger.info("tf.Graph()")
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        logger.info("tf.GraphDef()")
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            logger.info("serialized_graph")
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    # ## Loading label map
    # Label maps map indices to category names. Here we use internal
    # utility functions, but anything that returns a dictionary mapping
    # integers to appropriate string labels would be fine.
    logger.info("Loading label maps")
    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    # Allocate GPU memory on demand instead of all at once.
    logger.info("Initializing TensorFlow session")
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    session = tf.Session(graph=detection_graph, config=config)

    logger.info("PersonLoB class ready")

    def lob(self, ins):
        """Detect an object in an image and return its angle of bearing.

        Args:
            ins: Mapping with the keys ``peer``, ``timestamp``, ``idclass``,
                ``threshhold`` and ``image`` (a ``<prefix>,<base64>``
                string), plus optional ``fov`` (default 120.0) and
                ``compass`` (default 0.0).

        Returns:
            dict with ``aob`` (angle of the first detection matching
            ``idclass`` above ``threshhold``, offset by the compass heading,
            or ``None``), ``time`` (seconds spent), ``object_found``,
            ``object_score``, ``compass`` and ``object_scores``; when an
            object is found ``object_identified`` is added.
        """
        import base64
        import io

        resp = {
            "aob": None,
            "time": None,
            "object_found": None,
            "object_score": 0,
            "compass": None
        }
        start = datetime.now()

        # Identify the client
        peer = ins['peer']
        timestamp = ins['timestamp']

        idclass = float(ins['idclass'] or 1)
        threshhold = float(ins['threshhold'] or 0.20)

        # Missing or empty values fall back to the defaults.
        fov = float(ins.get('fov') or 120.0)
        ch = float(ins.get('compass') or 0.0)
        resp["compass"] = ch

        # The payload is "<prefix>,<base64 data>"; decode it with the
        # base64 module so this works on both Python 2 and Python 3.
        encoded_image = ins['image'].split(",")[1]
        image = Image.open(io.BytesIO(base64.b64decode(encoded_image)))
        # The reshape below needs exactly three channels per pixel.
        if image.mode != 'RGB':
            image = image.convert('RGB')

        def load_image_into_numpy_array(image):
            (im_width, im_height) = image.size
            return np.array(image.getdata()).reshape(
                (im_height, im_width, 3)).astype(np.uint8)

        graph = self.__class__.detection_graph
        session = self.__class__.session

        # Apply algorithm to images
        with graph.as_default():
            with session.as_default():
                # Input and output tensors of the detection graph.
                image_tensor = graph.get_tensor_by_name('image_tensor:0')
                detection_boxes = graph.get_tensor_by_name(
                    'detection_boxes:0')
                detection_scores = graph.get_tensor_by_name(
                    'detection_scores:0')
                detection_classes = graph.get_tensor_by_name(
                    'detection_classes:0')
                num_detections = graph.get_tensor_by_name('num_detections:0')

                width, height = image.size
                image_np = load_image_into_numpy_array(image)
                # The model is fed a batch, so add a leading axis.
                image_np_expanded = np.expand_dims(image_np, axis=0)

                # Actual detection.
                (boxes, scores, classes, num) = session.run(
                    [
                        detection_boxes, detection_scores, detection_classes,
                        num_detections
                    ],
                    feed_dict={image_tensor: image_np_expanded})

        # Image geometry and degrees covered by one pixel column.
        imageHeight = height
        imageWidth = width
        imageWidthCenter = imageWidth / 2
        pixelDegree = float(fov) / imageWidth

        # Convert tensorflow data to pandas data frames. -1 lets the row
        # count follow whatever number of detections the model returns.
        df = pd.DataFrame(boxes.reshape(-1, 4),
                          columns=['y_min', 'x_min', 'y_max', 'x_max'])
        df1 = pd.DataFrame(classes.reshape(-1, 1),
                           columns=['classes'],
                           dtype=int)
        df2 = pd.DataFrame(scores.reshape(-1, 1), columns=['scores'])
        df5 = pd.concat([df, df1, df2], axis=1)

        # Transform box bound coordinates to pixel coordinates
        df5['y_min_t'] = df5['y_min'].apply(lambda x: x * imageHeight)
        df5['x_min_t'] = df5['x_min'].apply(lambda x: x * imageWidth)
        df5['y_max_t'] = df5['y_max'].apply(lambda x: x * imageHeight)
        df5['x_max_t'] = df5['x_max'].apply(lambda x: x * imageWidth)

        # Centre of each box along X
        df5['ob_wid_x'] = df5['x_max_t'] - df5["x_min_t"]
        df5['ob_mid_x'] = df5['ob_wid_x'] / 2
        df5['x_loc'] = df5["x_min_t"] + df5['ob_mid_x']

        # Centre of each box along Y
        df5['ob_hgt_y'] = df5['y_max_t'] - df5["y_min_t"]
        df5['ob_mid_y'] = df5['ob_hgt_y'] / 2
        df5['y_loc'] = df5["y_min_t"] + df5['ob_mid_y']

        # Angle of each box centre relative to the image centre.
        df5['object_angle'] = df5['x_loc'].apply(
            lambda x: -(imageWidthCenter - x) * pixelDegree)

        df6 = df5.loc[(df5['classes'] == idclass)
                      & (df5['scores'] > threshhold)]

        # NOTE(review): the slice starts at row 1, so the first detection
        # is left out of this summary; kept as in the original.
        resp['object_scores'] = df5[1:10].to_string(
            columns=['classes', 'scores'])

        if df6.empty:
            AOB = None
        else:
            AOB = df6.iloc[0]['object_angle'] + ch
            resp["object_found"] = True
            # Positional lookup: after filtering, the first matching row
            # does not necessarily carry the index label 0.
            resp["object_score"] = str(df6['scores'].iloc[0])
            resp["object_identified"] = "person"

        end = datetime.now()
        delta = end - start
        resp["aob"] = AOB
        resp["time"] = delta.total_seconds()
        logger.info("[%s/%ld] AOB response: %s", peer, timestamp,
                    json.dumps(resp))

        return resp
def processimages(path_images_dir, path_labels_map, save_directory):
    """Count vehicles standing in the bike lanes and the bus stop.

    Every file in ``path_images_dir`` is run through the frozen detection
    graph. For each readable image one CSV row is written
    (``timestamp, cars in bike lane, trucks in bike lane, cars in bus stop,
    trucks in bus stop``) and the annotated image is saved to
    ``save_directory + <file name>``.

    Args:
        path_images_dir: Directory holding the input images.
        path_labels_map: Label map file for the detector.
        save_directory: Prefix for the annotated images; the file name is
            appended directly, so it must end with a path separator.

    Returns:
        Path of the CSV file that was written.
    """
    pathcpkt = 'data/output_inference_graph.pb/frozen_inference_graph.pb'
    csv_file = 'data/csvfile.csv'
    num_classes = 6
    # Lane polygons and box centres are expressed in a 352x240 frame.
    frame_width, frame_height = 352, 240
    min_score = .4

    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(pathcpkt, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    label_map = label_map_util.load_labelmap(path_labels_map)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=num_classes, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    def load_image_into_numpy_array(imageconvert):
        # An empty array signals an image that is not 3-channel.
        (im_width, im_height) = imageconvert.size
        try:
            return np.array(imageconvert.getdata()).reshape(
                (im_height, im_width, 3)).astype(np.uint8)
        except ValueError:
            return np.array([])

    polygon_right_lane = [(178, 122), (188, 240), (231, 240), (187, 125)]
    polygon_left_lane = [(108, 143), (0, 215), (0, 233), (123, 142),
                         (108, 97)]
    polygon_bus_lane = [(200, 155), (230, 240), (292, 240), (225, 157)]
    pathrightlane = mpltPath.Path(polygon_right_lane)
    pathleftlane = mpltPath.Path(polygon_left_lane)
    pathbuslane = mpltPath.Path(polygon_bus_lane)

    # The with-block closes the CSV file even when detection raises.
    with open(csv_file, 'w') as f, detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:
            # Input and output tensors of the detection graph.
            image_tensor = detection_graph.get_tensor_by_name(
                'image_tensor:0')
            detection_boxes = detection_graph.get_tensor_by_name(
                'detection_boxes:0')
            detection_scores = detection_graph.get_tensor_by_name(
                'detection_scores:0')
            detection_classes = detection_graph.get_tensor_by_name(
                'detection_classes:0')
            num_detections = detection_graph.get_tensor_by_name(
                'num_detections:0')

            for testpath in os.listdir(path_images_dir):
                start_time = time.time()
                timestamp = testpath.split(".jpg")[0]

                try:
                    image = Image.open(
                        os.path.join(path_images_dir, testpath))
                    image_np = load_image_into_numpy_array(image)
                except IOError:
                    print("Issue opening " + testpath)
                    continue

                if image_np.size == 0:
                    print("Skipping image " + testpath)
                    continue

                # The model is fed a batch, so add a leading axis.
                image_np_expanded = np.expand_dims(image_np, axis=0)

                # Actual detection.
                (boxes, scores, classes, num) = sess.run(
                    [
                        detection_boxes, detection_scores, detection_classes,
                        num_detections
                    ],
                    feed_dict={image_tensor: image_np_expanded})

                # Drop the batch axis once, up front.
                boxes = np.squeeze(boxes)
                scores = np.squeeze(scores)
                classes = np.squeeze(classes).astype(np.int32)

                # Visualization of the results of a detection.
                vis_util.visualize_boxes_and_labels_on_image_array(
                    image_np,
                    boxes,
                    classes,
                    scores,
                    category_index,
                    min_score_thresh=0.4,
                    use_normalized_coordinates=True,
                    line_thickness=2)

                num_cars_in_bikelane = 0
                num_cars_in_bus_stop = 0
                num_trucks_in_bike_lane = 0
                num_trucks_in_bus_stop = 0

                for i in range(boxes.shape[0]):
                    if not scores[i] > min_score:
                        continue

                    ymin, xmin, ymax, xmax = tuple(boxes[i].tolist())
                    # Box centre in pixel coordinates.
                    center_x = (((xmax * frame_width) -
                                 (xmin * frame_width)) / 2) + (xmin *
                                                               frame_width)
                    center_y = (((ymax * frame_height) -
                                 (ymin * frame_height)) / 2) + (ymin *
                                                                frame_height)

                    if classes[i] in category_index.keys():
                        class_name = category_index[classes[i]]['name']
                    else:
                        class_name = 'N/A'

                    points = [(center_x, center_y)]
                    if class_name == 'car':
                        if pathrightlane.contains_points(
                                points) or pathleftlane.contains_points(
                                    points):
                            num_cars_in_bikelane += 1
                        elif pathbuslane.contains_points(points):
                            num_cars_in_bus_stop += 1
                    elif class_name == 'truck' or class_name == 'police' or class_name == 'ups':
                        if pathrightlane.contains_points(
                                points) or pathleftlane.contains_points(
                                    points):
                            num_trucks_in_bike_lane += 1
                        elif pathbuslane.contains_points(points):
                            num_trucks_in_bus_stop += 1

                # One row per processed image.
                f.write(timestamp + ',' + str(num_cars_in_bikelane) + ',' +
                        str(num_trucks_in_bike_lane) + ',' +
                        str(num_cars_in_bus_stop) + ',' +
                        str(num_trucks_in_bus_stop) + '\n')
                print("Process Time " + str(time.time() - start_time))
                # NOTE(review): scipy.misc.imsave is absent from recent
                # SciPy releases; confirm the pinned SciPy version.
                scipy.misc.imsave(save_directory + testpath, image_np)

    return csv_file
def __init__(self):
    """Read the node parameters, load the detector and open a session.

    Subscribes to the camera topic, sets up the detection publishers, loads
    the label map and the frozen graph, and collects the output tensors the
    graph actually provides into ``self.tensor_dict``. ``self.ready`` is
    only set to ``True`` once all of that has finished.

    Raises:
        SystemExit: If the model name, models directory or label map path
            is empty.
    """
    self.seq = 0
    self.ready = False
    self.counter = 0
    self.bridge = CvBridge()

    self.camera_topic = rospy.get_param('~camera_topic', "/image_raw")
    self.image_sub = rospy.Subscriber(self.camera_topic,
                                      Image,
                                      self.callback,
                                      queue_size=1)
    self.render = rospy.get_param('~render', True)
    if self.render:
        self.image_pub = rospy.Publisher("detections/image_raw/compressed",
                                         CompressedImage,
                                         queue_size=5)

    self.model_name = rospy.get_param('~model_name')
    self.models_dir = rospy.get_param('~models_dir')
    self.path_to_ckpt = self.models_dir + '/' + self.model_name + '/frozen_inference_graph.pb'
    self.path_to_labels = rospy.get_param('~path_to_labels')
    self.num_classes = rospy.get_param('~num_classes', 90)
    self.threshold = rospy.get_param('~threshold', 0.5)
    self.rotate = rospy.get_param('~rotate', False)
    self.debug = rospy.get_param('~debug', False)

    self.bbox_pub = rospy.Publisher(self.camera_topic + "/detections",
                                    BBoxArray,
                                    queue_size=5)

    print("path_to_ckpt:", self.path_to_ckpt)
    print("path_to_labels:", self.path_to_labels)

    # path_to_ckpt always ends with the graph file name, so emptiness has
    # to be checked on the parameters it is built from.
    if not self.model_name or not self.models_dir or not self.path_to_labels:
        print("\n\nProvide requiered args: path_to_ckpt, path_to_labels")
        print("Shutting down.")
        raise SystemExit(-1)

    self.label_map = label_map_util.load_labelmap(self.path_to_labels)
    self.categories = label_map_util.convert_label_map_to_categories(
        self.label_map,
        max_num_classes=self.num_classes,
        use_display_name=True)
    self.category_index = label_map_util.create_category_index(
        self.categories)
    print("Category map loaded:")
    for i, n in zip(self.category_index.keys(),
                    [str(_['name']) for _ in self.category_index.values()]):
        print("%4d %s" % (i, n))

    self.detection_graph = tf.Graph()
    with self.detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        print("Loading model")
        with tf.gfile.GFile(self.path_to_ckpt, 'rb') as fid:
            serialized_graph = fid.read()
            print("Parsing")
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    # Grow GPU memory on demand instead of reserving it all.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    self.sess = tf.Session(config=config, graph=self.detection_graph)

    # Get handles to input and output tensors. The detection graph is
    # addressed explicitly so the lookup does not depend on which graph
    # happens to be the default one at this point.
    ops = self.detection_graph.get_operations()
    print("Outputs:")
    all_tensor_names = {output.name for op in ops for output in op.outputs}
    self.tensor_dict = {}
    for key in [
            'num_detections', 'detection_boxes', 'detection_scores',
            'detection_classes', 'detection_masks'
    ]:
        tensor_name = key + ':0'
        if tensor_name in all_tensor_names:
            print(" " + key)
            self.tensor_dict[key] = self.detection_graph.get_tensor_by_name(
                tensor_name)
    self.image_tensor = self.detection_graph.get_tensor_by_name(
        'image_tensor:0')

    self.ready = True
    print("Model loaded. Waiting for messages on topic:", self.camera_topic)
def load_labels(path_to_labels, num_classes):
    """Load a label map file and turn it into a category index.

    Args:
        path_to_labels: Label map file given to
            ``label_map_util.load_labelmap``.
        num_classes: Used as ``max_num_classes`` for the category
            conversion.

    Returns:
        The result of ``label_map_util.create_category_index``.
    """
    conversion_options = {
        'max_num_classes': num_classes,
        'use_display_name': True,
    }
    parsed_map = label_map_util.load_labelmap(path_to_labels)
    category_list = label_map_util.convert_label_map_to_categories(
        parsed_map, **conversion_options)
    category_index = label_map_util.create_category_index(category_list)
    return category_index
def distance_function(cap):
    """Annotate one frame with vehicle detections and a proximity warning.

    The frozen SSD MobileNet COCO graph is loaded, the frame is run through
    it once, and for every detection of class 3, 6 or 8 (car, bus, truck)
    with score >= 0.5 an approximate distance derived from the box width is
    drawn at the box centre. ``WARNING!!!`` is drawn when such a vehicle is
    close (distance <= 0.5) and horizontally centred.

    The frame used to be processed inside an endless loop that never
    returned; it is now processed once and the result is returned.

    Args:
        cap: A single BGR frame, as accepted by ``cv2.cvtColor``.

    Returns:
        The RGB copy of the frame with all annotations drawn on it.
    """
    # Path to frozen detection graph. This is the actual model that is used
    # for the object detection.
    PATH_TO_FROZEN_GRAPH = 'object_detection/utils/ssd_mobilenet_v1_coco_2017_11_17/frozen_inference_graph.pb'

    # List of the strings that is used to add correct label for each box.
    PATH_TO_LABELS = 'object_detection/utils/data/mscoco_label_map.pbtxt'

    NUM_CLASSES = 90
    vehicle_classes = (3, 6, 8)  # car, bus, truck

    # Label maps map indices to category names.
    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    # Load the frozen Tensorflow model into memory.
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    image_np = cv2.cvtColor(cap, cv2.COLOR_BGR2RGB)
    frame_height, frame_width = image_np.shape[:2]
    # The model is fed a batch, so add a leading axis.
    image_np_expanded = np.expand_dims(image_np, axis=0)

    with tf.Session(graph=detection_graph) as sess:
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        boxes_tensor = detection_graph.get_tensor_by_name('detection_boxes:0')
        scores_tensor = detection_graph.get_tensor_by_name(
            'detection_scores:0')
        classes_tensor = detection_graph.get_tensor_by_name(
            'detection_classes:0')
        num_tensor = detection_graph.get_tensor_by_name('num_detections:0')

        # Actual detection.
        (boxes, scores, classes, num_detections) = sess.run(
            [boxes_tensor, scores_tensor, classes_tensor, num_tensor],
            feed_dict={image_tensor: image_np_expanded})

    # Visualization of the results of a detection.
    vis_util.visualize_boxes_and_labels_on_image_array(
        image_np,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=8)

    for i, box in enumerate(boxes[0]):
        if classes[0][i] not in vehicle_classes:
            continue
        if not scores[0][i] >= 0.5:
            continue

        # Boxes are [ymin, xmin, ymax, xmax] in normalized coordinates.
        mid_x = (box[1] + box[3]) / 2
        mid_y = (box[0] + box[2]) / 2
        # Wider boxes are taken to be closer.
        apx_distance = round(((1 - (box[3] - box[1]))**4), 1)
        # Scale by the real frame size rather than assuming 800x450.
        cv2.putText(image_np, '{}'.format(apx_distance),
                    (int(mid_x * frame_width), int(mid_y * frame_height)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)

        if apx_distance <= 0.5:
            if mid_x > 0.3 and mid_x < 0.7:
                cv2.putText(image_np, 'WARNING!!!', (50, 50),
                            cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255), 3)

    return image_np
import tensorflow as tf
import object_detection
from object_detection.utils import label_map_util
import cv2
import numpy as np
import os
import sys

# Frozen SSDLite MobileNet v2 COCO graph used by HumanDetector.
model_path = "./data/models/ssdlite_mobilenet_v2_coco_2018_05_09/frozen_inference_graph.pb"
NUM_CLASSES = 90

# The label map is parsed once, when the module is imported.
# label_map = label_map_util.load_labelmap('/home/ruth/Documents/Bumblebee/ML/models/label_map.pbtxt')
label_map = label_map_util.load_labelmap('./data/labels/mscoco_label_map.pbtxt')


class HumanDetector:
    """Holds a frozen detection graph and the matching label categories."""

    def __init__(self, min_score_thresh=.5):
        """Store the score threshold and load the model.

        Args:
            min_score_thresh: Value kept on the instance as
                ``self.min_score_thresh``.
        """
        self.min_score_thresh = min_score_thresh
        self.load_model()

    def load_model(self):
        """Import the frozen graph and build the category lookup tables.

        Sets ``self.detection_graph``, ``self.categories`` and
        ``self.category_index``. The categories used to be computed into a
        local variable and thrown away; they are now kept on the instance.
        """
        self.detection_graph = tf.Graph()
        with self.detection_graph.as_default():
            od_graph_def = tf.GraphDef()
            with tf.gfile.GFile(model_path, 'rb') as fid:
                serialized_graph = fid.read()
                od_graph_def.ParseFromString(serialized_graph)
                tf.import_graph_def(od_graph_def, name='')

        categories = label_map_util.convert_label_map_to_categories(
            label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
        self.categories = categories
        self.category_index = label_map_util.create_category_index(
            categories)
def run(self): time1 = time.time() MIN_ratio = 0.8 #MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17' MODEL_NAME = 'faster_rcnn_inception_v2_coco_2018_01_28' GRAPH_FILE_NAME = 'frozen_inference_graph.pb' LABEL_FILE = 'data/mscoco_label_map.pbtxt' NUM_CLASSES = 90 #end define label_map = lmu.load_labelmap(LABEL_FILE) categories = lmu.convert_label_map_to_categories( label_map, max_num_classes=NUM_CLASSES, use_display_name=True) categories_index = lmu.create_category_index(categories) print("call label_map & categories : %0.5f" % (time.time() - time1)) graph_file = MODEL_NAME + '/' + GRAPH_FILE_NAME #thread function def find_detection_target(categories_index, classes, scores): time1_1 = time.time() #스레드함수 시작시간 print("스레드 시작") objects = [] #리스트 생성 for index, value in enumerate(classes[0]): object_dict = {} #딕셔너리 if scores[0][index] > MIN_ratio: object_dict[(categories_index.get(value)).get('name').encode('utf8')] = \ scores[0][index] objects.append(object_dict) #리스트 추가 print(objects) print("스레드 함수 처리시간 %0.5f" & (time.time() - time1_1)) #end thread function detection_graph = tf.Graph() with detection_graph.as_default(): od_graph_def = tf.GraphDef() with tf.gfile.GFile(graph_file, 'rb') as fid: serialized_graph = fid.read() od_graph_def.ParseFromString(serialized_graph) tf.import_graph_def(od_graph_def, name='') sses = tf.Session(graph=detection_graph) print("store in memoey time : %0.5f" % (time.time() - time1)) image_tensor = detection_graph.get_tensor_by_name('image_tensor:0') detection_boxes = detection_graph.get_tensor_by_name( 'detection_boxes:0') detection_scores = detection_graph.get_tensor_by_name( 'detection_scores:0') detection_classes = detection_graph.get_tensor_by_name( 'detection_classes:0') num_detections = detection_graph.get_tensor_by_name('num_detections:0') print("make tensor time : %0.5f" % (time.time() - time1)) #capture = cv2.VideoCapture(0) capture = cv2.VideoCapture("20190916_162900.mp4") prevtime = 0 #thread_1 = Process(target = 
find_detection_target, args = (categories_index, classes, scores))#쓰레드 생성 print("road Video time : %0.5f" % (time.time() - time1)) while True: ret, frame = capture.read() frame_expanded = np.expand_dims(frame, axis=0) height, width, channel = frame.shape #프레임 표시 curtime = time.time() sec = curtime - prevtime prevtime = curtime fps = 1 / sec str = "FPS : %0.1f" % fps cv2.putText(frame, str, (0, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0)) #end 프레임 (boxes, scores, classes, nums) = sses.run( #np.ndarray [ detection_boxes, detection_scores, detection_classes, num_detections ], feed_dict={image_tensor: frame_expanded}) #end sses.run() vis_util.visualize_boxes_and_labels_on_image_array( frame, np.squeeze(boxes), np.squeeze(classes).astype(np.int32), np.squeeze(scores), categories_index, use_normalized_coordinates=True, min_score_thresh=MIN_ratio, #최소 인식률 line_thickness=2) #선두께 # objects = [] #리스트 생성 for index, value in enumerate(classes[0]): object_dict = {} # 딕셔너리 if scores[0][index] > MIN_ratio: object_dict[(categories_index.get(value)).get('name').encode('utf8')] = \ scores[0][index] # objects.append(object_dict) #리스트 추가 '''visualize_boxes_and_labels_on_image_array box_size_info 이미지 정 for box, color in box_to_color_map.items(): ymin, xmin, ymax, xmax = box [index][0] [1] [2] [3] ''' ymin = int((boxes[0][index][0] * height)) xmin = int((boxes[0][index][1] * width)) ymax = int((boxes[0][index][2] * height)) xmax = int((boxes[0][index][3] * width)) Result = frame[ymin:ymax, xmin:xmax] cv2.imwrite('car.jpg', Result) try: # print(NP.check()) NP.number_recognition('car.jpg') except: print("응안돼") cv2.imshow('re', Result) # print(objects) key = cv2.waitKey(1) & 0xFF if ret: # https://stackoverflow.com/a/55468544/6622587 rgbImage = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) h, w, ch = rgbImage.shape bytesPerLine = ch * w convertToQtFormat = QtGui.QImage(rgbImage.data, w, h, bytesPerLine, QtGui.QImage.Format_RGB888) p = convertToQtFormat.scaled(640, 480, Qt.KeepAspectRatio) 
self.changePixmap.emit(p) if key == ord("q"): break '''
min_confidence = 0.5 num_classes = 2 lr_model = joblib.load('output/models/model_LR.pkl') model = tf.Graph() with model.as_default(): graphDef = tf.GraphDef() with tf.gfile.GFile( "C:/Users/SKS/Desktop/AAIC/Malaria_Detection_TS/experiments/exported_model/frozen_inference_graph.pb", "rb") as f: serializedGraph = f.read() graphDef.ParseFromString(serializedGraph) tf.import_graph_def(graphDef, name="") labelMap = label_map_util.load_labelmap(labels_loc) categories = label_map_util.convert_label_map_to_categories( labelMap, max_num_classes=num_classes, use_display_name=True) categoryIdx = label_map_util.create_category_index(categories) classes = ['gametocyte', 'leukocyte', 'ring', 'schizont', 'trophozoite'] testing_predicition_rnn = {} testing_predicition_vgg = {} predicted_boxes_stacked_test = defaultdict(dict) with model.as_default(): with tf.Session(graph=model) as sess: imageTensor = model.get_tensor_by_name("image_tensor:0") boxesTensor = model.get_tensor_by_name("detection_boxes:0") scoresTensor = model.get_tensor_by_name("detection_scores:0") classesTensor = model.get_tensor_by_name("detection_classes:0")
def main(unused_argv):
    """Run evaluation once a usable checkpoint is available.

    Polls ``FLAGS.checkpoint_dir`` until it holds a checkpoint whose trailing
    ``-<step>`` suffix is greater than zero, loads the configuration (either a
    single pipeline file or separate model/eval/input files), copies the
    configuration files into ``FLAGS.eval_dir`` and calls
    ``evaluator.evaluate``.

    Args:
        unused_argv: Positional command-line arguments; ignored.
    """
    assert FLAGS.checkpoint_dir, '`checkpoint_dir` is missing.'
    assert FLAGS.eval_dir, '`eval_dir` is missing.'
    tf.gfile.MakeDirs(FLAGS.eval_dir)

    # Poll until training has written a checkpoint past step 0.
    poll_interval_secs = 300
    while True:
        newest_checkpoint = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
        if newest_checkpoint and int(newest_checkpoint.split('-')[-1]) > 0:
            break
        tf.logging.info("waiting for checkpoint...")
        time.sleep(poll_interval_secs)

    if FLAGS.pipeline_config_path:
        configs = config_util.get_configs_from_pipeline_file(
            FLAGS.pipeline_config_path)
        tf.gfile.Copy(
            FLAGS.pipeline_config_path,
            os.path.join(FLAGS.eval_dir, 'pipeline.config'),
            overwrite=True)
    else:
        configs = config_util.get_configs_from_multiple_files(
            model_config_path=FLAGS.model_config_path,
            eval_config_path=FLAGS.eval_config_path,
            eval_input_config_path=FLAGS.input_config_path)
        # Keep a copy of each individual config next to the eval results.
        config_sources = (
            ('model.config', FLAGS.model_config_path),
            ('eval.config', FLAGS.eval_config_path),
            ('input.config', FLAGS.input_config_path),
        )
        for target_name, source_path in config_sources:
            tf.gfile.Copy(
                source_path,
                os.path.join(FLAGS.eval_dir, target_name),
                overwrite=True)

    model_config = configs['model']
    eval_config = configs['eval_config']
    if FLAGS.eval_training_data:
        input_config = configs['train_input_config']
    else:
        input_config = configs['eval_input_config']

    # When the TFRecord reader lists no usable input path, borrow the training
    # paths and tell the evaluator to augment.
    do_augmentation = False
    if input_config.WhichOneof('input_reader') == 'tf_record_input_reader':
        reader_config = input_config.tf_record_input_reader
        reader_paths = reader_config.input_path
        if not (reader_paths and reader_paths[0]):
            do_augmentation = True
            train_reader_config = (
                configs['train_input_config'].tf_record_input_reader)
            reader_config.input_path[:] = train_reader_config.input_path[:]

    model_fn = functools.partial(
        model_builder.build, model_config=model_config, is_training=False)

    def get_next(reader_cfg):
        """Build the dataset for ``reader_cfg`` and return its next element."""
        dataset = dataset_builder.build(reader_cfg)
        iterator = dataset_util.make_initializable_iterator(dataset)
        return iterator.get_next()

    create_input_dict_fn = functools.partial(get_next, input_config)

    label_map = label_map_util.load_labelmap(input_config.label_map_path)
    max_num_classes = max(item.id for item in label_map.item)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes)

    if FLAGS.run_once:
        eval_config.max_evals = 1

    evaluator.evaluate(
        create_input_dict_fn,
        model_fn,
        eval_config,
        categories,
        FLAGS.checkpoint_dir,
        FLAGS.eval_dir,
        do_augmentation=do_augmentation)
def predictImages(modelArg, labelsArg, imagePathArg, num_classesArg,
                  min_confidenceArg, image_displayArg, pred_stagesArg):
    """Detect license plates and their characters in the images under a path.

    Loads a frozen TensorFlow graph and its label map, then runs plate and
    character prediction on every image found by ``paths.list_images``.

    Args:
        modelArg: Path to the frozen inference graph (``.pb``).
        labelsArg: Path to the label map file.
        imagePathArg: Directory searched for images.
        num_classesArg: Maximum number of classes taken from the label map.
        min_confidenceArg: Minimum confidence handed to ``PlateFinder``.
        image_displayArg: When equal to ``True``, show each labelled image and
            wait for a key press.
        pred_stagesArg: ``2`` to find plates first and then predict characters
            per plate; ``1`` to predict plates and characters in one pass.

    Returns:
        dict: Maps a plate index to a dict with keys ``'plateText'``,
        ``'plateBoxLoc'`` and ``'charBoxLocs'``. Only the plates of the last
        successfully processed image are returned; the dict is empty when no
        image could be processed.

    Raises:
        SystemExit: If ``pred_stagesArg`` is neither 1 nor 2.
    """
    # Load the frozen graph into its own tf.Graph.
    model = tf.Graph()
    with model.as_default():
        graphDef = tf.GraphDef()
        with tf.gfile.GFile(modelArg, "rb") as f:
            serializedGraph = f.read()
            graphDef.ParseFromString(serializedGraph)
            tf.import_graph_def(graphDef, name="")

    # Load the class labels from disk.
    labelMap = label_map_util.load_labelmap(labelsArg)
    categories = label_map_util.convert_label_map_to_categories(
        labelMap, max_num_classes=num_classesArg, use_display_name=True)
    categoryIdx = label_map_util.create_category_index(categories)

    plateFinder = PlateFinder(min_confidenceArg, categoryIdx,
                              rejectPlates=False, charIOUMax=0.3)
    plateDisplay = PlateDisplay()

    # Defaults so that an empty (or entirely unreadable) image directory
    # yields an empty result instead of an unbound-name error.
    plateBoxes_pred = []
    charTexts_pred = []
    charBoxes_pred = []

    # NOTE(review): the original formatting of this function was flattened;
    # the statistics and the result dict are assumed to be produced once,
    # after the image loop. Confirm against the upstream file.
    with model.as_default():
        with tf.Session(graph=model) as sess:
            # The predicter is used for both plate and character inference.
            predicter = Predicter(model, sess, categoryIdx)
            imagePaths = paths.list_images(imagePathArg)
            frameCnt = 0
            start_time = time.time()

            for imagePath in imagePaths:
                print("[INFO] Loading image \"{}\"".format(imagePath))
                image = cv2.imread(imagePath)
                if image is None:
                    # cv2.imread signals an unreadable file with None rather
                    # than raising; skip it instead of crashing later.
                    print("[WARNING] Could not read image \"{}\", skipping"
                          .format(imagePath))
                    continue
                frameCnt += 1

                if pred_stagesArg == 2:
                    # Stage 1: full-image inference, keep only plate boxes.
                    boxes, scores, labels = predicter.predictPlates(
                        image, preprocess=True)
                    (licensePlateFound_pred, plateBoxes_pred,
                     plateScores_pred) = plateFinder.findPlatesOnly(
                         boxes, scores, labels)

                    # Stage 2: predict characters inside each plate box, then
                    # let processPlates produce the final plate boxes, texts,
                    # character boxes and scores.
                    plates = []
                    for plateBox in plateBoxes_pred:
                        boxes, scores, labels = predicter.predictChars(
                            image, plateBox)
                        chars = plateFinder.findCharsOnly(
                            boxes, scores, labels, plateBox,
                            image.shape[0], image.shape[1])
                        plates.append(chars if len(chars) > 0 else None)
                    (plateBoxes_pred, charTexts_pred, charBoxes_pred,
                     charScores_pred,
                     plateAverageScores_pred) = plateFinder.processPlates(
                         plates, plateBoxes_pred, plateScores_pred)

                elif pred_stagesArg == 1:
                    # Single pass: plates and characters from one inference.
                    boxes, scores, labels = predicter.predictPlates(
                        image, preprocess=False)
                    (licensePlateFound_pred, plateBoxes_pred, charTexts_pred,
                     charBoxes_pred, charScores_pred,
                     plateScores_pred) = plateFinder.findPlates(
                         boxes, scores, labels)

                else:
                    print(
                        "[ERROR] --pred_stages {}. The number of prediction stages must be either 1 or 2"
                        .format(pred_stagesArg))
                    # quit() is only injected by the site module; raise
                    # SystemExit directly, with a failing exit status.
                    raise SystemExit(1)

                for charText in charTexts_pred:
                    print("    Found: ", charText)

                # Equality (not truthiness) is kept on purpose so that string
                # arguments such as "False" do not enable the display.
                if image_displayArg == True:  # noqa: E712
                    imageLabelled = plateDisplay.labelImage(
                        image, plateBoxes_pred, charBoxes_pred, charTexts_pred)
                    cv2.imshow("Labelled Image", imageLabelled)
                    cv2.waitKey(0)

            # Performance statistics for the whole run.
            processingTime = time.time() - start_time
            fps = frameCnt / processingTime if processingTime > 0 else 0.0
            print(
                "[INFO] Processed {} frames in {:.2f} seconds. Frame rate: {:.2f} Hz"
                .format(frameCnt, processingTime, fps))

    # Convert array-like boxes to plain lists for the returned dict.
    platesReturn = {}
    for i, plateBox in enumerate(plateBoxes_pred):
        platesReturn[i] = {
            'plateText': charTexts_pred[i],
            'plateBoxLoc': list(plateBox),
            'charBoxLocs': [list(x) for x in charBoxes_pred[i]],
        }
    return platesReturn
def _build_center_net_model(center_net_config, is_training, add_summaries):
    """Build a CenterNet detection model.

    Args:
        center_net_config: A CenterNet proto object with model configuration.
        is_training: True if this model is being built for training purposes.
        add_summaries: Whether to add tf summaries in the model.

    Returns:
        CenterNetMetaArch based on the config.

    Raises:
        ValueError: If two keypoint tasks map to the same class id, or if a
            keypoint index is used more than once across the keypoint tasks.
    """

    def _params_if_set(field_name, to_params):
        """Convert the named sub-message when it is set, else return None."""
        if not center_net_config.HasField(field_name):
            return None
        return to_params(getattr(center_net_config, field_name))

    image_resizer_fn = image_resizer_builder.build(
        center_net_config.image_resizer)

    extractor_config = center_net_config.feature_extractor
    _check_feature_extractor_exists(extractor_config.type)
    feature_extractor = _build_center_net_feature_extractor(extractor_config)

    object_center_params = object_center_proto_to_params(
        center_net_config.object_center_params)

    object_detection_params = _params_if_set(
        'object_detection_task', object_detection_proto_to_params)

    keypoint_params_dict = None
    if center_net_config.keypoint_estimation_task:
        label_map_proto = label_map_util.load_labelmap(
            center_net_config.keypoint_label_map_path)
        # Only label-map items that declare keypoints are relevant here.
        keypoint_map_dict = {}
        for item in label_map_proto.item:
            if item.keypoints:
                keypoint_map_dict[item.name] = item

        keypoint_params_dict = {}
        seen_class_ids = set()
        used_keypoint_indices = []
        for task in center_net_config.keypoint_estimation_task:
            kp_params = keypoint_proto_to_params(task, keypoint_map_dict)
            keypoint_params_dict[task.task_name] = kp_params
            used_keypoint_indices.extend(kp_params.keypoint_indices)
            if kp_params.class_id in seen_class_ids:
                raise ValueError(
                    ('Multiple keypoint tasks map to the same class id is '
                     'not allowed: %d' % kp_params.class_id))
            seen_class_ids.add(kp_params.class_id)
        if len(set(used_keypoint_indices)) < len(used_keypoint_indices):
            raise ValueError('Some keypoint indices are used more than once.')

    mask_params = _params_if_set('mask_estimation_task', mask_proto_to_params)
    densepose_params = _params_if_set(
        'densepose_estimation_task', densepose_proto_to_params)
    track_params = _params_if_set(
        'track_estimation_task', tracking_proto_to_params)
    temporal_offset_params = _params_if_set(
        'temporal_offset_task', temporal_offset_proto_to_params)

    return center_net_meta_arch.CenterNetMetaArch(
        is_training=is_training,
        add_summaries=add_summaries,
        num_classes=center_net_config.num_classes,
        feature_extractor=feature_extractor,
        image_resizer_fn=image_resizer_fn,
        object_center_params=object_center_params,
        object_detection_params=object_detection_params,
        keypoint_params_dict=keypoint_params_dict,
        mask_params=mask_params,
        densepose_params=densepose_params,
        track_params=track_params,
        temporal_offset_params=temporal_offset_params,
        use_depthwise=center_net_config.use_depthwise,
        compute_heatmap_sparse=center_net_config.compute_heatmap_sparse)
from object_detection.utils import label_map_util from object_detection.utils import visualization_utils as vis_util CWD_PATH = os.getcwd() # Path to frozen detection graph. This is the actual model that is used for the object detection. MODEL_NAME = 'ssd_mobilenet_v1_coco_11_06_2017' PATH_TO_CKPT = os.path.join(CWD_PATH, 'object_detection', MODEL_NAME, 'frozen_inference_graph.pb') # List of the strings that is used to add correct label for each box. PATH_TO_LABELS = os.path.join(CWD_PATH, 'object_detection', 'data', 'mscoco_label_map.pbtxt') NUM_CLASSES = 90 # Loading label map label_map = label_map_util.load_labelmap(PATH_TO_LABELS) categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True) category_index = label_map_util.create_category_index(categories) def detect_objects(image_np, sess, detection_graph): # Expand dimensions since the model expects images to have shape: [1, None, None, 3] image_np_expanded = np.expand_dims(image_np, axis=0) image_tensor = detection_graph.get_tensor_by_name('image_tensor:0') # Each box represents a part of the image where a particular object was detected. boxes = detection_graph.get_tensor_by_name('detection_boxes:0') # Each score represent how level of confidence for each of the objects. # Score is shown on the result image, together with the class label.
) parser.add_argument( "-c", "--csv_path", help="Path of output .csv file. If none provided, then no file will be " "written.", type=str, default=None, ) args = parser.parse_args() if args.image_dir is None: args.image_dir = args.xml_dir label_map = label_map_util.load_labelmap(args.labels_path) label_map_dict = label_map_util.get_label_map_dict(label_map) def xml_to_csv(path): """Iterates through all .xml files (generated by labelImg) in a given directory and combines them in a single Pandas dataframe. Parameters: ---------- path : str The path containing the .xml files Returns ------- Pandas DataFrame The produced dataframe
tar_file.extract(file, os.getcwd()) # %% import tensorflow.compat.v1 as tf detection_graph = tf.Graph() with detection_graph.as_default(): od_graph_def = tf.GraphDef() with tf.io.gfile.GFile(PATH_TO_CKPT, 'rb') as fid: serialized_graph = fid.read() od_graph_def.ParseFromString(serialized_graph) tf.import_graph_def(od_graph_def, name='') # %% label_map = label_map_util.load_labelmap(os.path.join(PATH_TO_LABELS)) categories = label_map_util.convert_label_map_to_categories( label_map, max_num_classes=NUM_CLASSES, use_display_name=True) category_index = label_map_util.create_category_index(categories) # %% def load_image_into_numpy_array(image): (im_width, im_height) = image.size return np.array(image.getdata()).reshape( (im_height, im_width, 3)).astype(np.uint8) # %% PATH_TO_TEST_IMAGES_DIR = 'test_images'
output_dict['detection_classes'] = output_dict[ 'detection_classes'][0].astype(np.uint8) output_dict['detection_boxes'] = output_dict['detection_boxes'][0] output_dict['detection_scores'] = output_dict['detection_scores'][0] if 'detection_masks' in output_dict: output_dict['detection_masks'] = output_dict['detection_masks'][0] return output_dict detection_graph = tf.Graph() with detection_graph.as_default(): od_graph_def = tf.GraphDef() with tf.gfile.GFile("output_inference_graph/frozen_inference_graph.pb", 'rb') as fid: serialized_graph = fid.read() od_graph_def.ParseFromString(serialized_graph) tf.import_graph_def(od_graph_def, name='') label_map = label_map_util.load_labelmap("data/burgers_label_map.pb.txt") categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=6, use_display_name=True) category_index = label_map_util.create_category_index(categories) # Size, in inches, of the output images. IMAGE_SIZE = (12, 8) def eval(layer): image_path = os.path.join("canonical", layer + ".png") image = Image.open(image_path) # the array based representation of the image will be used later in order to prepare the # result image with boxes and labels on it. image_np = load_image_into_numpy_array(image) # Expand dimensions since the model expects images to have shape: [1, None, None, 3] image_np_expanded = np.expand_dims(image_np, axis=0) # Actual detection.
import numpy as np import argparse ap = argparse.ArgumentParser() ap.add_argument("-m", "--model", required=True, help="image") ap.add_argument("-l", "--label_map", required=True, help="image") args = vars(ap.parse_args()) with tf.gfile.FastGFile(args["model"], 'rb') as f: graph_def = tf.GraphDef() graph_def.ParseFromString(f.read()) with tf.Session() as sess: # Restore session sess.graph.as_default() tf.import_graph_def(graph_def, name='') label_map = label_map_util.load_labelmap(args["label_map"]) categories = label_map_util.convert_label_map_to_categories( label_map, max_num_classes=3, use_display_name=True) category_index = label_map_util.create_category_index(categories) # Read and preprocess an image. #img = cv.imread('example2.jpg') ##rows = img.shape[0] #cols = img.shape[1] #inp = cv.resize(img, (300, 300)) #inp = inp[:, :, [2, 1, 0]] # BGR2RGB vs = VideoStream(src=0).start() time.sleep(2.0) fps = FPS().start() while True: # grab the frame from the threaded video stream and resize it # to have a maximum width of 400 pixels
def setup_platform(hass, config, add_entities, discovery_info=None):
    """Set up the TensorFlow image processing platform.

    Checks that the model directory and label map exist and that the
    TensorFlow Object Detection API can be imported, loads the frozen graph
    and label map, then registers one ``TensorFlowImageProcessor`` per
    configured camera source. Returns early (after logging an error) when a
    prerequisite is missing.
    """
    model_settings = config.get(CONF_MODEL)
    models_root = (model_settings.get(CONF_MODEL_DIR)
                   or hass.config.path('tensorflow'))
    label_map_path = (
        model_settings.get(CONF_LABELS)
        or hass.config.path('tensorflow', 'object_detection', 'data',
                            'mscoco_label_map.pbtxt'))

    # Make sure locations exist
    if not (os.path.isdir(models_root) and os.path.exists(label_map_path)):
        _LOGGER.error("Unable to locate tensorflow models or label map")
        return

    # append custom model path to sys.path
    sys.path.append(models_root)

    try:
        # Verify that the TensorFlow Object Detection API is pre-installed
        # pylint: disable=unused-import,unused-variable
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
        import tensorflow as tf  # noqa
        from object_detection.utils import label_map_util  # noqa
    except ImportError:
        # pylint: disable=line-too-long
        _LOGGER.error(
            "No TensorFlow Object Detection library found! Install or compile "
            "for your system following instructions here: "
            "https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md")  # noqa
        return

    try:
        # Display warning that PIL will be used if no OpenCV is found.
        # pylint: disable=unused-import,unused-variable
        import cv2  # noqa
    except ImportError:
        _LOGGER.warning(
            "No OpenCV library found. TensorFlow will process image with "
            "PIL at reduced resolution")

    # setup tensorflow graph, session, and label map to pass to processor
    # pylint: disable=no-member
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        graph_def = tf.GraphDef()
        with tf.gfile.GFile(model_settings.get(CONF_GRAPH), 'rb') as graph_file:
            graph_bytes = graph_file.read()
        graph_def.ParseFromString(graph_bytes)
        tf.import_graph_def(graph_def, name='')

    session = tf.Session(graph=detection_graph)
    category_index = label_map_util.create_category_index(
        label_map_util.convert_label_map_to_categories(
            label_map_util.load_labelmap(label_map_path),
            max_num_classes=90,
            use_display_name=True))

    # One processor entity per configured camera.
    add_entities([
        TensorFlowImageProcessor(
            hass, camera[CONF_ENTITY_ID], camera.get(CONF_NAME),
            session, detection_graph, category_index, config)
        for camera in config[CONF_SOURCE]
    ])
detection_thresh = cfg['pred_thresh'] num_classes = cfg['pred_classes'] csv_path = cfg['pred_csv_path'] images_path = cfg['pred_image_path'] dataset_name = cfg['pred_dataset_name'] project_name = cfg['pred_project_name'] client = GraphQLClient('https://api.labelbox.com/graphql') client.inject_token('Bearer ' + cfg['pred_api_key']) model_name = cfg['pred_model'] classes_filter = cfg['pred_classes_filter'] begin = cfg['pred_begin'] end = cfg['pred_end'] cv_bridge = CvBridge() label_map = label_map_util.load_labelmap("models/" + model_name + "/label_map.pbtxt") categories = label_map_util.convert_label_map_to_categories( label_map, max_num_classes=num_classes, use_display_name=True) category_index = label_map_util.create_category_index(categories) detection_graph = tf.Graph() with detection_graph.as_default(): od_graph_def = tf.GraphDef() with tf.gfile.GFile( "models/" + model_name + "/frozen_inference_graph.pb", 'rb') as fid: serialized_graph = fid.read() od_graph_def.ParseFromString(serialized_graph) tf.import_graph_def(od_graph_def, name='') config = tf.ConfigProto()
OBJECT_DETECTION_LABELS_PATH = OBJECT_DETECTION_MODEL_PATH + object_detection_model_name + '/oid_v5_label_map_customised.pbtxt' # define the max number of classes of objects to be detected object_detection_setting_file.readline() max_num_classes_object = int(object_detection_setting_file.readline()) # define which classes of objects to be detected selected_classes_object = [] object_detection_setting_file.readline() for i in range(max_num_classes_object): object_detection_setting_file.readline() class_setting = int(object_detection_setting_file.readline()) if class_setting == 1: selected_classes_object.append(i + 1) label_map_object = label_map_util.load_labelmap(OBJECT_DETECTION_LABELS_PATH) categories_object = label_map_util.convert_label_map_to_categories( label_map_object, max_num_classes=max_num_classes_object, use_display_name=True) category_index_object = label_map_util.create_category_index(categories_object) # load the object detection model into memory detection_graph = tf.Graph() with detection_graph.as_default(): od_graph_def = tf.GraphDef() with tf.gfile.GFile(OBJECT_DETECTION_CKPT_PATH, 'rb') as fid: serialized_graph = fid.read() od_graph_def.ParseFromString(serialized_graph) tf.import_graph_def(od_graph_def, name='') sess_object = tf.Session(graph=detection_graph)
def detect_images(img_paths, save_detected_images=False, detection_threshold=0.5):
    """Run COCO object detection on image files and optionally save the result.

    Downloads and extracts the frozen ``ssdlite_mobilenet_v2_coco_2018_05_09``
    graph when it is not present in the working directory, runs it on every
    path in ``img_paths``, prints the classes detected above the threshold and
    a progress bar, and can write the annotated image to the working
    directory.

    Args:
        img_paths: Sized sequence of image file paths.
        save_detected_images: When true, write each annotated image as
            ``<name>_detected_output(<counter>).<format>``.
        detection_threshold: Minimum score for a detection to be drawn and
            reported.

    Raises:
        SystemExit: If a path does not exist or is not a regular file.
    """
    print(f'TensorFlow version {tf.__version__}')

    # Model zoo:
    # https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md
    # ssdlite_mobilenet_v2_coco is the fastest of the models that were tried
    # (27 ms, 22 COCO mAP).
    MODEL_NAME = 'ssdlite_mobilenet_v2_coco_2018_05_09'
    MODEL_FILE = MODEL_NAME + '.tar.gz'
    DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'

    # Path to frozen detection graph. This is the actual model that is used
    # for the object detection.
    PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'

    # Label map used to translate class ids into names.
    path_to_research_folder = "/home/nikola/Git/models/research/object_detection/data/"
    PATH_TO_LABELS = os.path.join(path_to_research_folder,
                                  'mscoco_label_map.pbtxt')

    # Number of classes to detect
    NUM_CLASSES = 90

    # Fetch/extract the model. The check is on the extracted graph, so a
    # leftover tarball without an extracted graph is still unpacked.
    if not os.path.exists(PATH_TO_CKPT):
        if not os.path.exists(MODEL_FILE):
            print(f"Downloading {MODEL_NAME} model...")
            urllib.request.urlretrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)
        # NOTE(review): the archive is fetched over plain HTTP and extracted
        # without member-path validation; consider HTTPS and a tarfile
        # extraction filter where the Python version supports it.
        with tarfile.open(MODEL_FILE) as tar_file:
            for member in tar_file.getmembers():
                if 'frozen_inference_graph.pb' in os.path.basename(member.name):
                    tar_file.extract(member, os.getcwd())
    else:
        print(f"Model {MODEL_NAME} already downloaded")

    # Load a (frozen) Tensorflow model into memory.
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.compat.v1.GraphDef()
        with tf.io.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')

    # Label maps map indices to category names, so that when the network
    # predicts `5`, we know that this corresponds to `airplane`.
    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    def load_image_into_numpy_array(image):
        """Return ``image`` as a writable (height, width, 3) uint8 RGB array."""
        # Converting first makes grayscale, palette and RGBA inputs work too.
        return np.array(image.convert('RGB'), dtype=np.uint8)

    def load_images(img_paths):
        """Yield opened images one at a time to keep memory usage low."""
        for img_path in img_paths:
            if not os.path.exists(img_path):
                print(
                    f"File could not be found. Check path and file extension. Entered path is {img_path}"
                )
                raise SystemExit(1)
            if not os.path.isfile(img_path):
                print(
                    f"File is not a valid file. Check path and file extension. Entered path is {img_path}"
                )
                raise SystemExit(1)
            # The file handle is released when the consumer asks for the
            # next image.
            with Image.open(img_path) as opened:
                yield opened

    # Detection
    with detection_graph.as_default():
        with tf.compat.v1.Session(graph=detection_graph) as sess:
            # Tensor handles do not change between images; look them up once.
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            fetches = [
                detection_graph.get_tensor_by_name('detection_boxes:0'),
                detection_graph.get_tensor_by_name('detection_scores:0'),
                detection_graph.get_tensor_by_name('detection_classes:0'),
                detection_graph.get_tensor_by_name('num_detections:0'),
            ]

            for counter, img in enumerate(load_images(img_paths), 1):
                image_np = load_image_into_numpy_array(img)
                # The model expects images of shape [1, None, None, 3].
                image_np_expanded = np.expand_dims(image_np, axis=0)

                # Actual detection.
                (boxes, scores, classes, num_detections) = sess.run(
                    fetches, feed_dict={image_tensor: image_np_expanded})

                # Draw with the caller's threshold so the picture matches the
                # printed report.
                vis_util.visualize_boxes_and_labels_on_image_array(
                    image_np,
                    np.squeeze(boxes),
                    np.squeeze(classes).astype(np.int32),
                    np.squeeze(scores),
                    category_index,
                    use_normalized_coordinates=True,
                    line_thickness=4,
                    min_score_thresh=detection_threshold)

                # Pair each class with its own score before filtering, so the
                # report does not depend on the detections being sorted.
                # TODO: Count the same classes
                detected = [
                    (category_index[int(class_id)]['name'], score)
                    for class_id, score in zip(classes[0], scores[0])
                    if score > detection_threshold
                ]
                print(f"Detected classes: {detected}")

                img_filename_with_ext = os.path.basename(img.filename)
                filename = os.path.splitext(img_filename_with_ext)[0]
                file_ext = img.format

                # Print current progress
                print_progress_bar(
                    counter,
                    len(img_paths),
                    prefix=f'Detecting image {img_filename_with_ext}')

                # Save output
                if save_detected_images:
                    img_save_path = (
                        f'{filename}_detected_output({counter}).{file_ext}')
                    print(f'Saving detected output image to {img_save_path}')
                    # image_np is RGB (from PIL) while OpenCV writes BGR.
                    ret = cv2.imwrite(
                        img_save_path,
                        cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR))
                    if not ret:
                        print(f'Warning. imwrite returned: {ret}')
def test_load_bad_label_map(self):
    # The serialized map below contains an item with id 0; loading it is
    # expected to be rejected with a ValueError.
    bad_label_map_text = """ item { id:0 name:'class that should not be indexed at zero' } item { id:2 name:'cat' } item { id:1 name:'dog' } """
    pbtxt_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
    with tf.gfile.Open(pbtxt_path, 'wb') as pbtxt_file:
        pbtxt_file.write(bad_label_map_text)

    with self.assertRaises(ValueError):
        label_map_util.load_labelmap(pbtxt_path)


def test_keep_categories_with_unique_id(self):
    # Three items share id 1; the expectation keeps only the first of them
    # ('child') next to the uniquely numbered 'cat'.
    serialized_map = """ item { id:2 name:'cat' } item { id:1 name:'child' } item { id:1 name:'person' } item { id:1 name:'n00007846' } """
    proto = string_int_label_map_pb2.StringIntLabelMap()
    text_format.Merge(serialized_map, proto)

    expected = [
        {'id': 2, 'name': u'cat'},
        {'id': 1, 'name': u'child'},
    ]
    actual = label_map_util.convert_label_map_to_categories(
        proto, max_num_classes=3)
    self.assertListEqual(expected, actual)


def test_convert_label_map_to_categories_no_label_map(self):
    # Without a label map, placeholder categories named 'category_<id>' are
    # expected for ids 1..max_num_classes.
    expected = [
        {'name': u'category_1', 'id': 1},
        {'name': u'category_2', 'id': 2},
        {'name': u'category_3', 'id': 3},
    ]
    actual = label_map_util.convert_label_map_to_categories(
        None, max_num_classes=3)
    self.assertListEqual(expected, actual)


def test_convert_label_map_to_coco_categories(self):
    # A generated 4-class map capped at 3 classes should yield ids 1..3 only.
    generated_map = self._generate_label_map(num_classes=4)
    expected = [
        {'name': u'1', 'id': 1},
        {'name': u'2', 'id': 2},
        {'name': u'3', 'id': 3},
    ]
    actual = label_map_util.convert_label_map_to_categories(
        generated_map, max_num_classes=3)
    self.assertListEqual(expected, actual)


def test_convert_label_map_to_coco_categories_with_few_classes(self):
    # Same generated 4-class map, capped at 2 classes this time.
    generated_map = self._generate_label_map(num_classes=4)
    expected = [
        {'name': u'1', 'id': 1},
        {'name': u'2', 'id': 2},
    ]
    actual = label_map_util.convert_label_map_to_categories(
        generated_map, max_num_classes=2)
    self.assertListEqual(expected, actual)


def test_create_category_index(self):
    # The index is expected to map each category id to its category dict.
    input_categories = [
        {'name': u'1', 'id': 1},
        {'name': u'2', 'id': 2},
    ]
    expected_index = {
        1: {'name': u'1', 'id': 1},
        2: {'name': u'2', 'id': 2},
    }
    actual_index = label_map_util.create_category_index(input_categories)
    self.assertDictEqual(expected_index, actual_index)
for file in files: if file.endswith(".jpg"): images.append(os.path.join(root, file)) images.sort() #Load a (frozen) Tensorflow model into memory. detection_graph = tf.Graph() with detection_graph.as_default(): od_graph_def = tf.GraphDef() with tf.gfile.GFile(model_path, 'rb') as fid: serialized_graph = fid.read() od_graph_def.ParseFromString(serialized_graph) tf.import_graph_def(od_graph_def, name='') #Load Label Map label_map = label_map_util.load_labelmap(label_path) categories = label_map_util.convert_label_map_to_categories( label_map, max_num_classes=num_classes, use_display_name=True) category_index = label_map_util.create_category_index(categories) #Detection with detection_graph.as_default(): with tf.Session(graph=detection_graph) as sess: # Definite input and output Tensors for detection_graph image_tensor = detection_graph.get_tensor_by_name('image_tensor:0') # Each box represents a part of the image where a particular object was detected. detection_boxes = detection_graph.get_tensor_by_name( 'detection_boxes:0') # Each score represent how level of confidence for each of the objects. # Score is shown on the result image, together with the class label. detection_scores = detection_graph.get_tensor_by_name(
def main(_):
    """Run alternating training and evaluation cycles for a detection model.

    Reads the pipeline config named by ``FLAGS.pipeline_config_path``, copies
    it into the train and eval directories, and then repeatedly trains for
    ``FLAGS.epochs_between_evals`` steps followed by one evaluation pass.
    The loop ends when all cycles have run or when ``stopping_criteria_met``
    reports that the thresholds in ``FLAGS.mask_min_ap`` /
    ``FLAGS.box_min_ap`` were reached.

    Args:
        _: Unused positional argument (argv passed by the app runner).

    Raises:
        AssertionError: If ``train_dir``, ``pipeline_config_path`` or
            ``eval_dir`` flags are empty.
        ValueError: If several worker replicas are configured without any
            parameter-server task.
    """
    assert FLAGS.train_dir, '`train_dir` is missing.'
    assert FLAGS.pipeline_config_path, '`pipeline_config_path` is missing'
    assert FLAGS.eval_dir, '`eval_dir` is missing.'

    configs = config_util.get_configs_from_pipeline_file(
        FLAGS.pipeline_config_path)
    if FLAGS.task == 0:
        tf.gfile.MakeDirs(FLAGS.train_dir)
        tf.gfile.Copy(FLAGS.pipeline_config_path,
                      os.path.join(FLAGS.train_dir, 'pipeline.config'),
                      overwrite=True)

    tf.gfile.MakeDirs(FLAGS.eval_dir)
    tf.gfile.Copy(FLAGS.pipeline_config_path,
                  os.path.join(FLAGS.eval_dir, 'pipeline.config'),
                  overwrite=True)

    model_config = configs['model']
    train_config = configs['train_config']
    train_input_config = configs['train_input_config']
    eval_config = configs['eval_config']
    if FLAGS.eval_training_data:
        eval_input_config = configs['train_input_config']
    else:
        eval_input_config = configs['eval_input_config']

    # Run evaluation after every `epochs_between_evals` steps of training.
    # The total amount of training is the `num_steps` value provided in the
    # config; when it is unset (0) a very large default is used instead.
    # TODO(mehdi): make it run indef
    total_num_epochs = train_config.num_steps or 20000000
    train_config.num_steps = FLAGS.epochs_between_evals
    total_training_cycle = total_num_epochs // train_config.num_steps

    train_model_fn = functools.partial(model_builder.build,
                                       model_config=model_config,
                                       is_training=True)
    eval_model_fn = functools.partial(model_builder.build,
                                      model_config=model_config,
                                      is_training=False)

    def get_next(config):
        return dataset_util.make_initializable_iterator(
            dataset_builder.build(config)).get_next()

    # Functions to create a tensor input dictionary for both training and
    # evaluation.
    train_input_dict_fn = functools.partial(get_next, train_input_config)
    eval_input_dict_fn = functools.partial(get_next, eval_input_config)

    # If not explicitly specified in the constructor and the TF_CONFIG
    # environment variable is present, load cluster_spec from TF_CONFIG.
    env = json.loads(os.environ.get('TF_CONFIG', '{}'))
    cluster_data = env.get('cluster', None)
    cluster = tf.train.ClusterSpec(cluster_data) if cluster_data else None
    task_data = env.get('task', {'type': 'master', 'index': 0})
    # Expose the task dict entries as attributes (task_info.type / .index).
    task_info = type('TaskSpec', (object,), task_data)

    # Parameters for a single worker.
    parameter_server_tasks = 0
    worker_replicas = 1
    worker_job_name = 'lonely_worker'
    task = 0
    is_chief = True
    master = ''

    if cluster_data and 'worker' in cluster_data:
        # Number of total worker replicas include "worker"s and the "master".
        worker_replicas = len(cluster_data['worker']) + 1
    if cluster_data and 'ps' in cluster_data:
        parameter_server_tasks = len(cluster_data['ps'])

    if worker_replicas > 1 and parameter_server_tasks < 1:
        raise ValueError(
            'At least 1 ps task is needed for distributed training.')

    if worker_replicas >= 1 and parameter_server_tasks > 0:
        # Set up distributed training.
        server = tf.train.Server(tf.train.ClusterSpec(cluster),
                                 protocol='grpc',
                                 job_name=task_info.type,
                                 task_index=task_info.index)
        if task_info.type == 'ps':
            # Parameter servers only serve variables; they never train.
            server.join()
            return

        worker_job_name = '%s/task:%d' % (task_info.type, task_info.index)
        task = task_info.index
        is_chief = (task_info.type == 'master')
        master = server.target

    label_map = label_map_util.load_labelmap(eval_input_config.label_map_path)
    max_num_classes = max([item.id for item in label_map.item])
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes)

    if FLAGS.run_once:
        eval_config.max_evals = 1

    train_graph_rewriter_fn = eval_graph_rewriter_fn = None
    if 'graph_rewriter_config' in configs:
        rewriter_config = configs['graph_rewriter_config']
        train_graph_rewriter_fn = graph_rewriter_builder.build(
            rewriter_config, is_training=True)
        # Only 'graph_rewriter_config' is known to be present here, so
        # indexing 'eval_rewriter_config' unconditionally could raise
        # KeyError. Use a dedicated eval entry when one exists, otherwise
        # reuse the shared rewriter config in inference mode.
        eval_graph_rewriter_fn = graph_rewriter_builder.build(
            configs.get('eval_rewriter_config', rewriter_config),
            is_training=False)

    def train():
        return trainer.train(create_tensor_dict_fn=train_input_dict_fn,
                             create_model_fn=train_model_fn,
                             train_config=train_config,
                             master=master,
                             task=task,
                             num_clones=FLAGS.num_clones,
                             worker_replicas=worker_replicas,
                             clone_on_cpu=FLAGS.clone_on_cpu,
                             ps_tasks=parameter_server_tasks,
                             worker_job_name=worker_job_name,
                             is_chief=is_chief,
                             train_dir=FLAGS.train_dir,
                             graph_hook_fn=train_graph_rewriter_fn)

    def evaluate():
        # Evaluation reads checkpoints from the training directory.
        return evaluator.evaluate(eval_input_dict_fn,
                                  eval_model_fn,
                                  eval_config,
                                  categories,
                                  FLAGS.train_dir,
                                  FLAGS.eval_dir,
                                  graph_hook_fn=eval_graph_rewriter_fn)

    for cycle_index in range(total_training_cycle):
        tf.logging.info('Starting a training cycle: %d/%d',
                        cycle_index, total_training_cycle)
        train()
        tf.logging.info('Starting to evaluate.')
        eval_metrics = evaluate()
        if stopping_criteria_met(eval_metrics, FLAGS.mask_min_ap,
                                 FLAGS.box_min_ap):
            tf.logging.info('Stopping criteria met. Training stopped')
            break
def __init__(self, model_name, label_file='data/mscoco_label_map.pbtxt',
             image_height=480, image_width=640):
    """Load a frozen detection graph, open a session and read the label map.

    Args:
        model_name: Directory name of the model; the frozen graph is looked
            up at ``model_name + '/' + self.GRAPH_FILE_NAME`` and fetched via
            ``self.download_model`` when that file does not exist.
        label_file: Path to the label map file passed to
            ``label_map_util.load_labelmap``.
        image_height: Image height used when reframing instance masks from
            box coordinates to image coordinates. Only used when the graph
            exposes a ``detection_masks`` tensor. Defaults to the previously
            hard-coded 480.
        image_width: Image width used for the same mask reframing. Defaults
            to the previously hard-coded 640.
    """
    # Initialize some variables
    print("ObjectDetector('%s', '%s')" % (model_name, label_file))
    self.process_this_frame = True

    # Download the model if its frozen graph is not on disk yet.
    self.graph_file = model_name + '/' + self.GRAPH_FILE_NAME
    if not os.path.isfile(self.graph_file):
        self.download_model(model_name)

    # Load a (frozen) Tensorflow model into memory.
    self.detection_graph = tf.Graph()
    with self.detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(self.graph_file, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

        graph = self.detection_graph
        ops = graph.get_operations()
        all_tensor_names = {
            output.name for op in ops for output in op.outputs
        }

        # Collect only the output tensors that this particular graph has.
        tensor_dict = {}
        for key in [
                'num_detections', 'detection_boxes', 'detection_scores',
                'detection_classes', 'detection_masks'
        ]:
            tensor_name = key + ':0'
            if tensor_name in all_tensor_names:
                tensor_dict[key] = graph.get_tensor_by_name(tensor_name)

        if 'detection_masks' in tensor_dict:
            # The following processing is only for single image
            detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
            detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
            # Reframe is required to translate mask from box coordinates to
            # image coordinates and fit the image size.
            real_num_detection = tf.cast(tensor_dict['num_detections'][0],
                                         tf.int32)
            detection_boxes = tf.slice(detection_boxes, [0, 0],
                                       [real_num_detection, -1])
            detection_masks = tf.slice(detection_masks, [0, 0, 0],
                                       [real_num_detection, -1, -1])
            detection_masks_reframed = (
                utils_ops.reframe_box_masks_to_image_masks(
                    detection_masks, detection_boxes,
                    image_height, image_width))
            detection_masks_reframed = tf.cast(
                tf.greater(detection_masks_reframed, 0.5), tf.uint8)
            # Follow the convention by adding back the batch dimension
            tensor_dict['detection_masks'] = tf.expand_dims(
                detection_masks_reframed, 0)

    self.tensor_dict = tensor_dict
    self.sess = tf.Session(graph=self.detection_graph)

    # Loading label map
    # Label maps map indices to category names,
    # so that when our convolution network predicts `5`,
    # we know that this corresponds to `airplane`.
    # Here we use internal utility functions,
    # but anything that returns a dictionary mapping integers to appropriate
    # string labels would be fine
    label_map = label_map_util.load_labelmap(label_file)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=self.NUM_CLASSES, use_display_name=True)
    self.category_index = label_map_util.create_category_index(categories)

    self.output_dict = None
    self.last_inference_time = 0