opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE) tar_file = tarfile.open(MODEL_FILE) for file in tar_file.getmembers(): file_name = os.path.basename(file.name) if 'frozen_inference_graph.pb' in file_name: tar_file.extract(file, os.getcwd()) detection_graph = tf.Graph() with detection_graph.as_default(): od_graph_def = tf.GraphDef() with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid: serialized_graph = fid.read() od_graph_def.ParseFromString(serialized_graph) tf.import_graph_def(od_graph_def, name='') category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS) def load_image_into_numpy_array(image): (im_width, im_height) = image.size return np.array(image.getdata()).reshape( (im_height, im_width, 3)).astype(np.uint8) PATH_TO_TEST_IMAGES_DIR = 'images' TEST_IMAGE_PATHS = [ os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image ({}).jpg'.format(i)) for i in range(21, 22) ] # Size, in inches, of the output images. IMAGE_SIZE = (12, 8) def run_inference_for_single_image(image, graph): with graph.as_default(): with tf.Session() as sess:
def run(detection_graph, inputFile, srtData, targetFolder):
    """Run object detection on every frame of a video and export the hits.

    For each frame containing at least one detection whose score reaches the
    0.5 threshold, the annotated frame is saved as
    ``<targetFolder>/<name>.jpg`` and the pixel-space boxes are written to
    ``<targetFolder>/<name>.txt`` (one ``xmin, ymin, xmax, ymax, class_id``
    line per box), where ``<name>`` is ``srtData[frame_index].content``.

    Args:
        detection_graph: TensorFlow graph exposing the ``image_tensor:0``,
            ``detection_boxes:0``, ``detection_scores:0``,
            ``detection_classes:0`` and ``num_detections:0`` tensors.
        inputFile: Path of the video, opened with ``cv2.VideoCapture``.
        srtData: Sequence indexed by frame number; each item provides a
            ``content`` attribute used as the export file name.
        targetFolder: Directory receiving the ``.jpg`` and ``.txt`` files.

    Raises:
        ValueError: If no frame at all could be read from the video.
    """
    score_threshold = 0.5

    # Setting up labels of trained model
    category_index = label_map_util.create_category_index_from_labelmap(
        cfg.PRETRAINED_MODEL_LABELS, use_display_name=True)

    with tf.compat.v1.Session(graph=detection_graph) as sess:
        # Input and output tensors of the detection graph.
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        return_tensors = [
            # Each box represents a part of the image where an object was detected.
            detection_graph.get_tensor_by_name('detection_boxes:0'),
            # Each score is the confidence for the corresponding box.
            detection_graph.get_tensor_by_name('detection_scores:0'),
            detection_graph.get_tensor_by_name('detection_classes:0'),
            detection_graph.get_tensor_by_name('num_detections:0'),
        ]

        print("Reading video at " + inputFile)
        vid = cv2.VideoCapture(inputFile)
        maxFrames = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
        video_width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        video_height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        print(f"Begin processing of video with {maxFrames} frames...")

        jumpedFrames = 0  # number of leading frames to skip
        currFrame = jumpedFrames
        pbar = tqdm(total=maxFrames)
        try:
            vid.set(cv2.CAP_PROP_POS_FRAMES, currFrame)
            pbar.update(jumpedFrames)

            while vid.isOpened():
                ret, frame = vid.read()
                if not ret:
                    if currFrame > jumpedFrames:
                        # Something has been processed earlier: end of video.
                        print("Done. Quitting...")
                        break
                    raise ValueError("Error while reading!", frame)
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

                boxes, scores, classes, _ = detect_util.run_single_inference(
                    sess, return_tensors, image_tensor, frame)
                box_arr = np.squeeze(boxes)
                score_arr = np.squeeze(scores)
                classes_arr = np.squeeze(classes)

                # Count over however many detections the model returned
                # instead of assuming there are exactly 100 of them.
                count = int(np.count_nonzero(score_arr >= score_threshold))

                if count > 0:
                    vis_util.visualize_boxes_and_labels_on_image_array(
                        frame,
                        box_arr,
                        classes_arr.astype(np.int32),
                        score_arr,
                        category_index,
                        use_normalized_coordinates=True,
                        line_thickness=8,
                    )

                    # Save the annotated frame as an image.
                    exportName = srtData[currFrame].content
                    Image.fromarray(frame).save(
                        targetFolder + "/" + exportName + ".jpg")

                    _, fields = box_arr.shape
                    assert fields == 4, (
                        "Error: Bounding boxes should have 4 coordinates, "
                        f"has {fields}")

                    coord_boxes = []
                    for i, b in enumerate(box_arr):
                        if score_arr[i] < score_threshold:
                            continue
                        ymin = int(b[0] * video_height)
                        xmin = int(b[1] * video_width)
                        ymax = int(b[2] * video_height)
                        xmax = int(b[3] * video_width)
                        if ymin == 0 and xmin == 0 and ymax == 0 and xmax == 0:
                            # Image does not contain any more detections.
                            print(f"Breaking after {i} detections")
                            break
                        # ATTENTION: Watch order! The class id is stored with
                        # its own box so the log does not reuse a stale value.
                        coord_boxes.append(
                            [xmin, ymin, xmax, ymax, int(classes_arr[i])])

                    # Save logfile for image.
                    # Format: TopleftX, TopleftY, BottomRightX, BottomRightY, Class ID
                    filepathLog = targetFolder + "/" + exportName + ".txt"
                    with open(filepathLog, "w") as logfile:
                        for xmin, ymin, xmax, ymax, class_id in coord_boxes:
                            logfile.write(
                                f"{xmin}, {ymin}, {xmax}, {ymax}, {class_id}\n")

                pbar.update(1)
                currFrame += 1
        finally:
            # Release the progress bar and the capture even on failure.
            pbar.close()
            vid.release()
def scriptADetectingObjects():
    """Detect objects and read licence plates on the frames of a video stream.

    Every frame read from ``cap`` is run through the frozen SSD-Mobilenet
    graph, annotated with the detected boxes, written to ``img.jpg`` and
    handed to OpenALPR; the recognised plate candidates are printed. The
    annotated frame is shown in a window until the stream ends or ``q`` is
    pressed.

    Exits the process with status 1 when OpenALPR cannot be loaded.
    """
    alpr = Alpr("eu", "openalpr.conf", "runtime_data")
    if not alpr.is_loaded():
        print("Error loading OpenALPR")
        sys.exit(1)
    alpr.set_top_n(1)
    alpr.set_default_region("eu")

    # This is needed since the notebook is stored in the object_detection folder.
    sys.path.append("..")

    # Imports from the object detection module.
    from utils import label_map_util
    from utils import visualization_utils as vis_util

    # Any model exported using the `export_inference_graph.py` tool can be
    # loaded here simply by pointing PATH_TO_FROZEN_GRAPH to a new .pb file.
    MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17'
    # Path to frozen detection graph: the actual model used for the detection.
    PATH_TO_FROZEN_GRAPH = MODEL_NAME + '/frozen_inference_graph.pb'
    # Label map used to add the correct label to each box.
    PATH_TO_LABELS = os.path.join('data', 'mscoco_label_map.pbtxt')

    # Load the (frozen) TensorFlow model into memory.
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
            od_graph_def.ParseFromString(fid.read())
        tf.import_graph_def(od_graph_def, name='')

    # Label maps map class indices to category names.
    category_index = label_map_util.create_category_index_from_labelmap(
        PATH_TO_LABELS, use_display_name=True)

    try:
        with detection_graph.as_default(), \
                tf.Session(graph=detection_graph) as sess:
            # The tensor handles never change, so look them up once.
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            fetches = [
                detection_graph.get_tensor_by_name('detection_boxes:0'),
                detection_graph.get_tensor_by_name('detection_scores:0'),
                detection_graph.get_tensor_by_name('detection_classes:0'),
                detection_graph.get_tensor_by_name('num_detections:0'),
            ]

            while True:
                # NOTE(review): `cap` is not created in this function (all
                # VideoCapture lines were commented out); it is presumably a
                # module-level cv2.VideoCapture - confirm against the caller.
                ret, image_np = cap.read()
                if not ret:
                    # End of stream or read failure: there is no frame to process.
                    break

                # The model expects images of shape [1, None, None, 3].
                image_np_expanded = np.expand_dims(image_np, axis=0)

                # Actual detection.
                boxes, scores, classes, _ = sess.run(
                    fetches, feed_dict={image_tensor: image_np_expanded})

                # Visualization of the results of a detection.
                vis_util.visualize_boxes_and_labels_on_image_array(
                    image_np,
                    np.squeeze(boxes),
                    np.squeeze(classes).astype(np.int32),
                    np.squeeze(scores),
                    category_index,
                    use_normalized_coordinates=True,
                    line_thickness=8)

                cv2.imwrite('img.jpg', image_np)
                results = alpr.recognize_file('img.jpg')

                # Report the plates recognised in this frame.
                for i, plate in enumerate(results['results'], start=1):
                    print("Plate #%d" % i)
                    print(" %12s %12s" % ("Plate", "Confidence"))
                    for candidate in plate['candidates']:
                        # "*" marks candidates that match the region template.
                        prefix = "*" if candidate['matches_template'] else "-"
                        print(" %s %12s%12f" % (prefix, candidate['plate'],
                                                candidate['confidence']))

                cv2.imshow('object detection', cv2.resize(image_np, (800, 600)))
                if cv2.waitKey(25) & 0xFF == ord('q'):
                    break
    finally:
        # Close the preview window and free the ALPR engine in every exit path.
        cv2.destroyAllWindows()
        alpr.unload()
path_to_pbtxt = '/home/ec2-user/obj-detection/annotations/rx_form_3.pbtxt' for p in (path_to_model, path_to_pbtxt): if not os.path.exists(p): print(f'Wrong Input {p}') # path_to_eval_images = '/home/ec2-user/obj-detection/eval_images/' path_to_eval_images = args.eval_images path_to_cropped_region = os.path.join(args.output_path, 'd_bboxes') path_to_inference_result = os.path.join(args.output_path, 'd_results') # visualization OD results on eval_images/ path_to_prediction_log = os.path.join(args.output_path, 'predictions.pkl') for p in (path_to_cropped_region, path_to_inference_result): if not os.path.exists(p): os.makedirs(p) print(f'Create dir: {p}') category_index = label_map_util.create_category_index_from_labelmap(path_to_pbtxt, use_display_name=True) label_map = {i: obj['name'] for i, obj in category_index.items()} detector = TFObjectDetector(path_to_model=path_to_model) test_images = glob.glob(path_to_eval_images + '*') print(' [ '+datetime.now().strftime("%b %d %H:%M:%S")+' ] ') print('Initializing inference graph, label map, test images..Done') print(f'Found {len(test_images)} Test images.') print(f'Start Object Detection with score >= {args.threshold}\n\n') if os.path.exists(path_to_prediction_log): os.unlink(path_to_prediction_log) records = [] for im_path in test_images: filename = os.path.basename(im_path) print('['+datetime.now().strftime("%b %d %H:%M:%S")+']' + f'Detecting: {filename}')
output_dict['detection_scores'] = output_dict['detection_scores'][0] if 'detection_masks' in output_dict: output_dict['detection_masks'] = output_dict['detection_masks'][0] return output_dict detection_graph = tf.Graph() with detection_graph.as_default(): od_graph_def = tf.GraphDef() with tf.gfile.GFile(results.model, 'rb') as fid: serialized_graph = fid.read() od_graph_def.ParseFromString(serialized_graph) tf.import_graph_def(od_graph_def, name='') os.environ["CUDA_VISIBLE_DEVICES"] = results.gpus_2_use category_index = label_map_util.create_category_index_from_labelmap(results.parts_label, use_display_name=True) with tf.Session(graph=detection_graph) as sess: for video in videos: print("Processing " + video + " now.\n") cap = cv2.VideoCapture(video) nframes = int(cap.get(7)) fps = int(round(cap.get(5))) if os.path.isdir(video.split(".")[0]): shutil.rmtree(video.split(".")[0]) os.mkdir(video.split(".")[0]) IMAGE_SIZE = (12, 8)
def getTreeLabel(number):
    """Detect tree features in ``tree<number>.jpg`` and return their labels.

    The image ``./media/photos/tree/tree<number>.jpg`` is run through the
    frozen detection graph, the annotated picture is written to
    ``./analysis/static/labelImage/treeLabel<number>.jpg`` and the labels of
    all detections scoring above 0.6 are returned.

    Args:
        number: Suffix identifying the photo; it is converted with ``str``.

    Returns:
        pandas.DataFrame with a single ``class`` column holding the label name
        of every retained detection (``0`` for class ids without a name).

    Raises:
        ImportError: If the installed TensorFlow is older than 1.12.0.
    """
    # This is needed since the notebook is stored in the object_detection folder.
    if ".." not in sys.path:
        sys.path.append("..")

    if StrictVersion(tf.__version__) < StrictVersion('1.12.0'):
        raise ImportError('Please upgrade your TensorFlow installation to v1.12.*.')

    MODEL_NAME = '1028_rcnn_tree_4000_bs1_plus'
    # Path to frozen detection graph: the actual model used for the detection.
    PATH_TO_FROZEN_GRAPH = ('/home/ubuntu/IITP_Project/analysis/' + MODEL_NAME
                            + '/frozen_inference_graph.pb')
    # Label map used to add the correct label to each box.
    PATH_TO_LABELS = os.path.join(MODEL_NAME, 'object-detection.pbtxt')

    suffix = str(number)
    image_path = os.path.join('./media/photos/tree/', 'tree' + suffix + '.jpg')
    output_path = './analysis/static/labelImage/' + 'treeLabel' + suffix + '.jpg'
    min_threshold = 0.6

    # Class id -> label name; ids outside this table map to 0.
    label_names = {
        1: 'root', 2: 'knot', 3: 'bird', 4: 'animal', 5: 'cfruit',
        6: 'nfruit', 7: 'flower', 8: 'grass', 9: 'branch', 10: 'cleaf',
        11: 'nleaf',
    }

    def get_label(num):
        return label_names.get(num, 0)

    def run_inference_for_single_image(image, graph):
        with graph.as_default():
            with tf.Session() as sess:
                # Get handles to input and output tensors.
                default_graph = tf.get_default_graph()
                all_tensor_names = {
                    output.name
                    for op in default_graph.get_operations()
                    for output in op.outputs
                }
                tensor_dict = {}
                for key in ['num_detections', 'detection_boxes',
                            'detection_scores', 'detection_classes',
                            'detection_masks']:
                    tensor_name = key + ':0'
                    if tensor_name in all_tensor_names:
                        tensor_dict[key] = default_graph.get_tensor_by_name(
                            tensor_name)
                if 'detection_masks' in tensor_dict:
                    # The following processing is only for a single image.
                    detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
                    detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
                    # Reframe is required to translate masks from box
                    # coordinates to image coordinates and fit the image size.
                    real_num_detection = tf.cast(
                        tensor_dict['num_detections'][0], tf.int32)
                    detection_boxes = tf.slice(
                        detection_boxes, [0, 0], [real_num_detection, -1])
                    detection_masks = tf.slice(
                        detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
                    detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                        detection_masks, detection_boxes,
                        image.shape[1], image.shape[2])
                    detection_masks_reframed = tf.cast(
                        tf.greater(detection_masks_reframed, 0.5), tf.uint8)
                    # Follow the convention by adding back the batch dimension.
                    tensor_dict['detection_masks'] = tf.expand_dims(
                        detection_masks_reframed, 0)
                image_tensor = default_graph.get_tensor_by_name('image_tensor:0')

                # Run inference.
                output_dict = sess.run(tensor_dict, feed_dict={image_tensor: image})

                # All outputs are float32 numpy arrays, so convert types as
                # appropriate and drop the batch dimension.
                output_dict['num_detections'] = int(output_dict['num_detections'][0])
                output_dict['detection_classes'] = output_dict[
                    'detection_classes'][0].astype(np.int64)
                output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
                output_dict['detection_scores'] = output_dict['detection_scores'][0]
                if 'detection_masks' in output_dict:
                    output_dict['detection_masks'] = output_dict['detection_masks'][0]
        return output_dict

    # Load the (frozen) TensorFlow model into memory.
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
            od_graph_def.ParseFromString(fid.read())
        tf.import_graph_def(od_graph_def, name='')

    category_index = label_map_util.create_category_index_from_labelmap(
        PATH_TO_LABELS, use_display_name=True)

    # Force three RGB channels so grayscale/RGBA/palette files work as well,
    # and close the file handle as soon as the pixels are copied.
    with Image.open(image_path) as image:
        image_np = np.array(image.convert('RGB'), dtype=np.uint8)

    # The model expects images of shape [1, None, None, 3].
    image_np_expanded = np.expand_dims(image_np, axis=0)
    output_dict = run_inference_for_single_image(image_np_expanded, detection_graph)

    # Draw the detections onto image_np (modified in place).
    vis_util.visualize_boxes_and_labels_on_image_array(
        image_np,
        output_dict['detection_boxes'],
        output_dict['detection_classes'],
        output_dict['detection_scores'],
        category_index,
        instance_masks=output_dict.get('detection_masks'),
        use_normalized_coordinates=True,
        min_score_thresh=min_threshold,
        line_thickness=8)

    now = datetime.now()
    # image_np is RGB (it comes from PIL) while cv2.imwrite expects BGR;
    # convert first so the saved picture keeps its colours.
    cv2.imwrite(output_path, cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR))
    if os.path.exists(output_path):
        print('finish')
        print(now)

    df_class = pd.DataFrame(output_dict['detection_classes'], columns=['class'])
    df_score = pd.DataFrame(output_dict['detection_scores'], columns=['score'])
    df_sum = pd.concat([df_class, df_score], axis=1)
    df_final = df_sum[df_sum['score'] > min_threshold]

    # Convert the label series back into a DataFrame for the caller
    # (the original note says it is then exported as JSON).
    return pd.DataFrame(df_final['class'].apply(get_label))
def load_label_map(self, label_map_file):
    """Build the category index for ``label_map_file`` and keep it on self.

    The index is created with ``use_display_name=True`` and stored in
    ``self.category_index``.
    """
    category_index = label_map_util.create_category_index_from_labelmap(
        label_map_file,
        use_display_name=True,
    )
    self.category_index = category_index
def load_labels(label_path):
    """Return the category index built from the label map at ``label_path``.

    The index is created with ``use_display_name=True``.
    """
    return label_map_util.create_category_index_from_labelmap(
        label_path,
        use_display_name=True,
    )
def model():
    """Run the trained detector on ``pic.jpg`` and return the object count.

    The frozen graph is extracted from the model archive into the current
    working directory, the inference graph and label map are loaded from the
    ``workspace/training_demo`` tree three directories above the current
    working directory, and every test image is annotated and saved as
    ``<image name>1.jpg`` in the current working directory.

    Returns:
        Sum over all test images of the value returned by
        ``vis_util.visualize_boxes_and_labels_on_image_array``.
        NOTE(review): this assumes the project's ``vis_util`` returns the
        number of drawn boxes - confirm.
    """
    path = os.path.abspath(os.path.join(os.getcwd(), "../../.."))
    training_dir = os.path.join(path, 'workspace', 'training_demo')

    MODEL_NAME = 'faster_rcnn_inception_v2_coco_2018_01_28'
    MODEL_FILE = MODEL_NAME + '.tar.gz'

    # Path to frozen detection graph: the actual model used for the detection.
    PATH_TO_FROZEN_GRAPH = os.path.join(
        training_dir, 'trained-inference-graphs',
        'output_inference_graph_v1.pb', 'frozen_inference_graph.pb')
    # Label map used to add the correct label to each box.
    PATH_TO_LABELS = os.path.join(training_dir, 'annotations', 'label_map.pbtxt')

    # Extract the frozen graph from the archive; the context manager closes
    # the archive even if extraction fails.
    with tarfile.open(os.path.join(training_dir, MODEL_FILE)) as tar_file:
        for member in tar_file.getmembers():
            if 'frozen_inference_graph.pb' in os.path.basename(member.name):
                tar_file.extract(member, os.getcwd())

    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
            od_graph_def.ParseFromString(fid.read())
        tf.import_graph_def(od_graph_def, name='')

    category_index = label_map_util.create_category_index_from_labelmap(
        PATH_TO_LABELS, use_display_name=True)

    PATH_TO_TEST_IMAGES_DIR = os.path.join(
        path, 'models', 'research', 'object_detection')
    TEST_IMAGE_PATHS = [os.path.join(PATH_TO_TEST_IMAGES_DIR, 'pic.jpg')]

    total_count = 0
    for image_path in TEST_IMAGE_PATHS:
        fileName = os.path.basename(image_path).split('.')[0]
        # The array representation is used later to prepare the result image
        # with boxes and labels on it.
        with Image.open(image_path) as image:
            image_np = load_image_into_numpy_array(image)

        # Actual detection.
        # NOTE(review): the un-batched array is passed on purpose, exactly as
        # before; presumably the helper adds the batch dimension - confirm.
        output_dict = run_inference_for_single_image(image_np, detection_graph)

        # Visualization of the results of a detection.
        k = vis_util.visualize_boxes_and_labels_on_image_array(
            image_np,
            output_dict['detection_boxes'],
            output_dict['detection_classes'],
            output_dict['detection_scores'],
            category_index,
            instance_masks=output_dict.get('detection_masks'),
            use_normalized_coordinates=True,
            max_boxes_to_draw=None,
            line_thickness=8,
            min_score_thresh=.6,
            agnostic_mode=True,
            skip_labels=False)
        print('Count = %d' % k)
        total_count += k

        # Create the figure directly at its final 24x18 inch size and close it
        # after saving so figures do not pile up in memory.
        fig = plt.figure(figsize=(24, 18))
        try:
            plt.imshow(image_np)
            plt.savefig(fileName + '1.jpg', bbox_inches='tight')
        finally:
            plt.close(fig)

    return total_count
def working(image_given):
    """Detect objects in an image and return the annotated pixel array.

    Loads the frozen graph ``SayakDa/frozen_inference_graph.pb`` and the label
    map ``SayakDa/Sayak.pbtxt``, runs one inference pass on the image and
    draws the detected boxes and labels onto it.

    Args:
        image_given: Anything accepted by ``PIL.Image.open`` (a path or a
            file object).

    Returns:
        numpy.ndarray of shape (height, width, 3), dtype uint8, holding the
        RGB image with the detections drawn on it.

    Raises:
        ImportError: If the installed TensorFlow is older than 1.12.0.
    """
    import numpy as np
    import os
    import tensorflow as tf
    from distutils.version import StrictVersion
    from PIL import Image
    import sys

    # This is needed since the notebook is stored in the object_detection folder.
    if ".." not in sys.path:
        sys.path.append("..")
    from object_detection.utils import ops as utils_ops

    if StrictVersion(tf.__version__) < StrictVersion('1.12.0'):
        raise ImportError(
            'Please upgrade your TensorFlow installation to v1.12.*.')

    import utils.label_map_util as label_map_util
    import utils.visualization_utils as vis_util

    MODEL_NAME = 'SayakDa'
    PATH_TO_FROZEN_GRAPH = MODEL_NAME + '/frozen_inference_graph.pb'
    PATH_TO_LABELS = os.path.join(MODEL_NAME, 'Sayak.pbtxt')

    # Load the (frozen) TensorFlow model into memory.
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
            od_graph_def.ParseFromString(fid.read())
        tf.import_graph_def(od_graph_def, name='')

    # Label maps map class indices to category names.
    category_index = label_map_util.create_category_index_from_labelmap(
        PATH_TO_LABELS, use_display_name=True)

    def run_inference_for_single_image(image, graph):
        with graph.as_default():
            with tf.Session() as sess:
                # Get handles to input and output tensors.
                default_graph = tf.get_default_graph()
                all_tensor_names = {
                    output.name
                    for op in default_graph.get_operations()
                    for output in op.outputs
                }
                tensor_dict = {}
                for key in ['num_detections', 'detection_boxes',
                            'detection_scores', 'detection_classes',
                            'detection_masks']:
                    tensor_name = key + ':0'
                    if tensor_name in all_tensor_names:
                        tensor_dict[key] = default_graph.get_tensor_by_name(
                            tensor_name)
                if 'detection_masks' in tensor_dict:
                    # The following processing is only for a single image.
                    detection_boxes = tf.squeeze(
                        tensor_dict['detection_boxes'], [0])
                    detection_masks = tf.squeeze(
                        tensor_dict['detection_masks'], [0])
                    # Reframe is required to translate masks from box
                    # coordinates to image coordinates and fit the image size.
                    real_num_detection = tf.cast(
                        tensor_dict['num_detections'][0], tf.int32)
                    detection_boxes = tf.slice(
                        detection_boxes, [0, 0], [real_num_detection, -1])
                    detection_masks = tf.slice(
                        detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
                    detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                        detection_masks, detection_boxes,
                        image.shape[1], image.shape[2])
                    detection_masks_reframed = tf.cast(
                        tf.greater(detection_masks_reframed, 0.5), tf.uint8)
                    # Follow the convention by adding back the batch dimension.
                    tensor_dict['detection_masks'] = tf.expand_dims(
                        detection_masks_reframed, 0)
                image_tensor = default_graph.get_tensor_by_name('image_tensor:0')

                # Run inference.
                output_dict = sess.run(tensor_dict,
                                       feed_dict={image_tensor: image})

                # All outputs are float32 numpy arrays, so convert types as
                # appropriate and drop the batch dimension.
                output_dict['num_detections'] = int(
                    output_dict['num_detections'][0])
                output_dict['detection_classes'] = output_dict[
                    'detection_classes'][0].astype(np.int64)
                output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
                output_dict['detection_scores'] = output_dict['detection_scores'][0]
                if 'detection_masks' in output_dict:
                    output_dict['detection_masks'] = output_dict['detection_masks'][0]
        return output_dict

    # Force three RGB channels so grayscale/RGBA/palette images work too, and
    # close the file handle as soon as the pixels are copied. The array is
    # used later to prepare the result image with boxes and labels on it.
    with Image.open(image_given) as image:
        image_np = np.array(image.convert('RGB'), dtype=np.uint8)

    # The model expects images of shape [1, None, None, 3].
    image_np_expanded = np.expand_dims(image_np, axis=0)
    # Actual detection.
    output_dict = run_inference_for_single_image(image_np_expanded,
                                                 detection_graph)
    # Visualization of the results of a detection (draws on image_np in place).
    vis_util.visualize_boxes_and_labels_on_image_array(
        image_np,
        output_dict['detection_boxes'],
        output_dict['detection_classes'],
        output_dict['detection_scores'],
        category_index,
        instance_masks=output_dict.get('detection_masks'),
        use_normalized_coordinates=True,
        line_thickness=8)
    return image_np
def get_predictions_from_image(frozen_inference_graph_path: str,
                               label_map_path: str,
                               img_to_predict_on):
    """Run one detection pass on an image and return the raw predictions.

    Args:
        frozen_inference_graph_path: Path of the frozen detection graph
            (``frozen_inference_graph.pb``) used for the detection.
        label_map_path: Path of the label map (``.pbtxt``); it is parsed, but
            the resulting category index is not part of the return value.
        img_to_predict_on: Image object exposing ``size`` and ``getdata()``;
            its data is reshaped to (height, width, 3).

    Returns:
        dict with ``num_detections`` (int), ``detection_classes`` (int64
        array), ``detection_boxes``, ``detection_scores`` and, when the graph
        provides them, ``detection_masks`` - all without the batch dimension.
    """
    output_keys = (
        'num_detections', 'detection_boxes', 'detection_scores',
        'detection_classes', 'detection_masks',
    )

    # Load the (frozen) TensorFlow model into its own graph.
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        graph_def = tf.compat.v1.GraphDef()
        with tf.io.gfile.GFile(frozen_inference_graph_path, 'rb') as fid:
            graph_def.ParseFromString(fid.read())
            tf.import_graph_def(graph_def, name='')

    # Label maps map class indices to category names. The index is still
    # built here, as before, although nothing below consumes it.
    category_index = label_map_util.create_category_index_from_labelmap(
        label_map_path, use_display_name=True)

    def _to_numpy(pil_image):
        # Flat pixel data -> (height, width, 3) uint8 array.
        width, height = pil_image.size
        flat_pixels = np.array(pil_image.getdata())
        return flat_pixels.reshape((height, width, 3)).astype(np.uint8)

    def _infer(batch, graph):
        with graph.as_default(), tf.compat.v1.Session() as sess:
            default_graph = tf.compat.v1.get_default_graph()

            # Collect handles of whichever output tensors the graph offers.
            known_names = set()
            for op in default_graph.get_operations():
                known_names.update(out.name for out in op.outputs)
            tensors = {
                key: default_graph.get_tensor_by_name(key + ':0')
                for key in output_keys
                if key + ':0' in known_names
            }

            if 'detection_masks' in tensors:
                # The following processing is only for a single image.
                boxes_t = tf.squeeze(tensors['detection_boxes'], [0])
                masks_t = tf.squeeze(tensors['detection_masks'], [0])
                # Reframe is required to translate masks from box coordinates
                # to image coordinates and fit the image size.
                n_real = tf.cast(tensors['num_detections'][0], tf.int32)
                boxes_t = tf.slice(boxes_t, [0, 0], [n_real, -1])
                masks_t = tf.slice(masks_t, [0, 0, 0], [n_real, -1, -1])
                reframed = utils_ops.reframe_box_masks_to_image_masks(
                    masks_t, boxes_t, batch.shape[1], batch.shape[2])
                reframed = tf.cast(tf.greater(reframed, 0.5), tf.uint8)
                # Follow the convention by adding back the batch dimension.
                tensors['detection_masks'] = tf.expand_dims(reframed, 0)

            input_tensor = default_graph.get_tensor_by_name('image_tensor:0')

            # Run inference.
            result = sess.run(tensors, feed_dict={input_tensor: batch})

            # All outputs are float32 numpy arrays, so convert types as
            # appropriate and strip the batch dimension.
            result['num_detections'] = int(result['num_detections'][0])
            result['detection_classes'] = \
                result['detection_classes'][0].astype(np.int64)
            for key in ('detection_boxes', 'detection_scores'):
                result[key] = result[key][0]
            if 'detection_masks' in result:
                result['detection_masks'] = result['detection_masks'][0]
        return result

    # The model expects images of shape [1, None, None, 3].
    batched_image = np.expand_dims(_to_numpy(img_to_predict_on), axis=0)
    # Actual detection.
    return _infer(batched_image, detection_graph)
def load_categories(pbtxt_path):
    """Return the category index described by the label map at ``pbtxt_path``.

    Label maps translate class indices into category names, so a predicted
    class ``5`` can be reported as e.g. ``airplane``. The index is built with
    the label map utility, using display names.
    """
    category_index = label_map_util.create_category_index_from_labelmap(
        pbtxt_path,
        use_display_name=True,
    )
    return category_index
def makeHighlight(self):
    """Detect goal scenes in the selected video and export them as clips.

    The video path is read from ``self.f_label``.  Every frame is run
    through the frozen ``soccer_highlight_goal2`` detection graph; whenever
    the capture position is a multiple of ``SAMPLE_EVERY`` the top
    detection score is sampled.  A sample above ``SCORE_THRESHOLD`` percent
    counts as a highlight, and the first non-highlight sample that follows
    a highlight stores a cut mark (a frame index).  As soon as two cut
    marks exist, the span between them is written to ``./videos/`` when it
    is longer than 2 s and at most 60 s; otherwise the older mark is
    dropped and the newer one is kept as the next start.

    When the video has no more frames, ``Ui_HighlightWindow`` is created,
    stored on ``self.window`` and shown.

    Side effects:
        Prints progress to stdout, writes ``.mp4`` files under
        ``./videos/`` and sets ``self.window``.
    """
    SAMPLE_EVERY = 8        # evaluate one frame out of every eight
    SCORE_THRESHOLD = 99.7  # percent confidence of the top detection

    filename = self.f_label.text()
    cap = cv2.VideoCapture(filename)
    try:
        video_for_cut = VideoFileClip(filename)
        fps = cap.get(cv2.CAP_PROP_FPS)

        MODEL_NAME = 'soccer_highlight_goal2'
        # Path to frozen detection graph. This is the actual model that is
        # used for the object detection.
        PATH_TO_FROZEN_GRAPH = MODEL_NAME + '/frozen_inference_graph.pb'
        # List of the strings that is used to add correct label for each box.
        PATH_TO_LABELS = os.path.join('training', 'object-detection.pbtxt')

        detection_graph = tf.Graph()
        with detection_graph.as_default():
            # tf.compat.v1.GraphDef matches the compat.v1 Session used
            # below and works on both TF 1.x and TF 2.x.
            od_graph_def = tf.compat.v1.GraphDef()
            with tf.io.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
                od_graph_def.ParseFromString(fid.read())
            tf.import_graph_def(od_graph_def, name='')

        category_index = label_map_util.create_category_index_from_labelmap(
            PATH_TO_LABELS, use_display_name=True)

        # Tensor handles never change, so resolve them once instead of on
        # every frame.
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        # Each box represents a part of the image where a particular object
        # was detected.
        boxes_tensor = detection_graph.get_tensor_by_name('detection_boxes:0')
        # Each score represents the level of confidence for each object.
        scores_tensor = detection_graph.get_tensor_by_name(
            'detection_scores:0')
        classes_tensor = detection_graph.get_tensor_by_name(
            'detection_classes:0')

        count = 1
        previous_sample_was_highlight = False
        cut = []  # pending cut marks (frame indices), at most two

        with detection_graph.as_default(), \
                tf.compat.v1.Session(graph=detection_graph) as sess:
            while True:
                ret, image_np = cap.read()
                if not ret:
                    # When the video ends, move on to the highlight UI.
                    self.window = Ui_HighlightWindow()
                    self.window.show()
                    break

                # Expand dimensions since the model expects images to have
                # shape: [1, None, None, 3]
                image_np_expanded = np.expand_dims(image_np, axis=0)
                # Actual detection.
                boxes, scores, classes = sess.run(
                    [boxes_tensor, scores_tensor, classes_tensor],
                    feed_dict={image_tensor: image_np_expanded})

                # Visualization of the results of a detection.
                vis_util.visualize_boxes_and_labels_on_image_array(
                    image_np,
                    np.squeeze(boxes),
                    np.squeeze(classes).astype(np.int32),
                    np.squeeze(scores),
                    category_index,
                    use_normalized_coordinates=True,
                    line_thickness=8)

                if int(cap.get(cv2.CAP_PROP_POS_FRAMES)) % SAMPLE_EVERY != 0:
                    continue

                title = "%d.jpg" % count
                count += 1

                if float(100 * scores[0][0]) > SCORE_THRESHOLD:
                    print(title)
                    previous_sample_was_highlight = True
                    continue

                # Only the first non-highlight sample after a highlight
                # closes a scene and produces a cut mark.
                highlight_just_ended = previous_sample_was_highlight
                previous_sample_was_highlight = False
                if not highlight_just_ended:
                    continue

                # Clamp at 0: near the start of the video the look-back
                # offset would otherwise yield a negative frame index.
                cut.append(max(0, SAMPLE_EVERY * (count - 4)))
                print(count - 1)
                if len(cut) < 2:
                    continue

                duration1 = cut[0] / fps
                duration2 = cut[1] / fps
                length = duration2 - duration1
                print(length)

                if length > 60 or length <= 2:
                    # Span unusable: keep the newer mark as the next start.
                    cut[0] = cut[1]
                    del cut[1]
                    continue

                start_hour = duration1 / 3600
                start_min = (duration1 % 3600) / 60
                start_sec = duration1 % 60
                end_hour = duration2 / 3600
                end_min = (duration2 % 3600) / 60
                end_sec = duration2 % 60
                tmp_video = video_for_cut.subclip(duration1, duration2)
                # %d truncates the float components to whole numbers.
                tmp_title = "./videos/%d+%d+%d~%d+%d+%d.mp4" % (
                    start_hour, start_min, start_sec,
                    end_hour, end_min, end_sec)
                tmp_video.write_videofile(tmp_title, codec='libx264')
                cut = []
    finally:
        # Always give the capture handle back, even if loading the model
        # or inference fails part-way through.
        cap.release()
def model_fn(features, labels, mode, params=None):
    """Constructs the object detection model.

    Builds the detection model for the requested estimator mode, computes
    predictions, and -- depending on the mode -- losses, the training op,
    export outputs and evaluation metrics.

    NOTE(review): `use_tpu`, `train_config`, `eval_config`,
    `eval_input_config`, `configs`, `hparams`, `detection_model_fn`,
    `postprocess_on_cpu`, `unstack_batch` and `provide_groundtruth` are not
    defined in this block; presumably they are closed over from an
    enclosing scope -- confirm against the full file.

    Args:
      features: Dictionary of feature tensors, returned from `input_fn`.
      labels: Dictionary of groundtruth tensors if mode is TRAIN or EVAL,
        otherwise None.
      mode: Mode key from tf.estimator.ModeKeys.
      params: Parameter dictionary passed from the estimator. It is
        defaulted to an empty dict and not read further in this function.

    Returns:
      A `TPUEstimatorSpec` when running on TPU in a non-EVAL mode,
      otherwise an `EstimatorSpec`, encapsulating the model and its
      serving configurations.
    """
    params = params or {}
    total_loss, train_op, detections, export_outputs = None, None, None, None
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    # Make sure to set the Keras learning phase. True during training,
    # False for inference.
    tf.keras.backend.set_learning_phase(is_training)
    # Set policy for mixed-precision training with Keras-based models.
    if use_tpu and train_config.use_bfloat16:
        from tensorflow.python.keras.engine import base_layer_utils  # pylint: disable=g-import-not-at-top
        # Enable v2 behavior, as `mixed_bfloat16` is only supported in TF 2.0.
        base_layer_utils.enable_v2_dtype_behavior()
        tf2.keras.mixed_precision.experimental.set_policy('mixed_bfloat16')
    detection_model = detection_model_fn(is_training=is_training,
                                         add_summaries=(not use_tpu))
    # Only set (below) when fine-tuning from a checkpoint on TPU.
    scaffold_fn = None

    if mode == tf.estimator.ModeKeys.TRAIN:
        labels = unstack_batch(labels,
                               unpad_groundtruth_tensors=train_config.
                               unpad_groundtruth_tensors)
    elif mode == tf.estimator.ModeKeys.EVAL:
        # For evaling on train data, it is necessary to check whether groundtruth
        # must be unpadded.
        boxes_shape = (labels[fields.InputDataFields.groundtruth_boxes].
                       get_shape().as_list())
        unpad_groundtruth_tensors = boxes_shape[
            1] is not None and not use_tpu
        labels = unstack_batch(
            labels, unpad_groundtruth_tensors=unpad_groundtruth_tensors)

    if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
        provide_groundtruth(detection_model, labels)

    preprocessed_images = features[fields.InputDataFields.image]

    side_inputs = detection_model.get_side_inputs(features)

    if use_tpu and train_config.use_bfloat16:
        with tf.tpu.bfloat16_scope():
            prediction_dict = detection_model.predict(
                preprocessed_images,
                features[fields.InputDataFields.true_image_shape],
                **side_inputs)
            # Cast predictions back to float32 for the code that follows.
            prediction_dict = ops.bfloat16_to_float32_nested(
                prediction_dict)
    else:
        prediction_dict = detection_model.predict(
            preprocessed_images,
            features[fields.InputDataFields.true_image_shape],
            **side_inputs)

    # Single-argument adapter so postprocessing can be handed to
    # `tf.tpu.outside_compilation` with one tuple argument.
    def postprocess_wrapper(args):
        return detection_model.postprocess(args[0], args[1])

    # Detections are only produced for EVAL and PREDICT; in TRAIN mode
    # `detections` stays None.
    if mode in (tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT):
        if use_tpu and postprocess_on_cpu:
            detections = tf.tpu.outside_compilation(
                postprocess_wrapper,
                (prediction_dict,
                 features[fields.InputDataFields.true_image_shape]))
        else:
            detections = postprocess_wrapper(
                (prediction_dict,
                 features[fields.InputDataFields.true_image_shape]))

    if mode == tf.estimator.ModeKeys.TRAIN:
        # Checkpoint initialisation requires both a configured
        # fine_tune_checkpoint and a truthy `hparams.load_pretrained`.
        load_pretrained = hparams.load_pretrained if hparams else False
        if train_config.fine_tune_checkpoint and load_pretrained:
            if not train_config.fine_tune_checkpoint_type:
                # train_config.from_detection_checkpoint field is deprecated. For
                # backward compatibility, set train_config.fine_tune_checkpoint_type
                # based on train_config.from_detection_checkpoint.
                if train_config.from_detection_checkpoint:
                    train_config.fine_tune_checkpoint_type = 'detection'
                else:
                    train_config.fine_tune_checkpoint_type = 'classification'
            asg_map = detection_model.restore_map(
                fine_tune_checkpoint_type=train_config.
                fine_tune_checkpoint_type,
                load_all_detection_checkpoint_vars=(
                    train_config.load_all_detection_checkpoint_vars))
            available_var_map = (
                variables_helper.get_variables_available_in_checkpoint(
                    asg_map,
                    train_config.fine_tune_checkpoint,
                    include_global_step=False))
            if use_tpu:

                # On TPU the initialisation is deferred into a scaffold
                # function that is passed to the TPUEstimatorSpec.
                def tpu_scaffold():
                    tf.train.init_from_checkpoint(
                        train_config.fine_tune_checkpoint,
                        available_var_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(
                    train_config.fine_tune_checkpoint, available_var_map)

    if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
        if (mode == tf.estimator.ModeKeys.EVAL
                and eval_config.use_dummy_loss_in_eval):
            # Skip the real loss computation during evaluation.
            total_loss = tf.constant(1.0)
            losses_dict = {'Loss/total_loss': total_loss}
        else:
            losses_dict = detection_model.loss(
                prediction_dict,
                features[fields.InputDataFields.true_image_shape])
            losses = [loss_tensor for loss_tensor in losses_dict.values()]
            if train_config.add_regularization_loss:
                regularization_losses = detection_model.regularization_losses(
                )
                if use_tpu and train_config.use_bfloat16:
                    regularization_losses = ops.bfloat16_to_float32_nested(
                        regularization_losses)
                if regularization_losses:
                    regularization_loss = tf.add_n(
                        regularization_losses, name='regularization_loss')
                    losses.append(regularization_loss)
                    losses_dict[
                        'Loss/regularization_loss'] = regularization_loss
            total_loss = tf.add_n(losses, name='total_loss')
            losses_dict['Loss/total_loss'] = total_loss

        if 'graph_rewriter_config' in configs:
            graph_rewriter_fn = graph_rewriter_builder.build(
                configs['graph_rewriter_config'], is_training=is_training)
            graph_rewriter_fn()

        # TODO(rathodv): Stop creating optimizer summary vars in EVAL mode once we
        # can write learning rate summaries on TPU without host calls.
        global_step = tf.train.get_or_create_global_step()
        training_optimizer, optimizer_summary_vars = optimizer_builder.build(
            train_config.optimizer)

    if mode == tf.estimator.ModeKeys.TRAIN:
        if use_tpu:
            training_optimizer = tf.tpu.CrossShardOptimizer(
                training_optimizer)

        # Optionally freeze some layers by setting their gradients to be zero.
        trainable_variables = None
        include_variables = (train_config.update_trainable_variables
                             if train_config.update_trainable_variables else
                             None)
        exclude_variables = (train_config.freeze_variables
                             if train_config.freeze_variables else None)
        trainable_variables = slim.filter_variables(
            tf.trainable_variables(),
            include_patterns=include_variables,
            exclude_patterns=exclude_variables)

        # None leaves gradient clipping disabled.
        clip_gradients_value = None
        if train_config.gradient_clipping_by_norm > 0:
            clip_gradients_value = train_config.gradient_clipping_by_norm

        if not use_tpu:
            for var in optimizer_summary_vars:
                tf.summary.scalar(var.op.name, var)
        # An empty list is passed on TPU and None otherwise, unless
        # gradient summaries are explicitly requested below.
        summaries = [] if use_tpu else None
        if train_config.summarize_gradients:
            summaries = [
                'gradients', 'gradient_norm', 'global_gradient_norm'
            ]
        train_op = slim.optimizers.optimize_loss(
            loss=total_loss,
            global_step=global_step,
            learning_rate=None,
            clip_gradients=clip_gradients_value,
            optimizer=training_optimizer,
            update_ops=detection_model.updates(),
            variables=trainable_variables,
            summaries=summaries,
            name='')  # Preventing scope prefix on all variables.

    if mode == tf.estimator.ModeKeys.PREDICT:
        exported_output = exporter_lib.add_output_tensor_nodes(detections)
        export_outputs = {
            tf.saved_model.signature_constants.PREDICT_METHOD_NAME:
            tf.estimator.export.PredictOutput(exported_output)
        }

    eval_metric_ops = None
    scaffold = None
    if mode == tf.estimator.ModeKeys.EVAL:
        # Detections without a class field are treated as class-agnostic.
        class_agnostic = (fields.DetectionResultFields.detection_classes
                          not in detections)
        groundtruth = _prepare_groundtruth_for_eval(
            detection_model, class_agnostic,
            eval_input_config.max_number_of_boxes)
        use_original_images = fields.InputDataFields.original_image in features
        if use_original_images:
            eval_images = features[fields.InputDataFields.original_image]
            # Keep the first three columns of every true_image_shape row.
            true_image_shapes = tf.slice(
                features[fields.InputDataFields.true_image_shape], [0, 0],
                [-1, 3])
            original_image_spatial_shapes = features[
                fields.InputDataFields.original_image_spatial_shape]
        else:
            eval_images = features[fields.InputDataFields.image]
            true_image_shapes = None
            original_image_spatial_shapes = None

        eval_dict = eval_util.result_dict_for_batched_example(
            eval_images,
            features[inputs.HASH_KEY],
            detections,
            groundtruth,
            class_agnostic=class_agnostic,
            scale_to_absolute=True,
            original_image_spatial_shapes=original_image_spatial_shapes,
            true_image_shapes=true_image_shapes)

        if fields.InputDataFields.image_additional_channels in features:
            eval_dict[fields.InputDataFields.
                      image_additional_channels] = features[
                          fields.InputDataFields.image_additional_channels]

        if class_agnostic:
            category_index = label_map_util.create_class_agnostic_category_index(
            )
        else:
            category_index = label_map_util.create_category_index_from_labelmap(
                eval_input_config.label_map_path)
        vis_metric_ops = None
        # Visualisation metrics are built only off-TPU and only when the
        # original images are present in `features`.
        if not use_tpu and use_original_images:
            keypoint_edges = [(kp.start, kp.end)
                              for kp in eval_config.keypoint_edge]

            eval_metric_op_vis = vis_utils.VisualizeSingleFrameDetections(
                category_index,
                max_examples_to_draw=eval_config.num_visualizations,
                max_boxes_to_draw=eval_config.max_num_boxes_to_visualize,
                min_score_thresh=eval_config.min_score_threshold,
                use_normalized_coordinates=False,
                keypoint_edges=keypoint_edges or None)
            vis_metric_ops = eval_metric_op_vis.get_estimator_eval_metric_ops(
                eval_dict)

        # Eval metrics on a single example.
        eval_metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
            eval_config, list(category_index.values()), eval_dict)
        # Report every loss term as a mean metric.
        for loss_key, loss_tensor in iter(losses_dict.items()):
            eval_metric_ops[loss_key] = tf.metrics.mean(loss_tensor)
        # Optimizer summary variables are exposed as metrics with a no-op
        # update.
        for var in optimizer_summary_vars:
            eval_metric_ops[var.op.name] = (var, tf.no_op())
        if vis_metric_ops is not None:
            eval_metric_ops.update(vis_metric_ops)
        # Normalise all metric keys to `str`.
        eval_metric_ops = {str(k): v for k, v in eval_metric_ops.items()}

        if eval_config.use_moving_averages:
            variable_averages = tf.train.ExponentialMovingAverage(0.0)
            variables_to_restore = variable_averages.variables_to_restore()
            keep_checkpoint_every_n_hours = (
                train_config.keep_checkpoint_every_n_hours)
            saver = tf.train.Saver(
                variables_to_restore,
                keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours
            )
            scaffold = tf.train.Scaffold(saver=saver)

    # EVAL executes on CPU, so use regular non-TPU EstimatorSpec.
    if use_tpu and mode != tf.estimator.ModeKeys.EVAL:
        # `eval_metric_ops` is only assigned in the EVAL branch above, so
        # it is still None on this path.
        return tf.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            scaffold_fn=scaffold_fn,
            predictions=detections,
            loss=total_loss,
            train_op=train_op,
            eval_metrics=eval_metric_ops,
            export_outputs=export_outputs)
    else:
        if scaffold is None:
            # No moving-average scaffold was built: fall back to a sharded
            # saver registered in the SAVERS collection.
            keep_checkpoint_every_n_hours = (
                train_config.keep_checkpoint_every_n_hours)
            saver = tf.train.Saver(
                sharded=True,
                keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours,
                save_relative_paths=True)
            tf.add_to_collection(tf.GraphKeys.SAVERS, saver)
            scaffold = tf.train.Scaffold(saver=saver)
        return tf.estimator.EstimatorSpec(mode=mode,
                                          predictions=detections,
                                          loss=total_loss,
                                          train_op=train_op,
                                          eval_metric_ops=eval_metric_ops,
                                          export_outputs=export_outputs,
                                          scaffold=scaffold)
from utils import label_map_util
from utils import visualization_utils as vis_util

#MODEL_NAME = 'write the name of model you have created'
#PATH_TO_FROZEN_GRAPH = MODEL_NAME + 'frozen_inference_graph.pb'
#PATH_TO_LABELS = os.path.join()

# Load the frozen detection model into its own graph.  The compat.v1 /
# tf.io entry points are used so the script runs on TF 1.x and TF 2.x alike.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.compat.v1.GraphDef()
    with tf.io.gfile.GFile('frozen_inference_graph.pb', 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')

# Maps the integer class ids predicted by the model to label entries.
category_index = label_map_util.create_category_index_from_labelmap(
    'label.pbtxt', use_display_name=True)


def load_image_into_numpy_array(image):
    """Convert a PIL image into an ``(height, width, 3)`` uint8 array.

    The image is converted to RGB first, so inputs with an alpha channel,
    a palette or a single grey channel (common for ``.png`` files) produce
    a three-channel array instead of failing on the reshape.  RGB inputs
    yield exactly the same array as before.

    Args:
        image: A ``PIL.Image.Image`` instance.

    Returns:
        A writable ``numpy.ndarray`` of dtype ``uint8`` and shape
        ``(height, width, 3)``.
    """
    # np.array (not np.asarray) guarantees a writable copy, which the
    # visualisation helpers need because they draw on the array in place.
    return np.array(image.convert('RGB'), dtype=np.uint8)


PATH_TO_TEST_IMAGES_DIR = 'test_images'
# Specify the paths according to the test images you have chosen.
TEST_IMAGE_PATHS = [os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image3.png')]

# Size, in inches, of the output images.
IMAGE_SIZE = (12, 8)
#for file in tar_file.getmembers(): # file_name = os.path.basename(file.name) # if 'frozen_inference_graph.pb' in file_name: # tar_file.extract(file, os.getcwd()) # ## Load a (frozen) Tensorflow model into memory. detection_graph = tf.Graph() with detection_graph.as_default(): od_graph_def = tf.GraphDef() with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid: serialized_graph = fid.read() od_graph_def.ParseFromString(serialized_graph) tf.import_graph_def(od_graph_def, name='') category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True) def load_image_into_numpy_array(image): (im_width, im_height) = image.size return np.array(image.getdata()).reshape( (im_height, im_width, 3)).astype(np.uint8) file_path = filedialog.askopenfilename() PATH_TO_TEST_IMAGES_DIR = 'test_images' TEST_IMAGE_PATHS = file_path IMAGE_SIZE = (12, 8) def run_inference_for_single_image(image, graph): with graph.as_default(): with tf.Session() as sess: ops = tf.get_default_graph().get_operations() all_tensor_names = {output.name for op in ops for output in op.outputs} tensor_dict = {}
if score > threshold_score: num += 1 width = image.shape[1] height = image.shape[0] y_min = int(box[0] * height) x_min = int(box[1] * width) y_max = int(box[2] * height) x_max = int(box[3] * width) image_cut = image[y_min:y_max, x_min:x_max] angle = label_dict[label] image_rotate = rotate(image_cut, 360 - angle) out_file = os.path.join(out_path, file_name) cv2.imwrite(out_file, image_rotate) else: print('no object detected:', file_name) print(num_all, num, time_all / num_all) if __name__ == '__main__': graph_path = '/Users/gaoxin/models/ssd_mobilenet_v2_coco/exported_graph/frozen_inference_graph.pb' label_path = '/Users/gaoxin/models/ssd_mobilenet_v2_coco/idcard_label_map.pbtxt' image_path = '/Users/gaoxin/data/idcard/img_train/object_detection/data_idcard_front_eval' out_path = '/Users/gaoxin/data/idcard/img_eval_detected' detector = ObjectDetect(graph_path) category_index = label_map_util.create_category_index_from_labelmap( label_path, use_display_name=True) # inference_show(detector, category_index, image_path) inference_path(detector, image_path, out_path, 0.6)