def __init__(self, NUM_DETCTIONS):
    """Initialise the thread, open a session and build the SSD-300 graph.

    Args:
        NUM_DETCTIONS: Stored unchanged on ``self.NUM``.
    """
    threading.Thread.__init__(self)
    self.NUM = NUM_DETCTIONS
    self.isess = tf.InteractiveSession()

    input_size = (300, 300)
    layout = 'NHWC'

    # One-image placeholder, followed by the evaluation pre-processing.
    self.img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
    processed, _, _, self.bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
        self.img_input, None, None, input_size, layout,
        resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
    self.image_4d = tf.expand_dims(processed, 0)

    # SSD network in inference mode; variables are created (reuse=None).
    detector = ssd_vgg_300.SSDNet()
    with slim.arg_scope(detector.arg_scope(data_format=layout)):
        self.predictions, self.localisations, _, _ = detector.net(
            self.image_4d, is_training=False, reuse=None)

    checkpoint_state = tf.train.get_checkpoint_state(
        './checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt')
    self.isess.run(tf.global_variables_initializer())
    restorer = tf.train.Saver()
    # Weights are only restored when a checkpoint state was found; otherwise
    # the network keeps its freshly initialised variables.
    # NOTE(review): the restore path is a bare file name resolved against the
    # working directory, not checkpoint_state.model_checkpoint_path -- confirm
    # that this is intended.
    if checkpoint_state and checkpoint_state.model_checkpoint_path:
        restorer.restore(self.isess,
                         "VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt")

    self.ssd_anchors = detector.anchors(input_size)
    self.detecction = []
    self.iDetections = []
def get_ssd_model_params(ckpt_filename):
    """Build the SSD-300 inference graph, restore it and return its handles.

    Args:
        ckpt_filename: Checkpoint path handed to ``tf.train.Saver.restore``.

    Returns:
        A dict with the keys ``'isess'``, ``'image_4d'``, ``'predictions'``,
        ``'localisations'``, ``'bbox_img'``, ``'img_input'`` and
        ``'ssd_anchors'``.
    """
    slim = tf.contrib.slim

    # Let the session grow GPU memory on demand instead of grabbing it all.
    session_config = tf.ConfigProto(
        log_device_placement=False,
        gpu_options=tf.GPUOptions(allow_growth=True))
    session = tf.InteractiveSession(config=session_config)

    input_size = (300, 300)
    layout = 'NHWC'

    # Image placeholder and evaluation pre-processing (warp-resize).
    image_placeholder = tf.placeholder(tf.uint8, shape=(None, None, 3))
    processed, _, _, image_bbox = ssd_vgg_preprocessing.preprocess_for_eval(
        image_placeholder, None, None, input_size, layout,
        resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
    batched_image = tf.expand_dims(processed, 0)

    # The network is always created fresh here: no local named 'ssd_net'
    # exists before this point, so the reuse flag is None.
    detector = ssd_vgg_300.SSDNet()
    with slim.arg_scope(detector.arg_scope(data_format=layout)):
        class_preds, box_preds, _, _ = detector.net(
            batched_image, is_training=False, reuse=None)

    # Initialise every variable, then overwrite them from the checkpoint.
    session.run(tf.global_variables_initializer())
    tf.train.Saver().restore(session, ckpt_filename)

    return {
        'isess': session,
        'image_4d': batched_image,
        'predictions': class_preds,
        'localisations': box_preds,
        'bbox_img': image_bbox,
        'img_input': image_placeholder,
        'ssd_anchors': detector.anchors(input_size),
    }
def __init__(self, weights='../checkpoints/ssd_300_vgg.ckpt', mem_frac=1):
    """Open the sessions, build the SSD-300 graph and restore ``weights``.

    Args:
        weights: Checkpoint path handed to ``tf.train.Saver.restore``.
        mem_frac: Value passed as ``per_process_gpu_memory_fraction``.
    """
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=mem_frac)

    # Two sessions are opened: a plain one kept on ``self.sess`` and the
    # interactive one on ``self.isess`` that the restore below uses.
    self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    self.isess = tf.InteractiveSession(
        config=tf.ConfigProto(log_device_placement=False,
                              gpu_options=gpu_options))

    self.net_shape = (300, 300)
    layout = 'NHWC'

    # Image placeholder and evaluation pre-processing.
    self.img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
    processed, _, _, self.bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
        self.img_input, None, None, self.net_shape, layout,
        resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
    self.image_4d = tf.expand_dims(processed, 0)

    # No local named 'ssd_net' exists yet, so variables are never reused.
    self.ssd_net = ssd_vgg_300.SSDNet()
    with slim.arg_scope(self.ssd_net.arg_scope(data_format=layout)):
        self.predictions, self.localisations, _, _ = self.ssd_net.net(
            self.image_4d, is_training=False, reuse=None)

    self.isess.run(tf.global_variables_initializer())
    tf.train.Saver().restore(self.isess, weights)

    # SSD default anchor boxes.
    self.ssd_anchors = self.ssd_net.anchors(self.net_shape)
def load(self, net_shape=(300, 300), data_format='NHWC', ckpt_filename=PERSON_MODEL):
    """Build the SSD-300 graph on ``self`` and restore it into ``self.sess``.

    Args:
        net_shape: Shape passed to the pre-processing and the anchor builder.
        data_format: Data format passed to pre-processing and the arg scope.
        ckpt_filename: Checkpoint path handed to ``tf.train.Saver.restore``.
    """
    self.img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))

    # Evaluation pre-processing; all four outputs are kept on the instance.
    (self.image_pre,
     self.labels_pre,
     self.bboxes_pre,
     self.bbox_img) = ssd_vgg_preprocessing.preprocess_for_eval(
        self.img_input, None, None, net_shape, data_format,
        resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
    self.image_4d = tf.expand_dims(self.image_pre, 0)

    # SSD model, built with reuse explicitly disabled.
    detector = ssd_vgg_300.SSDNet()
    with slim.arg_scope(detector.arg_scope(data_format=data_format)):
        self.predictions, self.localisations, _, _ = detector.net(
            self.image_4d, is_training=False, reuse=False)

    # Initialise all variables, then load the checkpoint over them.
    self.sess.run(tf.global_variables_initializer())
    tf.train.Saver().restore(self.sess, ckpt_filename)

    # SSD default anchor boxes.
    self.ssd_anchors = detector.anchors(net_shape)
def get_tenors(input=None, reuse=None, bbox=False):
    """Build an 8-class SSD-300 graph and return selected tensors.

    Args:
        input: Optional 4-D tensor. When given, a fixed per-channel constant
            is subtracted from it and the result is fed to the network.
            When omitted, a uint8 placeholder is created instead.
        reuse: Accepted for interface compatibility; not used by this
            function.
        bbox: When true, the image is additionally run through
            ``preprocess_for_eval_multi`` and the full tuple of tensors is
            returned.

    Returns:
        * ``bbox`` true: ``(predictions, localisations, bbox_img, logits,
          end_points, image_4d, ssd)``.
        * ``bbox`` false and ``input`` given: the ``end_points`` dict.
        * otherwise ``None``.

        ``end_points`` maps ``"input"`` to the network input and, for every
        graph node whose name matches the conv-ReLU / pool5 pattern below,
        the second-to-last component of the node name to its first output.
    """
    # Identity test: ``input`` may be a tensor or array, for which ``== None``
    # is an equality operation rather than a plain None check.
    has_input = input is not None

    # Input placeholder.
    if has_input:
        assert(input.get_shape().ndims == 4)
        image_4d = input - np.array(
            [123.6800, 116.7790, 103.9390]).reshape((1, 1, 1, 3))
    else:
        image_4d = tf.placeholder(tf.uint8, shape=(None, None, None, 3))

    if bbox:
        image_pre, _, _, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval_multi(
            image_4d, None, None, (None, None),
            resize=ssd_vgg_preprocessing.Resize.NONE)
        # ``image_pre`` only exists on this path, so the expansion is kept
        # under the same condition.
        if not has_input:
            image_4d = tf.expand_dims(image_pre, 0)

    # Network parameters.
    params = ssd_vgg_300.SSDNet.default_params
    params = params._replace(num_classes=8)

    # SSD network construction.
    ssd = ssd_vgg_300.SSDNet(params)
    with slim.arg_scope(ssd.arg_scope(weight_decay=0.0005)):
        predictions, localisations, logits, _ = ssd.net(
            image_4d, is_training=False)

    # The end points returned by the network are discarded and rebuilt from
    # the graph's node names.
    end_points = {}
    end_points["input"] = image_4d
    rs = r"(.*\/conv[0-9]\/conv[0-9]_[0-9]/Relu$|.*ssd_300_vgg\/pool5\/MaxPool$)"
    rc = re.compile(rs)
    graph = tf.get_default_graph()
    for node in graph.as_graph_def().node:
        if rc.match(node.name):
            layer_name = node.name.split("/")[-2]
            end_points[layer_name] = graph.get_tensor_by_name(node.name + ":0")

    if bbox:
        return predictions, localisations, bbox_img, logits, end_points, image_4d, ssd
    if has_input:
        return end_points
    return None
def __init__(self, params=None):
    """Create the student and teacher SSD networks.

    Args:
        params: Parameters for the student network. Anything that is not an
            ``SSDParams`` instance is replaced by ``SSDNet.default_params``.
    """
    self.params = params if isinstance(params, SSDParams) else SSDNet.default_params

    # The student gets the selected parameters; the teacher is constructed
    # without arguments.
    self.student_model = ssd_mobilenetv1_300.SSDNet(self.params)
    self.teacher_model = ssd_vgg_300.SSDNet()

    # Both layer dictionaries start out empty.
    self.teacher_kd_layer = {}
    self.student_kd_layer = {}
def main():
    """Run SSD-300 on the last image of ``../demo/`` and plot the boxes."""
    # Session that grows GPU memory on demand.
    session_config = tf.ConfigProto(
        log_device_placement=False,
        gpu_options=tf.GPUOptions(allow_growth=True))
    sess = tf.Session(config=session_config)

    input_size = (300, 300)
    layout = 'NHWC'

    # Image placeholder and evaluation pre-processing (warp-resize).
    img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
    processed, _, _, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
        img_input, None, None, input_size, layout,
        resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
    image_4d = tf.expand_dims(processed, 0)

    # No local named 'ssd_net' exists yet, so variables are never reused.
    detector = ssd_vgg_300.SSDNet()
    with slim.arg_scope(detector.arg_scope(data_format=layout)):
        predictions, localisations, _, _ = detector.net(
            image_4d, is_training=False, reuse=None)

    # Initialise the variables, then restore them from the checkpoint.
    sess.run(tf.global_variables_initializer())
    tf.train.Saver().restore(sess, '../checkpoints/ssd_300_vgg.ckpt')

    ssd_anchors = detector.anchors(input_size)

    # Pick the last entry of the sorted demo directory listing.
    demo_dir = '../demo/'
    last_name = sorted(os.listdir(demo_dir))[-1]
    img = mpimg.imread(demo_dir + last_name)

    rclasses, rscores, rbboxes = process_image(
        sess, img, image_4d, predictions, localisations, bbox_img,
        img_input, ssd_anchors)
    visualization.plt_bboxes(img, rclasses, rscores, rbboxes)
def __init__(self, params=None):
    """Build the SSD graph selected by ``MODEL`` and restore its weights.

    ``MODEL == 512`` selects the 512x512 network and checkpoint; any other
    value selects the 300x300 ones.

    Args:
        params: Accepted for interface compatibility; not used here.
    """
    use_512 = MODEL == 512

    # TensorFlow session config: grow GPU memory only when needed.
    self.gpu_options = tf.GPUOptions(allow_growth=True)
    self.config = tf.ConfigProto(log_device_placement=False,
                                 gpu_options=self.gpu_options)

    # Input placeholder.
    self.net_shape = (512, 512) if use_512 else (300, 300)
    self.data_format = 'NHWC'
    self.img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))

    # Evaluation pre-processing: resize to SSD net shape.
    (self.image_pre,
     self.labels_pre,
     self.bboxes_pre,
     self.bbox_img) = ssd_vgg_preprocessing.preprocess_for_eval(
        self.img_input, None, None, self.net_shape, self.data_format,
        resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
    self.image_4d = tf.expand_dims(self.image_pre, 0)

    # Decide whether the network variables already exist in the graph.
    # Probing with tf.get_variable(name) and no shape raises ValueError both
    # when the variable is missing and when it exists outside a reuse scope,
    # so it could never report "already built". Looking the variable up in
    # the graph collection answers the question directly.
    first_weights = ('ssd_512_vgg/conv1/conv1_1/weights' if use_512
                     else 'ssd_300_vgg/conv1/conv1_1/weights')
    already_built = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                      scope=first_weights)
    self.reuse = True if already_built else None

    # Define the SSD model.
    self.ssd_net = ssd_vgg_512.SSDNet() if use_512 else ssd_vgg_300.SSDNet()
    with slim.arg_scope(
            self.ssd_net.arg_scope(data_format=self.data_format)):
        self.predictions, self.localisations, _, _ = self.ssd_net.net(
            self.image_4d, is_training=False, reuse=self.reuse)

    # Checkpoint to restore from.
    self.ckpt_filename = (
        './checkpoints/VGG_VOC0712_SSD_512x512_ft_iter_120000.ckpt'
        if use_512 else './checkpoints/ssd_300_vgg.ckpt')

    # SSD default anchor boxes.
    self.ssd_anchors = self.ssd_net.anchors(self.net_shape)

    # Open the session, initialise the variables and load the checkpoint.
    self.isess = tf.InteractiveSession(config=self.config)
    self.isess.run(tf.global_variables_initializer())
    self.saver = tf.train.Saver()
    self.saver.restore(self.isess, self.ckpt_filename)
def load(self):
    """Build the configured SSD graph, restore it and bundle the handles.

    Reads ``self.net_shape``, ``self.net_to_use``, ``self.isess`` and
    ``self.ckpt_filename``.

    Returns:
        An ``SSD_Bundle`` built from the network, the input placeholder,
        the predictions, the localisations, the image bounding box, the
        batched image and the anchors.

    Raises:
        Exception: With the message "Could not load model." whenever any
            step fails; the underlying error text is printed first.
    """
    try:
        layout = 'NHWC'
        image_placeholder = tf.placeholder(tf.uint8, shape=(None, None, 3))

        # Evaluation pre-processing: resize to the SSD net shape.
        processed, _, _, image_bbox = ssd_vgg_preprocessing.preprocess_for_eval(
            image_placeholder, None, None, self.net_shape, layout,
            resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
        batched_image = tf.expand_dims(processed, 0)

        # 'ssd-300' selects the 300 network; any other value selects 512.
        if self.net_to_use == 'ssd-300':
            network_cls = ssd_vgg_300.SSDNet
        else:
            network_cls = ssd_vgg_512.SSDNet
        detector = network_cls()

        # No local named 'ssd_net' exists yet, so variables are not reused.
        with tf.contrib.slim.arg_scope(
                detector.arg_scope(data_format=layout)):
            class_preds, box_preds, _, _ = detector.net(
                batched_image, is_training=False, reuse=None)

        # Initialise all variables, then load the checkpoint over them.
        self.isess.run(tf.global_variables_initializer())
        tf.train.Saver().restore(self.isess, self.ckpt_filename)

        # SSD default anchor boxes.
        anchors = detector.anchors(self.net_shape)
        return SSD_Bundle(detector, image_placeholder, class_preds,
                          box_preds, image_bbox, batched_image, anchors)
    except Exception as e:
        message = "Could not load model."
        print(message + str(e))
        raise Exception(message)
def __init__(self, select_threshold=0.5, nms_threshold=0.45, net_shape=(300, 300)):
    """Store the detection settings and build/restore the SSD-300 graph.

    Args:
        select_threshold: Stored on ``self.select_threshold``.
        nms_threshold: Stored on ``self.nms_threshold``.
        net_shape: Stored on ``self.net_shape``. The graph itself is always
            built for (300, 300), whatever is passed here.
    """
    self.select_threshold = select_threshold
    self.nms_threshold = nms_threshold
    self.net_shape = net_shape

    slim = tf.contrib.slim

    # Session that grows GPU memory on demand.
    session_config = tf.ConfigProto(
        log_device_placement=False,
        gpu_options=tf.GPUOptions(allow_growth=True))
    self.isess = tf.InteractiveSession(config=session_config)

    # Fixed graph geometry, independent of the ``net_shape`` argument.
    graph_shape = (300, 300)
    layout = 'NHWC'

    # Input placeholder and evaluation pre-processing.
    self.img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
    (self.image_pre,
     self.labels_pre,
     self.bboxes_pre,
     self.bbox_img) = ssd_vgg_preprocessing.preprocess_for_eval(
        self.img_input, None, None, graph_shape, layout,
        resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
    self.image_4d = tf.expand_dims(self.image_pre, 0)

    # No local named 'ssd_net' exists yet, so variables are never reused.
    detector = ssd_vgg_300.SSDNet()
    with slim.arg_scope(detector.arg_scope(data_format=layout)):
        self.predictions, self.localisations, _, _ = detector.net(
            self.image_4d, is_training=False, reuse=None)

    # Initialise the variables, then restore them from the checkpoint.
    self.isess.run(tf.global_variables_initializer())
    tf.train.Saver().restore(self.isess, 'checkpoints/ssd_300_vgg.ckpt')

    self.ssd_anchors = detector.anchors(graph_shape)
def __init__(self):
    """Open a session, build a 3-class SSD-300 graph and restore it."""
    self.isess = tf.InteractiveSession()
    self.num_classes = 3
    self.ckpt_filename = '/home/hanxy/catkin_ws/src/icra_firefly/scripts/ckpts/model.ckpt-38804'

    # Default SSD parameters with the class count overridden.
    detector = ssd_vgg_300.SSDNet(
        ssd_vgg_300.SSDNet.default_params._replace(
            num_classes=self.num_classes))

    self.net_shape = (300, 300)
    self.img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
    self.layers_anchors = detector.anchors(self.net_shape, dtype=np.float32)

    # Evaluation pre-processing with the helper's default options.
    processed, _, _, self.bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
        self.img_input, None, None, self.net_shape)
    self.image_4d = tf.expand_dims(processed, 0)

    # Define the SSD network.
    with slim.arg_scope(detector.arg_scope(weight_decay=0.00004)):
        self.predictions, self.localisations, _, _ = detector.net(
            self.image_4d, is_training=False)

    # Initialise the variables, then restore the SSD model over them.
    self.isess.run(tf.global_variables_initializer())
    tf.train.Saver().restore(self.isess, self.ckpt_filename)
def init_model(ckpt_path):
    """Build the SSD-300 graph, restore ``ckpt_path`` and return the handles.

    Args:
        ckpt_path: Checkpoint path handed to ``tf.train.Saver.restore``.

    Returns:
        A dict with the keys ``'l_VOC_CLASS'`` (the keys of
        ``pascalvoc_common.VOC_LABELS`` as a list), ``'ssd_anchors'``,
        ``'img_input'``, ``'isess'``, ``'image_4d'``, ``'predictions'``,
        ``'localisations'`` and ``'bbox_img'``.
    """
    class_names = list(pascalvoc_common.VOC_LABELS.keys())

    # Session that grows GPU memory on demand.
    session_config = tf.ConfigProto(
        log_device_placement=False,
        gpu_options=tf.GPUOptions(allow_growth=True))
    session = tf.InteractiveSession(config=session_config)

    input_size = (300, 300)
    layout = 'NHWC'

    # Image placeholder and evaluation pre-processing (warp-resize).
    image_placeholder = tf.placeholder(tf.uint8, shape=(None, None, 3))
    processed, _, _, image_bbox = ssd_vgg_preprocessing.preprocess_for_eval(
        image_placeholder, None, None, input_size, layout,
        resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
    batched_image = tf.expand_dims(processed, 0)

    # No local named 'ssd_net' exists yet, so variables are never reused.
    detector = ssd_vgg_300.SSDNet()
    with slim.arg_scope(detector.arg_scope(data_format=layout)):
        class_preds, box_preds, _, _ = detector.net(
            batched_image, is_training=False, reuse=None)

    # Initialise the variables, then restore them from the checkpoint.
    session.run(tf.global_variables_initializer())
    tf.train.Saver().restore(session, ckpt_path)

    return {
        'l_VOC_CLASS': class_names,
        'ssd_anchors': detector.anchors(input_size),
        'img_input': image_placeholder,
        'isess': session,
        'image_4d': batched_image,
        'predictions': class_preds,
        'localisations': box_preds,
        'bbox_img': image_bbox,
    }
def main(argv):
    """Run SSD-300 over every frame of a video and save the detections.

    Args:
        argv: Object with ``video_path`` and ``output_dir`` attributes. When
            ``output_dir`` is ``None`` a directory named after the video file
            (text before its first dot) is created next to the video.

    Raises:
        AssertionError: If ``argv.output_dir`` is given but is not an
            existing directory.
    """
    # TensorFlow session: grow GPU memory only when needed.
    gpu_options = tf.GPUOptions(allow_growth=True)
    config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)
    sess = tf.Session(config=config)
    try:
        # Input placeholder.
        net_shape = (300, 300)
        data_format = 'NHWC'
        img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))

        # Evaluation pre-processing: resize to SSD net shape.
        image_pre, _, _, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
            img_input, None, None, net_shape, data_format,
            resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
        image_4d = tf.expand_dims(image_pre, 0)

        # Define the SSD model. Nothing named 'ssd_net' exists in this
        # function yet, so the variables are always created fresh.
        ssd_net = ssd_vgg_300.SSDNet()
        with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
            predictions, localisations, _, _ = ssd_net.net(
                image_4d, is_training=False, reuse=None)

        # Restore SSD model.
        ckpt_filename = '../checkpoints/ssd_300_vgg.ckpt'
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess, ckpt_filename)

        # SSD default anchor boxes.
        ssd_anchors = ssd_net.anchors(net_shape)

        # Resolve the directory that receives one file per frame.
        if argv.output_dir is None:
            output_p_dir = os.path.dirname(argv.video_path)
            file_name = os.path.basename(argv.video_path).split('.')[0]
            output_dir = os.path.join(output_p_dir, file_name)
            if not os.path.isdir(output_dir):
                os.mkdir(output_dir)
        else:
            if not os.path.isdir(argv.output_dir):
                # Raised explicitly so the check survives ``python -O``.
                raise AssertionError(
                    "output_dir: {} is not exist.".format(argv.output_dir))
            output_dir = argv.output_dir

        cap = cv2.VideoCapture(argv.video_path)
        try:
            num_frame = cap.get(cv2.CAP_PROP_FRAME_COUNT)
            count = 0
            name_template = 'frame-%06d.txt'
            while cap.isOpened():
                ret, img = cap.read()
                if not ret:
                    # No more frames: a normal end of stream, not an error.
                    break
                rclasses, rscores, rbboxes = process_image(
                    sess, img, image_4d, predictions, localisations,
                    bbox_img, img_input, ssd_anchors)
                name = os.path.join(output_dir, name_template % count)
                visualization.save_bboxes_imgs_to_file(
                    name, img, rclasses, rscores, rbboxes)
                count += 1
                print("Detection frame [%d/%d]\r" % (count, num_frame), end="")
        finally:
            # Always hand the video back, even if detection fails mid-way.
            cap.release()
    finally:
        sess.close()
def set_centers():
    """Detect objects on camera 0 and push bottle/car centres onto ``q``.

    Builds and restores the SSD-300 graph, then reads frames from capture
    device 0. For each frame, the first detection whose class id is 5 or 7
    is put on the module-level queue ``q``. The loop stops after 100 such
    detections have been queued.

    Raises:
        RuntimeError: If the camera fails to deliver a frame.
    """
    print("开启线程:将object_centers放入queue")

    # TensorFlow session: grow GPU memory only when needed.
    gpu_options = tf.GPUOptions(allow_growth=True)
    config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)
    isess = tf.InteractiveSession(config=config)

    # Input placeholder.
    net_shape = (300, 300)
    data_format = 'NHWC'
    img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))

    # Evaluation pre-processing: resize to SSD net shape.
    image_pre, _, _, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
        img_input, None, None, net_shape, data_format,
        resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
    image_4d = tf.expand_dims(image_pre, 0)

    # Define the SSD model. Nothing named 'ssd_net' exists in this function
    # yet, so the variables are always created fresh.
    ssd_net = ssd_vgg_300.SSDNet()
    with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
        predictions, localisations, _, _ = ssd_net.net(
            image_4d, is_training=False, reuse=None)

    # Restore SSD model.
    ckpt_filename = 'checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt'
    isess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(isess, ckpt_filename)

    # SSD default anchor boxes.
    ssd_anchors = ssd_net.anchors(net_shape)

    def process_image(img, select_threshold=0.5, nms_threshold=.45,
                      net_shape=(300, 300)):
        """Run the network on ``img`` and return (classes, scores, bboxes)."""
        # Run SSD network.
        _, rpredictions, rlocalisations, rbbox_img = isess.run(
            [image_4d, predictions, localisations, bbox_img],
            feed_dict={img_input: img})

        # Get classes and bboxes from the net outputs.
        rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
            rpredictions, rlocalisations, ssd_anchors,
            select_threshold=select_threshold, img_shape=net_shape,
            num_classes=21, decode=True)
        rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
        rclasses, rscores, rbboxes = np_methods.bboxes_sort(
            rclasses, rscores, rbboxes, top_k=400)
        rclasses, rscores, rbboxes = np_methods.bboxes_nms(
            rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
        # Resize bboxes to original image shape. Note: useless for Resize.WARP!
        rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
        return rclasses, rscores, rbboxes

    def get_centers(rclasses, rbboxes):
        """Return one dict per detection with its index, class and centre."""
        object_centers = []
        for i in range(rclasses.shape[0]):
            object_centers.append({
                'i': i,
                'class': rclasses[i],
                # x coordinate of the object centre
                'x': (rbboxes[i, 1] + rbboxes[i, 3]) / 2,
                # y coordinate of the object centre
                'y': (rbboxes[i, 0] + rbboxes[i, 2]) / 2,
            })
        return object_centers

    count = 0
    # Open the camera.
    cap = cv2.VideoCapture(0)
    try:
        while count < 100:
            ret, img = cap.read()
            if not ret:
                # Without this check a failed read would feed None into the
                # network and fail with an unrelated-looking error.
                raise RuntimeError("camera 0 did not return a frame")
            rclasses, rscores, rbboxes = process_image(img)
            # Class ids: 1 Aeroplanes, 2 Bicycles, 3 Birds, 4 Boats,
            # 5 Bottles, 6 Buses, 7 Cars, 8 Cats, 9 Chairs, 10 Cows,
            # 11 Dining tables, 12 Dogs, 13 Horses, 14 Motorbikes,
            # 15 People, 16 Potted plants, 17 Sheep, 18 Sofas, 19 Trains,
            # 20 TV/Monitors.
            for object_center in get_centers(rclasses, rbboxes):
                if object_center['class'] == 5 or object_center['class'] == 7:
                    # NOTE(review): nesting reconstructed from flattened
                    # source -- only the first bottle/car of a frame is
                    # queued; confirm against the original layout.
                    q.put(object_center)
                    count += 1
                    break
        print("完成输入")
    finally:
        # Release the camera even when detection raises.
        cap.release()
def ssd_test(path):
    """Run SSD-300 on every entry of directory ``path`` and plot the boxes.

    Relies on the module-level names ``net_shape``, ``data_format`` and
    ``ckpt_filename``.

    Args:
        path: Directory whose entries are each read as an image.
    """
    # Image placeholder and evaluation pre-processing (warp-resize).
    image_placeholder = tf.placeholder(tf.uint8, shape=(None, None, 3))
    processed, _, _, image_bbox = ssd_vgg_preprocessing.preprocess_for_eval(
        image_placeholder, None, None, net_shape, data_format,
        resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
    batched_image = tf.expand_dims(processed, axis=0)

    # No local named 'ssd_net' exists yet, so variables are never reused.
    detector = ssd_vgg_300.SSDNet()
    with slim.arg_scope(detector.arg_scope(data_format=data_format)):
        class_preds, box_preds, _, _ = detector.net(
            batched_image, is_training=False, reuse=None)

    # SSD default anchor boxes.
    anchors = detector.anchors(net_shape)

    # Session that grows GPU memory on demand.
    session_config = tf.ConfigProto(
        log_device_placement=False,
        gpu_options=tf.GPUOptions(allow_growth=True))
    session = tf.InteractiveSession(config=session_config)

    # Initialise the variables, then restore them from the checkpoint.
    session.run(tf.global_variables_initializer())
    tf.train.Saver().restore(session, ckpt_filename)

    def post_process(img, select_thresh=0.5, nms_thresh=0.45):
        """Run the network on ``img`` and return (classes, scores, bboxes)."""
        _, raw_preds, raw_locs, raw_bbox = session.run(
            [batched_image, class_preds, box_preds, image_bbox],
            feed_dict={image_placeholder: img})

        # Select, clip, sort and suppress the candidate boxes.
        classes, scores, boxes = np_methods.ssd_bboxes_select(
            raw_preds, raw_locs, anchors,
            select_threshold=select_thresh, img_shape=net_shape,
            num_classes=21, decode=True)
        boxes = np_methods.bboxes_clip(raw_bbox, boxes)
        classes, scores, boxes = np_methods.bboxes_sort(
            classes, scores, boxes, top_k=400)
        classes, scores, boxes = np_methods.bboxes_nms(
            classes, scores, boxes, nms_threshold=nms_thresh)

        # Resize the boxes to the original image size (a no-op for WARP).
        boxes = np_methods.bboxes_resize(raw_bbox, boxes)
        return classes, scores, boxes

    for entry in os.listdir(path):
        picture = mpimg.imread(os.path.join(path, entry))
        found_classes, found_scores, found_boxes = post_process(picture)
        visualization.plt_bboxes(picture, found_classes, found_scores,
                                 found_boxes)
return objects data_format = 'NHWC' img_input = tf.placeholder(tf.uint8, shape=(None, None, 3)) large = tf.placeholder(tf.bool) medium = tf.placeholder(tf.bool) # Evaluation pre-processing: resize to SSD net shape. image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(img_input, None, None, (300,300), data_format, resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE,large=large,medium=medium) image_4d = tf.expand_dims(image_pre, 0) # Define the SSD model. reuse = True if 'ssd_net' in locals() else None #reuse =True ssd_net = ssd_vgg_300.SSDNet(default_params) with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)): predictions, localisations, _, _ = ssd_net.net(image_4d, is_training=False, reuse=tf.AUTO_REUSE,large=large,medium=medium) # Restore SSD model. #ckpt_filename = '../checkpoints/VGG_ILSVRC2016_SSD_300x300_iter_440000.ckpt' ckpt_filename = '../checkpoints/ssd_300_vgg.ckpt' isess.run(tf.global_variables_initializer()) saver = tf.train.Saver() saver.restore(isess, ckpt_filename) # SSD default anchor boxes. ssd_anchors1 = ssd_net.anchors((300,300)) ssd_anchors2 = ssd_vgg_300.ssd_anchors_all_layers((75,75),fovea_params.feat_shapes,fovea_params.anchor_sizes,fovea_params.anchor_ratios,fovea_params.anchor_steps,offset=0.5,dtype=np.float32) ssd_anchors3 = ssd_vgg_300.ssd_anchors_all_layers((150,150),medium_params.feat_shapes,medium_params.anchor_sizes,medium_params.anchor_ratios,medium_params.anchor_steps,offset=0.5,dtype=np.float32)
# Script excerpt: builds a 4-class SSD-300 inference graph and starts
# collecting image summaries. ``net_shape`` is expected to be defined by
# earlier code that is not part of this excerpt.
data_format = 'NHWC'
img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
# Evaluation pre-processing: resize to SSD net shape.
image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
    img_input, None, None, net_shape, data_format, resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
image_4d = tf.expand_dims(image_pre, 0)
# Define the SSD model.
# NOTE(review): the reuse flag keys on a name 'ssd_300_vgg', which nothing in
# this excerpt assigns -- confirm which variable was meant.
reuse = True if 'ssd_300_vgg' in locals() else None
# Default SSD parameters with the class count overridden to 4.
ssd_params = ssd_vgg_300.SSDNet.default_params._replace(num_classes=4)
ssd_net = ssd_vgg_300.SSDNet(ssd_params)
with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
    predictions, localisations, _, _ = ssd_net.net(image_4d, is_training=False, reuse=reuse)
# Start from the summaries already registered in the graph and add an image
# summary of the network input.
summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))
summaries.add(tf.summary.image("Image", image_4d))
f_i = 0
# Disabled per-anchor prediction-map summaries (the excerpt ends mid-loop):
# for predict_map in predictions:
#     predict_map = predict_map[:, :, :, :, 1:]
#     predict_map = tf.reduce_max(predict_map, axis=4)
#     if f_i < 3:
#         predict_list = tf.split(predict_map, 6, axis=3)
#         anchor_index = 1
#         for anchor in predict_list:
#             summaries.add(tf.summary.image("predicte_map_%d_anchor%d" % (f_i,anchor_index), tf.cast(anchor, tf.float32)))
def main(_):
    """Build the SSD-300 training graph and run the slim training loop.

    Reads its whole configuration from ``FLAGS``.

    NOTE(review): the block nesting below was reconstructed from a flattened
    source line; in particular, which statements sit inside each
    ``tf.device`` scope should be confirmed against the original file.

    Raises:
        ValueError: If ``FLAGS.dataset_dir`` is empty.
    """
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')
    tf.logging.set_verbosity(tf.logging.DEBUG)
    with tf.Graph().as_default():
        global_step = slim.create_global_step()
        # Get the SSD network and its anchors.
        #ssd_params = ssd_class.default_params._replace(num_classes=FLAGS.num_classes)
        ssd_net = ssd_vgg_300.SSDNet()
        ssd_shape = ssd_net.params.img_shape
        ssd_anchors = ssd_net.anchors(ssd_shape)
        # Batched training inputs and their encoded ground truth.
        b_image, b_gclasses, b_glocalisations, b_gscores = \
            load_batch.get_batch(FLAGS.dataset_dir, FLAGS.num_readers, FLAGS.batch_size, ssd_shape, ssd_net, ssd_anchors, FLAGS.num_preprocessing_threads, is_training = True)
        with tf.device(FLAGS.gpu_train):
            arg_scope = ssd_net.arg_scope(weight_decay=FLAGS.weight_decay)
            with slim.arg_scope(arg_scope):
                predictions, localisations, logits, end_points = \
                    ssd_net.net(b_image, is_training=True)
            # Add loss function.
            total_loss = ssd_net.losses(logits, localisations, b_gclasses, b_glocalisations, b_gscores, match_threshold=FLAGS.match_threshold, negative_ratio=FLAGS.negative_ratio, alpha=FLAGS.loss_alpha, label_smoothing=FLAGS.label_smoothing)
        # Gather initial summaries.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))
        # One histogram per model variable.
        for variable in slim.get_model_variables():
            summaries.add(tf.summary.histogram(variable.op.name, variable))
        # One scalar per entry of the 'EXTRA_LOSSES' collection.
        for loss in tf.get_collection('EXTRA_LOSSES'):
            summaries.add(tf.summary.scalar(loss.op.name, loss))
        with tf.device(FLAGS.gpu_train):
            learning_rate = tf_utils.configure_learning_rate(
                FLAGS, FLAGS.num_samples, global_step)
            # Configure the optimization procedure
            optimizer = tf_utils.configure_optimizer(FLAGS, learning_rate)
            summaries.add(tf.summary.scalar('learning_rate', learning_rate))
            ## Training
            train_op = slim.learning.create_train_op(total_loss, optimizer)
        # =================================================================== #
        # Kicks off the training.
        # =================================================================== #
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
        config = tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False, allow_soft_placement=True)
        # Saver limited to a single recent checkpoint (max_to_keep=1).
        saver = tf.train.Saver(max_to_keep=1, keep_checkpoint_every_n_hours=1.0, write_version=2, pad_step_number=False)
        slim.learning.train(train_op, logdir=FLAGS.train_dir, master='', is_chief=True, init_fn=tf_utils.get_init_fn(FLAGS), number_of_steps=FLAGS.max_number_of_steps, log_every_n_steps=FLAGS.log_every_n_steps, save_summaries_secs=FLAGS.save_summaries_secs, saver=saver, save_interval_secs=FLAGS.save_interval_secs, session_config=config, sync_optimizer=None)
def cmd(image_name):
    """Detect objects in one image and report where the centre x lies.

    Args:
        image_name: Path of the image, read with ``mpimg.imread``.

    Returns:
        ``"stop"`` when the centre is negative (nothing in sight),
        ``"left"`` for a centre in (0, 40%] of the image width,
        ``"middle"`` for (40%, 60%), ``"right"`` for [60%, 100%), and
        ``"danger"`` for any other value (exactly 0, or at/beyond the width).
    """
    img = mpimg.imread(image_name)
    print("get image. Ready to process")

    # TensorFlow session: grow GPU memory only when needed.
    gpu_options = tf.GPUOptions(allow_growth=True)
    config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)
    isess = tf.InteractiveSession(config=config)
    try:
        # Input placeholder.
        net_shape = (300, 300)
        data_format = 'NHWC'
        img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))

        # Evaluation pre-processing: resize to SSD net shape.
        image_pre, _, _, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
            img_input, None, None, net_shape, data_format,
            resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
        image_4d = tf.expand_dims(image_pre, 0)

        # Define the SSD model. Nothing named 'ssd_net' exists in this
        # function yet, so the variables are always created fresh.
        ssd_net = ssd_vgg_300.SSDNet()
        with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
            predictions, localisations, _, _ = ssd_net.net(
                image_4d, is_training=False, reuse=None)

        # Restore SSD model.
        ckpt_filename = '../checkpoints/ssd_300_vgg.ckpt'
        isess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(isess, ckpt_filename)

        # SSD default anchor boxes.
        ssd_anchors = ssd_net.anchors(net_shape)

        # Run SSD network.
        _, rpredictions, rlocalisations, rbbox_img = isess.run(
            [image_4d, predictions, localisations, bbox_img],
            feed_dict={img_input: img})
    finally:
        # The session is local to this call; release it once the raw
        # network outputs have been fetched (or on failure).
        isess.close()

    # Main image processing routine.
    select_threshold = 0.5
    nms_threshold = .45

    # Get classes and bboxes from the net outputs.
    rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
        rpredictions, rlocalisations, ssd_anchors,
        select_threshold=select_threshold, img_shape=net_shape,
        num_classes=21, decode=True)
    rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
    rclasses, rscores, rbboxes = np_methods.bboxes_sort(
        rclasses, rscores, rbboxes, top_k=400)
    rclasses, rscores, rbboxes = np_methods.bboxes_nms(
        rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
    # Resize bboxes to original image shape. Note: useless for Resize.WARP!
    rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)

    centerx = visualization.get_center_x(img, rclasses, rscores, rbboxes)
    width = img.shape[1]

    # left in (0, 40%], mid in (40%, 60%), right in [60%, 100%)
    left = width * 0.4
    right = width * 0.6

    if centerx < 0:
        print("nothing in sight")
        return "stop"
    if 0 < centerx <= left:
        return "left"
    if left < centerx < right:
        return "middle"
    if right <= centerx < width:
        return "right"

    # The values are numbers, so they must be converted before being joined
    # to the label; plain ``+`` would raise TypeError here.
    print("danger centerx, out of picture")
    print("centerx: " + str(centerx))
    print("left: " + str(left))
    print("right: " + str(right))
    print("width: " + str(width))
    return "danger"
#net_shape = (512, 512) data_format = 'NHWC' img_input = tf.placeholder(tf.uint8, shape=(None, None, 3)) # Evaluation pre-processing: resize to SSD net shape. image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval( img_input, None, None, net_shape, data_format, resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE) image_4d = tf.expand_dims(image_pre, 0) # Define the SSD model. reuse = True if 'ssd_net' in locals() else None ssd_net = ssd_vgg_300.SSDNet() # choose net here!!! #ssd_net = ssd_vgg_512.SSDNet() with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)): predictions, localisations, _, _ = ssd_net.net(image_4d, is_training=False, reuse=reuse) # Restore SSD model. ckpt_filename = '/root/train/model.ckpt-1666' isess.run(tf.global_variables_initializer()) saver = tf.train.Saver() saver.restore(isess, ckpt_filename) # SSD default anchor boxes. ssd_anchors = ssd_net.anchors(net_shape)
def ssd_eval(dataset_name, dataset_dir, batch_size, eval_dir):
    """Run one evaluation pass of the SSD-300 network over a 'test' split.

    A fresh graph is built that reads the dataset, runs the network, matches
    detections against the ground truth and accumulates streaming
    loss / TP-FP metrics. Per-class AP (VOC07 and VOC12 variants) and their
    means are published as scalar summaries, then the checkpoint is evaluated
    once with `slim.evaluation.evaluate_once`.

    The module-level names `net_shape`, `data_format` and `ckpt_filename` are
    read from the enclosing module.

    Args:
        dataset_name: Name handed to `dataset_factory.get_dataset`; also used
            as the prefix of the data-provider name scope.
        dataset_dir: Directory handed to `dataset_factory.get_dataset`.
        batch_size: Evaluation batch size.
        eval_dir: Log directory passed to `evaluate_once`.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    def publish_scalar(tag, value, echo=False):
        """Add a scalar summary for `value`; with `echo`, also print it."""
        summary_op = tf.summary.scalar(tag, value, collections=[])
        if echo:
            summary_op = tf.Print(summary_op, [value], tag)
        tf.add_to_collection(tf.GraphKeys.SUMMARIES, summary_op)

    with tf.Graph().as_default():
        # Only the side effect (creating the global step) is needed here.
        slim.get_or_create_global_step()

        # Dataset, model and anchors.
        dataset = dataset_factory.get_dataset(dataset_name, 'test', dataset_dir)
        ssd_net = ssd_vgg_300.SSDNet()
        ssd_shape = net_shape
        ssd_anchors = ssd_net.anchors(ssd_shape)

        # Input pipeline, kept on the CPU.
        with tf.device('/cpu:0'):
            with tf.name_scope(dataset_name + '_data_provider'):
                provider = slim.dataset_data_provider.DatasetDataProvider(
                    dataset,
                    common_queue_capacity=2 * batch_size,
                    common_queue_min=batch_size,
                    shuffle=False)
                [image, _shape, glabels, gbboxes] = provider.get(
                    ['image', 'shape', 'object/label', 'object/bbox'])
                [gdifficults] = provider.get(['object/difficult'])

            image, glabels, gbboxes, gbbox_img = (
                ssd_vgg_preprocessing.preprocess_for_eval(
                    image, glabels, gbboxes, ssd_shape,
                    data_format=data_format,
                    resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE))
            gclasses, glocalizations, gscores = ssd_net.bboxes_encode(
                glabels, gbboxes, ssd_anchors)

            # Five plain tensors followed by three per-anchor-layer lists.
            batch_shape = [1] * 5 + [len(ssd_anchors)] * 3
            per_sample = [image, glabels, gbboxes, gdifficults, gbbox_img,
                          gclasses, glocalizations, gscores]
            batched = tf.train.batch(
                reshape_list(per_sample),
                batch_size=batch_size,
                num_threads=1,
                capacity=5 * batch_size,
                dynamic_pad=True)
            (b_image, b_glabels, b_gbboxes, b_gdifficults, b_gbbox_img,
             b_gclasses, b_glocalizations, b_gscores) = reshape_list(
                 batched, batch_shape)

        # Network forward pass and losses.
        with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
            predictions, localizations, logits, _ = ssd_net.net(
                b_image, is_training=False)
        ssd_net.losses(logits, localizations,
                       b_gclasses, b_glocalizations, b_gscores)

        # Decoding and matching of detections, on the CPU.
        with tf.device('/device:CPU:0'):
            localizations = ssd_net.bboxes_decode(localizations, ssd_anchors)
            rscores, rbboxes = ssd_net.detected_bboxes(
                predictions, localizations,
                select_threshold=0.01,
                nms_threshold=0.45,
                clipping_bbox=None,
                top_k=400,
                keep_top_k=200)
            num_gbboxes, tp, fp, rscores = tfe.bboxes_matching_batch(
                rscores.keys(), rscores, rbboxes,
                b_glabels, b_gbboxes, b_gdifficults,
                matching_threshold=0.5)

        # Captured before the streaming-metric variables are created below.
        variables_to_restore = slim.get_variables_to_restore()

        with tf.device('/device:CPU:0'):
            dict_metrics = {}

            # Streaming mean of every regular and extra loss.
            for collection in (tf.GraphKeys.LOSSES, 'EXTRA_LOSSES'):
                for loss in tf.get_collection(collection):
                    dict_metrics[loss.op.name] = slim.metrics.streaming_mean(loss)
            for name, metric in dict_metrics.items():
                publish_scalar(name, metric[0])

            # Per-class TP/FP accumulators and the AP values derived from them.
            tp_fp_metric = tfe.streaming_tp_fp_arrays(num_gbboxes, tp, fp, rscores)
            aps_VOC07 = {}
            aps_voc12 = {}
            for c in tp_fp_metric[0].keys():
                dict_metrics['tp_fp_%s' % c] = (tp_fp_metric[0][c],
                                                tp_fp_metric[1][c])

                pre, rec = tfe.precision_recall(*tp_fp_metric[0][c])

                ap07 = tfe.average_precision_voc07(pre, rec)
                publish_scalar('AP_VOC07/%s' % c, ap07)
                aps_VOC07[c] = ap07

                ap12 = tfe.average_precision_voc12(pre, rec)
                publish_scalar('AP_VOC12/%s' % c, ap12)
                aps_voc12[c] = ap12

            # Mean AP for each protocol; these are also printed when evaluated.
            for tag, per_class in (('AP_VOC07/mAP', aps_VOC07),
                                   ('AP_VOC12/mAP', aps_voc12)):
                mean_ap = tf.add_n(list(per_class.values())) / len(per_class)
                publish_scalar(tag, mean_ap, echo=True)

        _, names_to_updates = slim.metrics.aggregate_metric_map(dict_metrics)

        # Single evaluation run over the whole split.
        session_config = tf.ConfigProto(
            log_device_placement=False,
            gpu_options=tf.GPUOptions(allow_growth=True,
                                      per_process_gpu_memory_fraction=0.9))
        num_batches = math.ceil(dataset.num_samples / float(batch_size))

        tf.logging.info('Evaluating %s' % ckpt_filename)
        started_at = time.time()
        slim.evaluation.evaluate_once(
            master='',
            checkpoint_path=ckpt_filename,
            logdir=eval_dir,
            num_evals=num_batches,
            eval_op=flatten(list(names_to_updates.values())),
            variables_to_restore=variables_to_restore,
            session_config=session_config)

        # Timing report.
        elapsed = time.time() - started_at
        print('Time Spent: %.3f' % elapsed)
        print('Time Spent per batch: %.3f seconds' % (elapsed / num_batches))
def main_train():
    """Build the SSD-300 training graph for the VOC_SSD dataset and train it.

    The dataset is read from `<this file's dir>/datasets/VOC_SSD` and the
    training run (checkpoints, summaries) is written by `slim.learning.train`
    to `<this file's dir>/trained_model`. Intermediate tensors are printed
    while the graph is being built (Python 2 print statements).

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source; in particular the extent of the CPU device scope and of the
    arg_scope should be confirmed against the original file.
    """
    dirname = os.path.dirname(__file__)
    dataset_dir = '{}/datasets/VOC_SSD'.format(dirname)
    ssd_net = ssd_vgg_300.SSDNet()

    # Hyper-parameters. `optimizer='adam'` is recorded here, but the optimizer
    # actually used below is hard-coded to AdamOptimizer.
    train_params = TrainParams(weight_decay=0.00004, batch_size=32, optimizer='adam',
                               adam_beta1=0.9, adam_beta2=0.999, opt_epsilon=1.0)
    ssd_params = SSDParams(loss_alpha=1., negative_ratio=3., match_threshold=0.5,
                           label_smoothing=0.0)
    # `end_learning_rate` is not read anywhere in this function.
    learning_rate_params = LearningRateParams(learning_rate=0.01, end_learning_rate=0.0001,
                                              learning_rate_decay_factor=0.94,
                                              num_epochs_per_decay=2.0)

    graph = tf.Graph()
    with graph.as_default():
        with tf.device('/device:CPU:0'):
            global_step = tf.train.create_global_step()

        dataset = voc_dataset.get_dataset(dataset_dir, 'train')
        ssd_shape = ssd_net.params.img_shape

        print '---- enter anchors ----'
        ssd_anchors = ssd_net.anchors()
        print '---- end anchors ----'

        image_preprocessing_fn = ssd_vgg_preprocessing.preprocess_for_train

        # Raw samples from the dataset provider.
        provider = voc_dataset.get_dataset_provider(dataset, 4, train_params.batch_size)
        [image, shape, glabels, gbboxes] = provider.get(['image', 'shape', 'object/label', 'object/bbox'])
        print '---- from provider ----'
        print image
        print shape
        print glabels
        print gbboxes
        print '---- end provider ----'

        print '---- enter preprocessing ----'
        image, glabels, gbboxes = image_preprocessing_fn(image, glabels, gbboxes, ssd_shape, 'NHWC')
        print '---- end preprocessing ----'

        # Encode the ground truth against the anchors.
        print '---- encode bbox ----'
        gclasses, glocalisations, gscores = ssd_net.bboxes_encode(glabels, gbboxes, ssd_anchors)
        print '---- end encode ----'

        # One image tensor followed by three lists with one entry per anchor
        # layer; `reshape_list` flattens them for batching and restores the
        # grouping afterwards.
        batch_shape = [1] + [len(ssd_anchors)] * 3
        print batch_shape
        tensors = reshape_list([image, gclasses, glocalisations, gscores])
        print 'reshape 1 ---', tensors, len(tensors)
        r = tf.train.batch(tensors, train_params.batch_size, num_threads=4,
                           capacity=5 * train_params.batch_size)
        print 'train batch ---', r, len(r)
        b_image, b_gclasses, b_glocalisations, b_gscores = reshape_list(r, batch_shape)
        print b_image
        print b_gclasses
        print b_glocalisations
        print b_gscores

        # Put the batches behind a small prefetch queue.
        tensors = reshape_list([b_image, b_gclasses, b_glocalisations, b_gscores])
        print 'reshape 2 ---', tensors, len(tensors)
        batch_queue = slim.prefetch_queue.prefetch_queue(tensors, capacity=2)
        print 'batch queue ---', batch_queue
        b_image, b_gclasses, b_glocalisations, b_gscores = reshape_list(batch_queue.dequeue(), batch_shape)

        # Network forward pass and loss registration.
        arg_scope = ssd_net.arg_scope(weight_decay=train_params.weight_decay)
        with slim.arg_scope(arg_scope):
            predictions, localisations, logits, end_points = ssd_net.ssd_net(b_image, is_training=True)
        ssd_net.losses(logits, localisations, b_gclasses, b_glocalisations, b_gscores,
                       match_threshold=ssd_params.match_threshold,
                       negative_ratio=ssd_params.negative_ratio,
                       loss_alpha=ssd_params.loss_alpha,
                       label_smoothing=ssd_params.label_smoothing)

        # moving_average_variables = slim.get_model_variables()
        # variable_averages = tf.train.ExponentialMovingAverage(0.999, global_step)

        # Staircase exponential decay: one decay step every
        # `num_epochs_per_decay` epochs' worth of batches.
        decay_steps = int(dataset.num_samples / train_params.batch_size * learning_rate_params.num_epochs_per_decay)
        learning_rate = tf.train.exponential_decay(learning_rate_params.learning_rate, global_step,
                                                   decay_steps,
                                                   learning_rate_params.learning_rate_decay_factor,
                                                   staircase=True)
        optimizer = tf.train.AdamOptimizer(learning_rate, train_params.adam_beta1,
                                           train_params.adam_beta2, train_params.opt_epsilon)

        # Total loss = sum of the LOSSES collection + sum of regularization losses.
        print '---- loss ----'
        losses = tf.get_collection(tf.GraphKeys.LOSSES)
        print losses
        loss = tf.add_n(losses, name='loss')
        print loss
        regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        print regularization_losses
        regularization_loss = tf.add_n(regularization_losses, name='regularization_loss')
        print regularization_loss
        sum_loss = tf.add_n([loss, regularization_loss])
        print sum_loss
        print '---- end ----'

        grad = optimizer.compute_gradients(sum_loss)
        grad_updates = optimizer.apply_gradients(grad, global_step)

        # Group the gradient step with any ops in the UPDATE_OPS collection so
        # that evaluating `train_tensor` runs all of them and yields the loss.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        print 'update ops', update_ops
        update_ops.append(grad_updates)
        update_op = tf.group(*update_ops)
        train_tensor = control_flow_ops.with_dependencies([update_op], sum_loss, name='train_op')

        gpu_options = tf.GPUOptions(allow_growth=True)
        config = tf.ConfigProto(gpu_options=gpu_options)
        saver = tf.train.Saver(max_to_keep=10, keep_checkpoint_every_n_hours=1.0)

        # Only referenced by the disabled manual training loop below.
        import sys
        # with tf.Session(config=config) as sess:
        #     epochs = 10
        #     sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
        #     for epoch in xrange(epochs):
        #         print '%s/%s' % (epoch + 1, epochs)
        #         tf.train.start_queue_runners(sess=sess)
        #         steps = int(dataset.num_samples / train_params.batch_size)
        #         for step in xrange(steps):
        #             sys.stdout.write('\r%s/%s' % (step + 1, steps))
        #             sys.stdout.flush()
        #             _, loss = sess.run([train_tensor, sum_loss])
        #         print

        slim.learning.train(train_tensor, '{}/trained_model'.format(dirname), is_chief=True,
                            init_fn=None, saver=saver, session_config=config)
class SSDServer:
    """Serve a saved SSD-300 model: decode input, run the graph, pack results.

    The detection thresholds, the anchor boxes and the class-id -> label map
    are class-level constants shared by all instances.
    """

    NET_SHAPE = (300, 300)
    SELECT_TRESHOLD = 0.5
    NMS_TRESHOLD = 0.45
    TOP_K = 400

    ssd_net = ssd_vgg_300.SSDNet()
    ssd_anchors = ssd_net.anchors(NET_SHAPE)

    # Class id -> label; id 0 is 'none', ids 1..20 follow in the listed order.
    VOC_MAP = dict(enumerate((
        'none',
        'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
        'bus', 'car', 'cat', 'chair', 'cow',
        'diningtable', 'dog', 'horse', 'motorbike', 'person',
        'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor',
    )))

    def __init__(self, session, inputs, outputs):
        """Keep the session and the name -> tensor maps of the signature."""
        self.session = session
        self.inputs = inputs
        self.outputs = outputs

    @staticmethod
    def initialize(model_path, signature_name):
        """Load a SavedModel and return a server bound to one of its signatures.

        Args:
            model_path: Directory of the SavedModel (loaded with the SERVING tag).
            signature_name: Key into the meta graph's `signature_def`.
        """
        session = tf.Session()
        meta_graph = tf.saved_model.loader.load(
            session, [tf.saved_model.tag_constants.SERVING], model_path)
        signature = meta_graph.signature_def[signature_name]
        lookup = session.graph.get_tensor_by_name

        # Resolve every signature entry to the live tensor in the graph.
        inputs = {key: lookup(info.name)
                  for key, info in signature.inputs.items()}
        outputs = {key: lookup(info.name)
                   for key, info in signature.outputs.items()}
        return SSDServer(session, inputs, outputs)

    def preprocess(self, b64_string):
        """Decode a base64-encoded JPEG into an image array."""
        raw_bytes = base64.b64decode(b64_string)
        return mpimg.imread(io.BytesIO(raw_bytes), format='JPG')

    def run(self, pic_matrix):
        """Run the graph on one image.

        Returns:
            (rpredictions, rlocalisations, rbbox_img): two lists with one
            entry per SSD feature layer, plus the 'bbox_img' output.
        """
        prediction_keys = (
            'ssd_300_vgg/softmax/Reshape_1:0',
            'ssd_300_vgg/softmax_1/Reshape_1:0',
            'ssd_300_vgg/softmax_2/Reshape_1:0',
            'ssd_300_vgg/softmax_3/Reshape_1:0',
            'ssd_300_vgg/softmax_4/Reshape_1:0',
            'ssd_300_vgg/softmax_5/Reshape_1:0',
        )
        localisation_keys = (
            'ssd_300_vgg/block4_box/Reshape:0',
            'ssd_300_vgg/block7_box/Reshape:0',
            'ssd_300_vgg/block8_box/Reshape:0',
            'ssd_300_vgg/block9_box/Reshape:0',
            'ssd_300_vgg/block10_box/Reshape:0',
            'ssd_300_vgg/block11_box/Reshape:0',
        )

        feed = {self.inputs["img_input"]: pic_matrix}
        fetched = self.session.run(self.outputs, feed_dict=feed)

        rpredictions = [fetched[key] for key in prediction_keys]
        rlocalisations = [fetched[key] for key in localisation_keys]
        return rpredictions, rlocalisations, fetched['bbox_img']

    def postprocess(self, rpredictions, rlocalisations, rbbox_img):
        """Turn raw network outputs into class / score / box tensor protos.

        Detections are selected by score, clipped, sorted (keeping TOP_K),
        de-duplicated with NMS and resized; class ids are then mapped to
        label strings ("NA" for ids missing from VOC_MAP).

        Returns:
            (classes_tensor, scores_tensor, bboxes_tensor) as `hs.TensorProto`.
        """
        cls = SSDServer

        def shape_of(*sizes):
            # Tensor shape proto with one Dim per given size.
            return hs.TensorShapeProto(
                dim=[hs.TensorShapeProto.Dim(size=s) for s in sizes])

        rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
            rpredictions, rlocalisations, cls.ssd_anchors,
            select_threshold=cls.SELECT_TRESHOLD,
            img_shape=cls.NET_SHAPE,
            num_classes=21,
            decode=True)
        rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
        rclasses, rscores, rbboxes = np_methods.bboxes_sort(
            rclasses, rscores, rbboxes, top_k=cls.TOP_K)
        rclasses, rscores, rbboxes = np_methods.bboxes_nms(
            rclasses, rscores, rbboxes, nms_threshold=cls.NMS_TRESHOLD)
        rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)

        # Class ids -> UTF-8 encoded label names.
        class_arr = [bytes(cls.VOC_MAP.get(class_id, "NA"), "utf-8")
                     for class_id in rclasses]

        classes_tensor = hs.TensorProto(
            dtype=hs.DT_STRING,
            tensor_shape=shape_of(-1),
            string_val=class_arr)
        scores_tensor = hs.TensorProto(
            dtype=hs.DT_DOUBLE,
            tensor_shape=shape_of(-1),
            double_val=rscores)
        bboxes_tensor = hs.TensorProto(
            dtype=hs.DT_DOUBLE,
            tensor_shape=shape_of(-1, 4),
            double_val=rbboxes.flatten())
        return classes_tensor, scores_tensor, bboxes_tensor
def get_object_center(q, detect_class):
    """Detect an object with SSD, track it with SiamRPN and publish its centre.

    The function builds the SSD-300 graph, restores its checkpoint, loads the
    SiamRPN tracker weights and opens camera 0. It then:

    1. runs SSD on camera frames until a detection of class `detect_class`
       is found and initialises the tracker on that box;
    2. tracks frame by frame, putting ``{'x': ..., 'y': ...}`` (tracker centre
       divided by frame width / height) on `q` for every tracked frame;
    3. re-runs SSD to re-initialise the tracker whenever the tracker state is
       falsy or ``count_number % 40 == 3``;
    4. shows each frame mirrored in a window until ESC is pressed or a frame
       cannot be read.

    Args:
        q: Object with a ``put`` method that receives the centre dicts.
        detect_class: Class id compared against the SSD class of each detection.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source; confirm against the original file.
    """
    # classes:
    # 1.Aeroplanes 2.Bicycles 3.Birds 4.Boats 5.Bottles
    # 6.Buses 7.Cars 8.Cats 9.Chairs 10.Cows
    # 11.Dining tables 12.Dogs 13.Horses 14.Motorbikes 15.People
    # 16.Potted plants 17.Sheep 18.Sofas 19.Trains 20.TV/Monitors
    slim = tf.contrib.slim

    # TensorFlow session: grow memory when needed. TF, DO NOT USE ALL MY GPU MEMORY!!!
    gpu_options = tf.GPUOptions(allow_growth=True)
    config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)
    isess = tf.InteractiveSession(config=config)

    # Input placeholder.
    net_shape = (300, 300)
    data_format = 'NHWC'
    img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
    # Evaluation pre-processing: resize to SSD net shape.
    image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
        img_input, None, None, net_shape, data_format,
        resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
    image_4d = tf.expand_dims(image_pre, 0)

    # Define the SSD model.
    # `ssd_net` is never in locals() at this point of a fresh call, so `reuse`
    # evaluates to None here.
    reuse = True if 'ssd_net' in locals() else None
    ssd_net = ssd_vgg_300.SSDNet()
    with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
        predictions, localisations, _, _ = ssd_net.net(image_4d, is_training=False, reuse=reuse)

    # Restore SSD model.
    # ckpt_filename = 'checkpoints/ssd_300_vgg.ckpt'
    ckpt_filename = '../SSD-Tensorflow/checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt'
    isess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(isess, ckpt_filename)

    # SSD default anchor boxes.
    ssd_anchors = ssd_net.anchors(net_shape)

    # Main image processing routine.
    def process_image(img, select_threshold=0.5, nms_threshold=.45, net_shape=(300, 300)):
        """Run SSD on one image and return (classes, scores, bboxes) after NMS."""
        # Run SSD network.
        rimg, rpredictions, rlocalisations, rbbox_img = isess.run(
            [image_4d, predictions, localisations, bbox_img],
            feed_dict={img_input: img})

        # Get classes and bboxes from the net outputs.
        rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
            rpredictions, rlocalisations, ssd_anchors,
            select_threshold=select_threshold, img_shape=net_shape, num_classes=21, decode=True)

        rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
        rclasses, rscores, rbboxes = np_methods.bboxes_sort(rclasses, rscores, rbboxes, top_k=400)
        rclasses, rscores, rbboxes = np_methods.bboxes_nms(
            rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
        # Resize bboxes to original image shape. Note: useless for Resize.WARP!
        rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
        return rclasses, rscores, rbboxes

    def get_bboxes(rclasses, rbboxes):
        """Convert parallel class / box arrays into a list of per-object dicts.

        Box columns are read as (y_min, x_min, y_max, x_max).
        """
        # get center location of object
        number_classes = rclasses.shape[0]
        object_bboxes = []
        for i in range(number_classes):
            object_bbox = dict()
            object_bbox['i'] = i
            object_bbox['class'] = rclasses[i]
            object_bbox['y_min'] = rbboxes[i, 0]
            object_bbox['x_min'] = rbboxes[i, 1]
            object_bbox['y_max'] = rbboxes[i, 2]
            object_bbox['x_max'] = rbboxes[i, 3]
            object_bboxes.append(object_bbox)
        return object_bboxes

    # load net
    net = SiamRPNvot()
    net.load_state_dict(
        torch.load(
            join(realpath(dirname(__file__)), '../DaSiamRPN-master/code/SiamRPNVOT.model')))
    net.eval()

    # open video capture
    video = cv2.VideoCapture(0)
    if not video.isOpened():
        print("Could not open video")
        sys.exit()

    # Phase 1: keep grabbing frames until the requested class is detected.
    index = True
    while index:
        # Read first frame.
        ok, frame = video.read()
        if not ok:
            # NOTE(review): exits without releasing `video`.
            print('Cannot read video file')
            sys.exit()

        # Define an initial bounding box
        height = frame.shape[0]
        width = frame.shape[1]
        rclasses, rscores, rbboxes = process_image(frame)
        bboxes = get_bboxes(rclasses, rbboxes)
        for bbox in bboxes:
            if bbox['class'] == detect_class:
                print(bbox)
                # Box coordinates are scaled by the frame size here, i.e. they
                # are treated as fractions of the frame -- TODO confirm.
                ymin = int(bbox['y_min'] * height)
                xmin = int((bbox['x_min']) * width)
                ymax = int(bbox['y_max'] * height)
                xmax = int((bbox['x_max']) * width)
                cx = (xmin + xmax) / 2
                cy = (ymin + ymax) / 2
                h = ymax - ymin
                w = xmax - xmin
                new_bbox = (cx, cy, w, h)
                print(new_bbox)
                # Only the first matching detection is used.
                index = False
                break

    # tracker init
    target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
    state = SiamRPN_init(frame, target_pos, target_sz, net)

    # tracking and visualization
    toc = 0  # accumulated tracker tick count; not read again in this function
    count_number = 0
    while True:
        # Read a new frame
        ok, frame = video.read()
        if not ok:
            break

        # Start timer
        tic = cv2.getTickCount()

        # Update tracker
        state = SiamRPN_track(state, frame)  # track
        # print(state)
        toc += cv2.getTickCount() - tic

        if state:
            # Draw the tracked box (converted to x, y, w, h) on the frame.
            res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
            res = [int(l) for l in res]
            cv2.rectangle(frame, (res[0], res[1]), (res[0] + res[2], res[1] + res[3]), (0, 255, 255), 3)
            count_number += 1

            # set object_center
            # Centre is published relative to the frame width / height
            # measured during the initial detection.
            object_center = dict()
            object_center['x'] = state['target_pos'][0] / width
            object_center['y'] = state['target_pos'][1] / height
            q.put(object_center)

        # Re-detect with SSD when tracking failed, and also periodically
        # (whenever count_number % 40 == 3) to re-initialise the tracker.
        if (not state) or count_number % 40 == 3:
            # Tracking failure
            cv2.putText(frame, "Tracking failure detected", (100, 80), cv2.FONT_HERSHEY_SIMPLEX, 0.75,
                        (0, 0, 255), 2)
            index = True
            while index:
                # NOTE(review): `ok` is not checked here; a failed read would
                # pass `frame` unchecked into process_image.
                ok, frame = video.read()
                rclasses, rscores, rbboxes = process_image(frame)
                bboxes = get_bboxes(rclasses, rbboxes)
                for bbox in bboxes:
                    if bbox['class'] == detect_class:
                        ymin = int(bbox['y_min'] * height)
                        xmin = int(bbox['x_min'] * width)
                        ymax = int(bbox['y_max'] * height)
                        xmax = int(bbox['x_max'] * width)
                        cx = (xmin + xmax) / 2
                        cy = (ymin + ymax) / 2
                        h = ymax - ymin
                        w = xmax - xmin
                        new_bbox = (cx, cy, w, h)
                        target_pos, target_sz = np.array(
                            [cx, cy]), np.array([w, h])
                        state = SiamRPN_init(frame, target_pos, target_sz, net)
                        p1 = (int(xmin), int(ymin))
                        p2 = (int(xmax), int(ymax))
                        cv2.rectangle(frame, p1, p2, (0, 255, 0), 2, 1)
                        # 0 is falsy, so this ends the `while index` loop
                        # (the first loop uses False for the same purpose).
                        index = 0
                        break

        # Resize the image
        resized_frame = cv2.resize(frame, None, fx=0.65, fy=0.65, interpolation=cv2.INTER_AREA)
        # Flip the image horizontally (for a mirror-like display)
        horizontal = cv2.flip(resized_frame, 1, dst=None)
        # Show the image
        cv2.namedWindow("SSD+SiamRPN", cv2.WINDOW_NORMAL)
        cv2.imshow('SSD+SiamRPN', horizontal)

        # Exit if ESC pressed
        k = cv2.waitKey(1) & 0xff
        if k == 27:
            break

    video.release()
    cv2.destroyAllWindows()
#ckpt_filename = '../logs/ssd_300_kitti_13/model.ckpt-149757' # In[18]: # Image pre-processimg out_shape = (None, None) image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval( image, labels, bboxes, out_shape, resize=ssd_vgg_preprocessing.Resize.NONE) image_4d = tf.expand_dims(image_pre, 0) # SSD construction. reuse = True if 'ssd' in locals() else None params = ssd_vgg_300.SSDNet.default_params params = params._replace(num_classes=8) ssd = ssd_vgg_300.SSDNet(params) with slim.arg_scope(ssd.arg_scope(weight_decay=0.0005)): predictions, localisations, logits, end_points = ssd.net(image_4d, is_training=False, reuse=reuse) # In[20]: # Initialize variables. init_op = tf.global_variables_initializer() sess.run(init_op) # Restore SSD model. saver = tf.train.Saver() saver.restore(sess, ckpt_filename) # In[24]:
def __init__(self, figsize=(10, 10)):
    """Set up the SSD-300 detector and wire it to the ROS camera topics.

    Creates the TensorFlow session and inference graph, restores the
    checkpoint, advertises the result/marker publishers and registers
    `self.callback` on approximately time-synchronised RGB + depth messages.

    Args:
        figsize: Accepted but not used by this constructor.
    """
    session_config = tf.ConfigProto(
        gpu_options=tf.GPUOptions(allow_growth=True))
    self._sess = tf.Session(config=session_config)
    self._cv_bridge = CvBridge()

    # Plain bookkeeping state.
    self.distance = []
    self.center = 0.0
    self.depth_horizon = 59
    self.robot_x = self.robot_y = 0.0
    self.banana_data, self.apple_data, self.cable_data = [], [], []
    self.latest_cable_x = self.latest_cable_y = -100.0
    self.r = 0.3
    self.data_count = []

    self._tf_listener = rostf.TransformListener()
    self._latest_point = None

    # --- SSD inference graph -------------------------------------------
    net_shape = (300, 300)
    data_format = 'NHWC'
    self.img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))

    # Evaluation pre-processing: warp-resize the input to the net shape.
    preprocessed = ssd_vgg_preprocessing.preprocess_for_eval(
        self.img_input, None, None, net_shape, data_format,
        resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
    (self.image_pre, self.labels_pre,
     self.bboxes_pre, self.bbox_img) = preprocessed
    self.image_4d = tf.expand_dims(self.image_pre, 0)

    self.ssd_net = ssd_vgg_300.SSDNet()
    net_arg_scope = self.ssd_net.arg_scope(data_format=data_format)
    with tf.contrib.slim.arg_scope(net_arg_scope):
        self.predictions, self.localisations, _, _ = self.ssd_net.net(
            self.image_4d, is_training=False)

    # Initialise every variable, then overwrite from the checkpoint.
    # Alternative checkpoint:
    # '../checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt'
    ckpt_filename = 'checkpoints/ssd_300_vgg.ckpt'
    self._sess.run(tf.global_variables_initializer())
    tf.train.Saver().restore(self._sess, ckpt_filename)

    # SSD default anchor boxes.
    self._ssd_anchors = self.ssd_net.anchors(net_shape)

    # --- ROS publishers / subscribers ----------------------------------
    def float_array_publisher(topic):
        return rospy.Publisher(topic, Float32MultiArray, queue_size=1)

    def marker_publisher(topic):
        return rospy.Publisher(topic, Marker, queue_size=10)

    self._pub_img = rospy.Publisher('ssd_image', Image, queue_size=1)
    self._pub_rslt = float_array_publisher('ssd_result')
    self._pub_detect = float_array_publisher('detect')
    self._pub_banana_point = float_array_publisher('banana_point')
    self._pub_apple_point = float_array_publisher('apple_point')
    self._pub_cable_point = float_array_publisher('cable_point')

    # RGB and depth are synchronised instead of subscribing to the RGB
    # topic on its own.
    rgb_sub = message_filters.Subscriber(
        "camera/rgb/image_color/compressed", CompressedImage,
        queue_size=1, buff_size=2**24)
    depth_sub = message_filters.Subscriber(
        "camera/depth/image", Image,
        queue_size=1, buff_size=2**24)
    # 2nd argument: queue size; 3rd: allowed time slop between the messages.
    self.mf = message_filters.ApproximateTimeSynchronizer(
        [rgb_sub, depth_sub], 1, 10.0)
    self.mf.registerCallback(self.callback)

    self.marker_pub = marker_publisher("test_text")
    self.marker_voting_cable_pub = marker_publisher("voting_cable_point")
    self.marker_voting_banana_pub = marker_publisher("voting_banana_point")
    self.marker_voting_apple_pub = marker_publisher("voting_apple_point")

    # One random colour triple for each of the 21 class ids.
    self._colors = [
        (random.randint(0, 255),
         random.randint(0, 255),
         random.randint(0, 255))
        for _ in xrange(21)
    ]
def inference(input=0, inputType=1):
    """Run SSD-300 detection on an image, a video or the webcam.

    The SSD graph is built and restored from '../checkpoints/ssd_300_vgg.ckpt'
    on every call. Detected class ids are rendered as "id:name" using the
    module-level `category` lookup.

    Args:
        input: Source to process. An image path for mode 1, a video source
            handed to `cv2.VideoCapture` for mode 2, a video path for mode 3.
            The default 0 means "not supplied" for modes 1 and 3, which then
            exit with status -1.
        inputType: Processing mode (converted with `int()`):
            1 -- detect on a single image and plot it;
            2 -- detect on a video and display it ('q' quits);
            3 -- detect on a video and write the annotated copy to
                 './videos/output_<basename>';
            anything else -- detect on webcam 0 ('q' quits).
    """
    slim = tf.contrib.slim
    sys.path.append('../')
    from nets import ssd_vgg_300, ssd_common, np_methods
    from preprocessing import ssd_vgg_preprocessing
    from notebooks import visualization

    # TensorFlow session: grow memory when needed. TF, DO NOT USE ALL MY GPU MEMORY!!!
    gpu_options = tf.GPUOptions(allow_growth=True)
    config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)
    isess = tf.InteractiveSession(config=config)

    # Input placeholder.
    net_shape = (300, 300)
    data_format = 'NHWC'
    img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
    # Evaluation pre-processing: resize to SSD net shape.
    image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
        img_input, None, None, net_shape, data_format,
        resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
    image_4d = tf.expand_dims(image_pre, 0)

    # Define the SSD model.
    reuse = True if 'ssd_net' in locals() else None
    ssd_net = ssd_vgg_300.SSDNet()
    with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
        predictions, localisations, _, _ = ssd_net.net(image_4d, is_training=False, reuse=reuse)

    # Restore SSD model.
    ckpt_filename = '../checkpoints/ssd_300_vgg.ckpt'
    # ckpt_filename = '../checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt'
    isess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(isess, ckpt_filename)

    # SSD default anchor boxes.
    ssd_anchors = ssd_net.anchors(net_shape)

    # Main image processing routine.
    def process_image(img, select_threshold=0.5, nms_threshold=.45, net_shape=(300, 300)):
        """Run SSD on one image and return (classes, scores, bboxes) after NMS."""
        # Run SSD network.
        rimg, rpredictions, rlocalisations, rbbox_img = isess.run(
            [image_4d, predictions, localisations, bbox_img],
            feed_dict={img_input: img})

        # Get classes and bboxes from the net outputs.
        rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
            rpredictions, rlocalisations, ssd_anchors,
            select_threshold=select_threshold, img_shape=net_shape, num_classes=21, decode=True)

        rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
        rclasses, rscores, rbboxes = np_methods.bboxes_sort(rclasses, rscores, rbboxes, top_k=400)
        rclasses, rscores, rbboxes = np_methods.bboxes_nms(rclasses, rscores, rbboxes,
                                                           nms_threshold=nms_threshold)
        # Resize bboxes to original image shape. Note: useless for Resize.WARP!
        rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
        return rclasses, rscores, rbboxes

    def label_classes(class_ids):
        """Render class ids as a list of "id:name" strings."""
        return ["{}:{}".format(i, category[i]) for i in class_ids]

    def annotate(frame):
        """Detect objects in `frame`, draw the labelled boxes on it, return it."""
        rclasses, rscores, rbboxes = process_image(frame)
        labels = np.array(label_classes(rclasses))
        visualization.bboxes_draw_on_img(frame, labels, rscores, rbboxes)
        return frame

    inputType = int(inputType)

    # Compare by value: `is 1` tests object identity, which is not guaranteed
    # for ints and triggers a SyntaxWarning on recent Python versions.
    if inputType == 1:
        # Input is a single image.
        if input == 0:
            print("At least indicate 1 input video")
            sys.exit(-1)
        # Test on some demo image and visualize output.
        img = mpimg.imread(input)
        rclasses, rscores, rbboxes = process_image(img)

        # Find the name of the category num
        labels = label_classes(rclasses)
        print(labels)
        # plot the image directly
        visualization.plt_bboxes(img, np.array(labels), rscores, rbboxes)

    elif inputType == 2:
        # Input is a video: draw the boxes on every frame and display it.
        cap = cv2.VideoCapture(input)
        try:
            fps = cap.get(cv2.CAP_PROP_FPS)
            size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                    int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
            fourcc = cap.get(cv2.CAP_PROP_FOURCC)
            print('fps=%d,size=%r,fourcc=%r' % (fps, size, fourcc))

            # waitKey needs a positive integer. Some sources report 0 fps,
            # which used to raise ZeroDivisionError here.
            whole_fps = int(fps)
            delay = max(1, 10 // whole_fps) if whole_fps > 0 else 1
            print(delay)

            while cap.isOpened():
                ret, frame = cap.read()
                print(ret)
                if not ret:
                    break
                cv2.imshow('frame', annotate(frame))
                if cv2.waitKey(delay) & 0xFF == ord('q'):
                    break
                print('Ongoing...')
        finally:
            # Release the source even if detection raised.
            cap.release()
            cv2.destroyAllWindows()

    elif inputType == 3:
        print("save video")
        if input == 0:
            print("At least indicate 1 input video")
            sys.exit(-1)

        from moviepy.editor import VideoFileClip

        # OpenCV is only used to read the source frame rate.
        cap = cv2.VideoCapture(input)
        fps = cap.get(cv2.CAP_PROP_FPS)
        cap.release()
        cv2.destroyAllWindows()

        video = VideoFileClip(input)
        result = video.fl_image(annotate)
        output = os.path.join("./videos/output_{}".format(input.split("/")[-1]))
        result.write_videofile(output, fps=fps)

    else:
        # Live detection on the default webcam.
        cap = cv2.VideoCapture(0)
        try:
            while True:
                # Capture frame-by-frame
                ret, frame = cap.read()
                if not ret:
                    # No frame was grabbed (camera missing or disconnected);
                    # `frame` is unusable, so stop instead of feeding it on.
                    break
                cv2.imshow('frame', annotate(frame))
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        finally:
            # When everything done, release the capture
            cap.release()
            cv2.destroyAllWindows()
def main(_):
    """Evaluate an SSD (``ssd_vgg_300.SSDNet``) checkpoint once over a dataset.

    Builds, inside a fresh ``tf.Graph``: the dataset provider, the eval
    pre-processing, the batched input queue, the network with its losses,
    CPU-side box decoding / NMS / ground-truth matching, and streaming
    metrics (losses, per-class TP/FP arrays, VOC12 average precision and
    mAP).  It then runs ``slim.evaluation.evaluate_once`` against the
    checkpoint designated by ``FLAGS.checkpoint_path``.

    Args:
        _: Ignored positional argument (presumably the argv list passed by
            ``tf.app.run`` -- confirm against the entry point).

    Raises:
        ValueError: If ``FLAGS.dataset_dir`` is empty.
    """
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        tf_global_step = slim.get_or_create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = dataset_factory.get_dataset(FLAGS.dataset_name,
                                              FLAGS.dataset_split_name,
                                              FLAGS.dataset_dir)

        ####################
        # Select the model #
        ####################
        ssd_model = ssd_vgg_300.SSDNet()
        ssd_model.set_batch_size(FLAGS.batch_size)
        network_fn = nets_factory.get_network_fn(ssd_model, is_training=False)

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            common_queue_capacity=20 * FLAGS.batch_size,
            common_queue_min=10 * FLAGS.batch_size)
        [image, labels, bboxes] = provider.get(
            ['image', 'object/label', 'object/bbox'])
        labels -= FLAGS.labels_offset

        if FLAGS.remove_difficult:
            # `provider.get` returns a list (see the destructuring above), so
            # unpack the single item; otherwise this branch would hand a
            # one-element list to `tf.train.batch` while the `else` branch
            # hands it a plain tensor of a different rank.
            [difficults_gt] = provider.get(['object/difficult'])
        else:
            difficults_gt = tf.zeros(tf.shape(labels), dtype=tf.int64)

        #####################################
        # Select the preprocessing function #
        #####################################
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name)

        # Fall back to the model's own image size when no flag is given.
        eval_image_size_height = (FLAGS.eval_image_size_height or
                                  ssd_model.ssd_params.image_size[0])
        eval_image_size_width = (FLAGS.eval_image_size_width or
                                 ssd_model.ssd_params.image_size[1])

        image, labels_gt, bboxes_gt = image_preprocessing_fn(
            image, labels, bboxes,
            eval_image_size_height, eval_image_size_width,
            data_format=DATA_FORMAT,
            is_training=False)

        # Encode the ground truth against the anchors; the encoded targets
        # feed the loss computation below.
        anchors = ssd_model.anchors_for_all_layer()
        labels_en, scores_en, bboxes_en = ssd_model.bboxes_encode(
            anchors, labels_gt, bboxes_gt)

        images, labels_gt, bboxes_gt, difficults_gt, labels_en, scores_en, bboxes_en = \
            tf.train.batch(
                [image, labels_gt, bboxes_gt, difficults_gt,
                 labels_en, scores_en, bboxes_en],
                batch_size=FLAGS.batch_size,
                num_threads=FLAGS.num_preprocessing_threads,
                capacity=5 * FLAGS.batch_size,
                dynamic_pad=True)

        ################################
        # SSD Model + outputs decoding #
        ################################
        logits, locs, endpoints = network_fn(images)
        # Return value is unused; the losses are read back further down from
        # the tf.GraphKeys.LOSSES / 'EXTRA_LOSSES' collections.
        ssd_model.ssd_class_and_loc_losses(logits, locs, labels_en, bboxes_en,
                                           scores_en)

        # Performing post_processing on CPU: loop-intensive, usually more efficient.
        with tf.device('/device:CPU:0'):
            # Detect objects from SSD Model outputs
            locs_aggr = ssd_model.bboxes_decode(locs, anchors)
            scores_nms, bboxes_nms = ssd_model.detected_bboxes(
                logits, locs_aggr,
                FLAGS.select_threshold,
                FLAGS.nms_threshold,
                FLAGS.select_top_k,
                FLAGS.keep_top_k)
            num_bboxes_gt, tp, fp = bboxes_matching_batch(
                scores_nms.keys(), scores_nms, bboxes_nms,
                labels_gt, bboxes_gt, difficults_gt,
                matching_threshold=FLAGS.matching_threshold)

        # Variables to restore: moving-average shadows or the raw weights.
        if FLAGS.moving_average_decay:
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, tf_global_step)
            variables_to_restore = variable_averages.variables_to_restore(
                slim.get_model_variables())
            variables_to_restore[tf_global_step.op.name] = tf_global_step
        else:
            variables_to_restore = slim.get_variables_to_restore()

        # Define the metrics:
        with tf.device('/device:CPU:0'):
            dict_metrics = {}
            # First add all losses.
            for loss in tf.get_collection(tf.GraphKeys.LOSSES):
                dict_metrics[loss.op.name] = slim.metrics.streaming_mean(loss)
            # Extra losses as well.
            for loss in tf.get_collection('EXTRA_LOSSES'):
                dict_metrics[loss.op.name] = slim.metrics.streaming_mean(loss)

            # Add metrics to summaries and Print on screen.
            for name, metric in dict_metrics.items():
                # summary_name = 'eval/%s' % name
                summary_name = name
                op = tf.summary.scalar(summary_name, metric[0], collections=[])
                # op = tf.Print(op, [metric[0]], summary_name)
                tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

            # FP and TP metrics.
            tp_fp_metric = streaming_tp_fp_arrays(num_bboxes_gt, tp, fp,
                                                  scores_nms)
            for c in tp_fp_metric[0].keys():
                dict_metrics['tp_fp_%s' % c] = (tp_fp_metric[0][c],
                                                tp_fp_metric[1][c])

            # Add to summaries precision/recall values.
            aps_voc12 = {}
            for c in tp_fp_metric[0].keys():
                # Precision and recall values.
                prec, rec = precision_recall(*tp_fp_metric[0][c])

                # Average precision VOC12.
                v = average_precision_voc12(prec, rec)
                summary_name = 'AP_VOC12/%s' % c
                op = tf.summary.scalar(summary_name, v, collections=[])
                # op = tf.Print(op, [v], summary_name)
                tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)
                aps_voc12[c] = v

            # Mean average precision VOC12.
            summary_name = 'AP_VOC12/mAP'
            mAP = tf.add_n(list(aps_voc12.values())) / len(aps_voc12)
            op = tf.summary.scalar(summary_name, mAP, collections=[])
            # Unlike the per-class summaries, the mAP summary is wrapped in
            # tf.Print so its value is also printed when the op is evaluated.
            op = tf.Print(op, [mAP], summary_name)
            tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

        # Split into values and updates ops.
        names_to_values, names_to_updates = slim.metrics.aggregate_metric_map(
            dict_metrics)

        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
        config = tf.ConfigProto(log_device_placement=False,
                                gpu_options=gpu_options)

        # TODO(sguada) use num_epochs=1
        if FLAGS.max_num_batches:
            num_batches = FLAGS.max_num_batches
        else:
            # This ensures that we make a single pass over all of the data.
            num_batches = math.ceil(dataset.num_samples /
                                    float(FLAGS.batch_size))

        # A directory means "use its most recent checkpoint".
        if tf.gfile.IsDirectory(FLAGS.checkpoint_path):
            checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
        else:
            checkpoint_path = FLAGS.checkpoint_path

        tf.logging.info('Evaluating %s' % checkpoint_path)

        slim.evaluation.evaluate_once(
            master=FLAGS.master,
            checkpoint_path=checkpoint_path,
            logdir=FLAGS.eval_dir,
            num_evals=num_batches,
            eval_op=list(names_to_updates.values()),
            variables_to_restore=variables_to_restore,
            # Without this the GPU memory fraction configured above is
            # silently ignored.
            session_config=config)
# Build the SSD inference graph with module-level statements.
# NOTE(review): `isess`, `tf`, `slim`, `ssd_vgg_preprocessing` and
# `ssd_vgg_300` are not defined in this span and must already be in scope.

# Shape handed to both the eval pre-processing and the anchor generation below.
net_shape = (300, 300)
data_format = 'NHWC'
# Input image placeholder: uint8, rank 3, last dimension fixed to 3.
img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
# Evaluation pre-processing: resize to SSD net shape.
# No labels or boxes are supplied (both arguments are None).
image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
    img_input, None, None, net_shape, data_format, resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
# Insert a leading axis of size 1 before handing the image to the network.
image_4d = tf.expand_dims(image_pre, 0)

# Define the SSD model.
# `reuse` is True only when a name `ssd_net` already exists in the current
# scope, otherwise None -- presumably so this code can be re-executed in the
# same interpreter session without re-creating variables; confirm.
reuse = True if 'ssd_net' in locals() else None
ssd_net = ssd_vgg_300.SSDNet()
with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
    # The last two values returned by `ssd_net.net` are discarded.
    predictions, localisations, _, _ = ssd_net.net(image_4d, is_training=False, reuse=reuse)

# Restore SSD model.
#ckpt_filename = 'finetune_log/model.ckpt-41278'
ckpt_filename = '../checkpoints/ssd_300_vgg.ckpt'
# Variables are initialised first, then the checkpoint is restored into them.
isess.run(tf.global_variables_initializer())
saver = tf.train.Saver()
saver.restore(isess, ckpt_filename)

# SSD default anchor boxes.
ssd_anchors = ssd_net.anchors(net_shape)
def __init__(self):
    """Load the frozen SSD graph and set up everything inference needs.

    Resets the default TensorFlow graph, loads the frozen model found at
    ``MODEL_DIR/FROZEN_MODEL_NAME`` through ``self.load_graph``, prints its
    operations, and then stores on the instance:

    * ``input_x`` -- the graph's input tensor,
    * ``predictions`` / ``localisations`` -- one tensor per feature block
      (block4, block7, block8, block9, block10, block11),
    * ``bbox_img`` -- the tensor named ``my_bbox_img/strided_slice``,
    * ``ssd_anchors`` -- default anchors for a (300, 300) network with
      ``num_classes=5``,
    * ``VOC_LABELS`` -- class id to label name,
    * ``colors`` -- one random 3-tuple of ints in [0, 255] per label,
    * ``sess`` -- a ``tf.Session`` bound to the loaded graph.
    """
    tf.reset_default_graph()

    frozen_model_path = os.path.join(self.MODEL_DIR, self.FROZEN_MODEL_NAME)
    graph = self.load_graph(frozen_model_path)
    # Only needed by the disabled node dump below.
    graph_def = graph.as_graph_def()

    # print operations
    self.print_graph_operations(graph)
    # print nodes
    #print_graph_nodes(graph_def)

    ####################
    self.input_x = graph.get_tensor_by_name('prefix/input_x:0')

    # The very coarse conv10_2 and conv11_2 layers could be dropped.
    class_tensor_names = (
        'prefix/ssd_300_vgg/block4_cls_pred/softmax/Reshape_1:0',
        'prefix/ssd_300_vgg/block7_cls_pred/softmax/Reshape_1:0',
        'prefix/ssd_300_vgg/block8_cls_pred/softmax/Reshape_1:0',
        'prefix/ssd_300_vgg/block9_cls_pred/softmax/Reshape_1:0',
        'prefix/ssd_300_vgg/block10_cls_pred/softmax/Reshape_1:0',
        'prefix/ssd_300_vgg/block11_cls_pred/softmax/Reshape_1:0',
    )
    self.predictions = [
        graph.get_tensor_by_name(tensor_name)
        for tensor_name in class_tensor_names
    ]

    box_tensor_names = (
        'prefix/ssd_300_vgg/block4_box/loc_pred:0',
        'prefix/ssd_300_vgg/block7_box/loc_pred:0',
        'prefix/ssd_300_vgg/block8_box/loc_pred:0',
        'prefix/ssd_300_vgg/block9_box/loc_pred:0',
        'prefix/ssd_300_vgg/block10_box/loc_pred:0',
        'prefix/ssd_300_vgg/block11_box/loc_pred:0',
    )
    self.localisations = [
        graph.get_tensor_by_name(tensor_name)
        for tensor_name in box_tensor_names
    ]

    self.bbox_img = graph.get_tensor_by_name(
        'prefix/ssd_preprocessing_train/my_bbox_img/strided_slice:0')

    # SSD default anchor boxes.
    anchor_params = nets_factory.get_network(
        'ssd_300_vgg').default_params._replace(num_classes=5)
    self.ssd_anchors = ssd_vgg_300.SSDNet(anchor_params).anchors((300, 300))

    ########################################
    # Labels
    ########################################
    self.VOC_LABELS = dict(enumerate(
        ('none', 'stop', 'speed_10', 'speed_20', 'speed_30')))
    # One random colour triple per label.
    self.colors = [
        tuple(random.randint(0, 255) for _ in range(3))
        for _ in self.VOC_LABELS
    ]

    # TensorFlow session: grow memory when needed. TF, DO NOT USE ALL MY GPU MEMORY!!!
    #gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)
    session_config = tf.ConfigProto(
        log_device_placement=False,
        gpu_options=tf.GPUOptions(allow_growth=True))
    self.sess = tf.Session(graph=graph, config=session_config)