def load_ssd_net(checkpoint, net_shape=(300, 300), data_format='NHWC'):
    """Build the SSD inference graph and restore its weights from a checkpoint.

    Ops are added to the current default TensorFlow graph. The weights are
    restored into ``sess``, which is not created here and must already exist
    in the enclosing (module) scope.

    Args:
        checkpoint: Checkpoint path passed to ``tf.train.Saver.restore``.
        net_shape: Network input shape. Its first entry selects the model
            class: 300 -> ``ssd_vgg_300.SSDNet``, 512 -> ``ssd_vgg_512.SSDNet``.
        data_format: Forwarded to the pre-processing and to the net arg scope.

    Returns:
        dict with the keys ``'img_input'``, ``'image_4d'``, ``'predictions'``,
        ``'localisations'``, ``'bbox_img'`` and ``'ssd_anchors'``.

    Raises:
        KeyError: If ``net_shape[0]`` is neither 300 nor 512.
    """
    # Resolve the model class first so that an unsupported shape fails before
    # any placeholder / pre-processing op has been added to the graph.
    ssd_net_dict = {300: ssd_vgg_300.SSDNet,
                    512: ssd_vgg_512.SSDNet}
    ssd_net = ssd_net_dict[net_shape[0]]()

    # Input placeholder.
    img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
    # Evaluation pre-processing: resize to SSD net shape.
    image_pre, labels_pre, bboxes_pre, bbox_img = \
        ssd_vgg_preprocessing.preprocess_for_eval(
            img_input, None, None, net_shape, data_format,
            resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
    image_4d = tf.expand_dims(image_pre, 0)

    # Define the SSD model.
    # The former "True if 'ssd_net' in locals() else None" check could never
    # be True inside a function (the name was not bound yet), so the value
    # actually passed has always been None; it is now passed explicitly.
    with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
        predictions, localisations, _, _ = ssd_net.net(
            image_4d, is_training=False, reuse=None)

    # Restore SSD model.
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint)

    # SSD default anchor boxes.
    ssd_anchors = ssd_net.anchors(net_shape)

    # Return dictionary.
    return {'img_input': img_input,
            'image_4d': image_4d,
            'predictions': predictions,
            'localisations': localisations,
            'bbox_img': bbox_img,
            'ssd_anchors': ssd_anchors}
from struct import * ##GPU, SSD network, TF Session: Restore a checkpoint and keep a Session for all threads...## # TensorFlow session: grow memory when needed. TF, DO NOT USE ALL MY GPU MEMORY!!! gpu_options = tf.GPUOptions(allow_growth=True) config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options, device_count={'GPU': 2}) isess = tf.InteractiveSession(config=config) # Input placeholder. net_shape = (300, 300) data_format = 'NHWC' img_input = tf.placeholder(tf.uint8, shape=(None, None, 3)) # Evaluation pre-processing: resize to SSD net shape. image_pre, labels_pre, bboxes_pre, bbox_img = \ ssd_vgg_preprocessing.preprocess_for_eval(img_input, None, None, net_shape, data_format, resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE) image_4d = tf.expand_dims(image_pre, 0) # Define the SSD model. ssd_time = time.time() reuse = True if 'ssd_net' in locals() else None ssd_net = ssd_vgg_300.SSDNet() with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)): predictions, localisations, _, _ = ssd_net.net(image_4d, is_training=False, reuse=reuse) print('ssd_net_time: ', time.time() - ssd_time) restore_time = time.time() #TODO: ckpt_filename is from sys.argv # Restore SSD model. # ckpt_filename = './checkpoints/ssd_300_vgg.ckpt'
def main(argv):
    """Run SSD-300 detection on every frame of a video and save the results.

    For each frame, ``process_image`` is run and the result is handed to
    ``visualization.save_bboxes_imgs_to_file`` under the name
    ``frame-%06d.txt`` inside the output directory.

    Args:
        argv: Parsed arguments object. ``argv.video_path`` is the input video.
            ``argv.output_dir`` is the result directory; when it is None, a
            directory named after the video file (text before the first '.')
            is created next to the video.

    Raises:
        AssertionError: If ``argv.output_dir`` is given but is not an existing
            directory.
    """
    # Resolve the output directory before the (expensive) graph construction
    # so that a bad argument fails immediately.
    if argv.output_dir is None:
        output_p_dir = os.path.dirname(argv.video_path)
        file_name = os.path.basename(argv.video_path).split('.')[0]
        output_dir = os.path.join(output_p_dir, file_name)
        if not os.path.isdir(output_dir):
            os.mkdir(output_dir)
    else:
        if not os.path.isdir(argv.output_dir):
            # Raised explicitly (instead of `assert 0`) so the check is not
            # stripped when Python runs with -O; the exception type is kept.
            raise AssertionError(
                "output_dir: {} is not exist.".format(argv.output_dir))
        output_dir = argv.output_dir

    # TensorFlow session: grow memory when needed. TF, DO NOT USE ALL MY GPU MEMORY!!!
    gpu_options = tf.GPUOptions(allow_growth=True)
    config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)
    sess = tf.Session(config=config)
    cap = None
    try:
        # Input placeholder.
        net_shape = (300, 300)
        data_format = 'NHWC'
        img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
        # Evaluation pre-processing: resize to SSD net shape.
        image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
            img_input, None, None, net_shape, data_format,
            resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
        image_4d = tf.expand_dims(image_pre, 0)

        # Define the SSD model. (`reuse` was always None here: the old
        # locals() check could never be True inside a function.)
        ssd_net = ssd_vgg_300.SSDNet()
        with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
            predictions, localisations, _, _ = ssd_net.net(
                image_4d, is_training=False, reuse=None)

        # Restore SSD model.
        ckpt_filename = '../checkpoints/ssd_300_vgg.ckpt'
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess, ckpt_filename)

        # SSD default anchor boxes.
        ssd_anchors = ssd_net.anchors(net_shape)

        # Run detection frame by frame.
        cap = cv2.VideoCapture(argv.video_path)
        num_frame = cap.get(cv2.CAP_PROP_FRAME_COUNT)
        count = 0
        name_template = 'frame-%06d.txt'
        while cap.isOpened():
            ret, img = cap.read()
            if not ret:
                # No more frames (or a read error): this is the normal way
                # for the loop to finish, not an assertion failure.
                break
            rclasses, rscores, rbboxes = process_image(
                sess, img, image_4d, predictions, localisations,
                bbox_img, img_input, ssd_anchors)
            name = os.path.join(output_dir, name_template % count)
            visualization.save_bboxes_imgs_to_file(name, img, rclasses, rscores, rbboxes)
            count += 1
            print("Detection frame [%d/%d]\r" % (count, num_frame), end="")
    finally:
        # Release the video handle and the TF session even on errors.
        if cap is not None:
            cap.release()
        sess.close()
import matplotlib.image as mpimg
import sys
# Make the parent directory importable so the project packages below resolve.
sys.path.append('../')
from nets import ssd_vgg_300, ssd_common, np_methods
from preprocessing import ssd_vgg_preprocessing
from notebooks import visualization

# TensorFlow session: grow GPU memory on demand instead of reserving it all
# up front (allow_growth=True).
gpu_options = tf.GPUOptions(allow_growth=True)
config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)
isess = tf.InteractiveSession (config=config)

# Input placeholder: a single uint8 image with 3 channels and free height/width.
net_shape = (300, 300)
data_format = 'NHWC'
img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
# Evaluation pre-processing. Unlike the usual demo setup, this call passes
# (None, None) as the output shape with Resize.NONE and does not pass
# `data_format`, so `net_shape` is not used for resizing here.
# Author's note (translated): it whitens the image, i.e. removes the means
# from the picture, resizes it and adjusts something in the bbox.
# NOTE(review): the "resizes" part of that note looks at odds with Resize.NONE - confirm.
image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
    img_input, None, None, (None, None), resize=ssd_vgg_preprocessing.Resize.NONE)
image_4d = tf.expand_dims(image_pre, 0)  # [1, height, width, channels]

# Define the SSD model.
# At module level locals() is the module namespace, so `reuse` becomes True
# only when this code is executed again after `ssd_net` already exists.
reuse = True if 'ssd_net' in locals() else None
ssd_net = ssd_vgg_300.SSDNet()
with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
    predictions, localisations, _, _ = ssd_net.net(image_4d, is_training=False, reuse=reuse)

# Restore SSD model: initialise all variables, then overwrite them from the
# checkpoint (absolute, machine-specific path).
ckpt_filename = '/home/sarah/SSD-Tensorflow/checkpoints/model.ckpt-150000'
# ckpt_filename = '../checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt'
isess.run(tf.global_variables_initializer())
saver = tf.train.Saver()
saver.restore(isess, ckpt_filename)
[2, .5, 3, 1. / 3], [2, .5], [2, .5]], anchor_steps=[8, 16, 32, 64], anchor_offset=0.5, normalizations=[20, -1, -1, -1, -1, -1], prior_scaling=[0.1, 0.1, 0.2, 0.2]) data_format = 'NHWC' img_input = tf.placeholder(tf.uint8, shape=(None, None, 3)) large = tf.placeholder(tf.bool) medium = tf.placeholder(tf.bool) # Evaluation pre-processing: resize to SSD net shape. image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval( img_input, None, None, (300, 300), data_format, resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE, large=large, medium=medium) image_4d = tf.expand_dims(image_pre, 0) # Define the SSD model. reuse = True if 'ssd_net' in locals() else None #reuse =True ssd_net = ssd_vgg_300.SSDNet(default_params) with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)): predictions, localisations, _, _ = ssd_net.net(image_4d, is_training=False, reuse=tf.AUTO_REUSE, large=large, medium=medium)
def set_centers():
    """Detect objects from camera 0 and push matching centres onto ``q``.

    Builds an SSD-300 network, restores it from a checkpoint and then reads
    camera frames until 100 detections have been queued (or the camera stops
    delivering frames). For each frame, the first detection whose class id is
    5 or 7 (bottle / car in the class list below) is put on the queue ``q``,
    which must exist in the enclosing (module) scope.

    Each queued item is a dict with the keys ``'i'`` (detection index),
    ``'class'``, ``'x'`` and ``'y'`` (centre of the bounding box, computed
    from the box corners returned by the network post-processing).
    """
    # classes:
    #  1.Aeroplanes     2.Bicycles  3.Birds     4.Boats        5.Bottles
    #  6.Buses          7.Cars      8.Cats      9.Chairs      10.Cows
    # 11.Dining tables 12.Dogs     13.Horses   14.Motorbikes  15.People
    # 16.Potted plants 17.Sheep    18.Sofas    19.Trains      20.TV/Monitors
    wanted_classes = (5, 7)

    print("开启线程:将object_centers放入queue")
    # TensorFlow session: grow memory when needed. TF, DO NOT USE ALL MY GPU MEMORY!!!
    gpu_options = tf.GPUOptions(allow_growth=True)
    config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)
    isess = tf.InteractiveSession(config=config)

    # Input placeholder.
    net_shape = (300, 300)
    data_format = 'NHWC'
    img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
    # Evaluation pre-processing: resize to SSD net shape.
    image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
        img_input, None, None, net_shape, data_format,
        resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
    image_4d = tf.expand_dims(image_pre, 0)

    # Define the SSD model. (`reuse` was always None here: the old locals()
    # check could never be True inside a function.)
    ssd_net = ssd_vgg_300.SSDNet()
    with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
        predictions, localisations, _, _ = ssd_net.net(
            image_4d, is_training=False, reuse=None)

    # Restore SSD model.
    # ckpt_filename = 'checkpoints/ssd_300_vgg.ckpt'
    ckpt_filename = 'checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt'
    isess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(isess, ckpt_filename)

    # SSD default anchor boxes.
    ssd_anchors = ssd_net.anchors(net_shape)

    def process_image(img, select_threshold=0.5, nms_threshold=.45, net_shape=(300, 300)):
        """Run the network on one image and return (classes, scores, bboxes)."""
        # Run SSD network.
        rimg, rpredictions, rlocalisations, rbbox_img = isess.run(
            [image_4d, predictions, localisations, bbox_img],
            feed_dict={img_input: img})

        # Get classes and bboxes from the net outputs.
        rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
            rpredictions, rlocalisations, ssd_anchors,
            select_threshold=select_threshold, img_shape=net_shape,
            num_classes=21, decode=True)

        rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
        rclasses, rscores, rbboxes = np_methods.bboxes_sort(
            rclasses, rscores, rbboxes, top_k=400)
        rclasses, rscores, rbboxes = np_methods.bboxes_nms(
            rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
        # Resize bboxes to original image shape. Note: useless for Resize.WARP!
        rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
        return rclasses, rscores, rbboxes

    def get_centers(rclasses, rbboxes):
        """Return one dict per detection holding its index, class and centre."""
        return [
            {
                'i': i,
                'class': rclasses[i],
                # Box columns are used as (0, 2) -> y and (1, 3) -> x.
                'x': (rbboxes[i, 1] + rbboxes[i, 3]) / 2,  # x coordinate of the object centre
                'y': (rbboxes[i, 0] + rbboxes[i, 2]) / 2,  # y coordinate of the object centre
            }
            for i in range(rclasses.shape[0])
        ]

    count = 0
    cap = cv2.VideoCapture(0)  # open the camera
    try:
        while count < 100:
            ret, img = cap.read()
            if not ret:
                # The camera delivered no frame; feeding `None` to the
                # network would raise, so stop cleanly instead.
                break
            rclasses, rscores, rbboxes = process_image(img)
            # Queue at most one matching detection per frame.
            for object_center in get_centers(rclasses, rbboxes):
                if object_center['class'] in wanted_classes:
                    q.put(object_center)
                    count += 1
                    break
        print("完成输入")
    finally:
        # Always free the camera and the TF session, even on errors.
        cap.release()
        isess.close()
from preprocessing import ssd_vgg_preprocessing

# Checkpoint to use (absolute, machine-specific path) and the number of
# classes the network is configured with.
ckpt_filename = '/mogu/liubang/mytf/SSD-Tensorflow/logs2/model.ckpt-122449'
NUM = 7

# SSD object: the default SSD-512 parameters with `num_classes` replaced by NUM.
# At module level locals() is the module namespace, so `reuse` becomes True
# only when this code is executed again after `ssd` already exists.
reuse = True if 'ssd' in locals() else None
params = ssd_vgg_512.SSDNet.default_params
ssd_params = params._replace(num_classes=NUM)
ssd = ssd_vgg_512.SSDNet(ssd_params)

# Image pre-processing: the target shape is taken from the network parameters.
out_shape = ssd.params.img_shape
img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
image_pre, labels_pre, bboxes_pre, bbox_img = \
    ssd_vgg_preprocessing.preprocess_for_eval(img_input, None, None, out_shape, resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
# Add a leading batch dimension of size 1.
image_4d = tf.expand_dims(image_pre, 0)

# SSD construction.
with slim.arg_scope(ssd.arg_scope(weight_decay=0.0005)):
    predictions, localisations, logits, end_points = ssd.net(image_4d, is_training=False, reuse=reuse)

# SSD default anchor boxes.
img_shape = out_shape
layers_anchors = ssd.anchors(img_shape, dtype=np.float32)

# Print every end point of the network with its static shape, sorted by name.
for k in sorted(end_points.keys()):
    print(k, end_points[k].get_shape())
def demo6(): ttf_img, _, _, _ = ssd_vgg_preprocessing.preprocess_for_eval(tf.constant(misc.imread('dog.jpg'), dtype=tf.uint8), None, None, (300,300), 'NHWC', resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE) isess = tf.Session() rtfimg = isess.run([ttf_img]) print rtfimg[0] """
def ssd_eval(dataset_name, dataset_dir, batch_size, eval_dir):
    """Evaluate an SSD-300 checkpoint on the 'test' split of a dataset.

    Builds the whole evaluation graph in a fresh ``tf.Graph``: data provider,
    pre-processing, network, losses, detection post-processing, TP/FP
    matching and the VOC07 / VOC12 average-precision summaries. It then runs
    ``slim.evaluation.evaluate_once`` and prints the elapsed time.

    Besides its arguments the function reads ``net_shape``, ``data_format``
    and ``ckpt_filename``, none of which are defined in it, so they must
    exist in the enclosing (module) scope.

    Args:
        dataset_name: Name given to ``dataset_factory.get_dataset``; also used
            as prefix of the data-provider name scope.
        dataset_dir: Directory given to ``dataset_factory.get_dataset``.
        batch_size: Evaluation batch size; also sizes the provider and batch
            queues.
        eval_dir: ``logdir`` handed to ``slim.evaluation.evaluate_once``.
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        # The returned tensor is not used below; the call is kept because it
        # creates the global step in this graph.
        tf_global_step = slim.get_or_create_global_step()

        # Dataset + SSD Model + Pre-processing
        dataset = dataset_factory.get_dataset(dataset_name, 'test', dataset_dir)
        ssd_net = ssd_vgg_300.SSDNet()
        ssd_shape = net_shape
        ssd_anchors = ssd_net.anchors(ssd_shape)

        # Create a dataset provider and batches
        with tf.device('/cpu:0'):
            with tf.name_scope(dataset_name + '_data_provider'):
                provider = slim.dataset_data_provider.DatasetDataProvider(
                    dataset,
                    common_queue_capacity = 2 * batch_size,
                    common_queue_min = batch_size,
                    shuffle = False
                )
            [image, shape, glabels, gbboxes] = provider.get(['image', 'shape','object/label', 'object/bbox'])
            [gdifficults] = provider.get(['object/difficult'])
            image, glabels, gbboxes, gbbox_img = ssd_vgg_preprocessing.preprocess_for_eval(image, glabels, gbboxes, ssd_shape, data_format = data_format, resize = ssd_vgg_preprocessing.Resize.WARP_RESIZE)
            # Encode the ground truth against the anchors.
            gclasses, glocalizations, gscores = ssd_net.bboxes_encode(glabels, gbboxes, ssd_anchors)
            # Describes how the flat batch list is regrouped below: five
            # single tensors followed by three groups of len(ssd_anchors).
            batch_shape = [1] * 5 + [len(ssd_anchors)] * 3

            # Evaluation Batch
            r = tf.train.batch(reshape_list([image, glabels, gbboxes, gdifficults, gbbox_img, gclasses, glocalizations, gscores]), batch_size = batch_size, num_threads = 1, capacity = 5 * batch_size, dynamic_pad = True)
            (b_image, b_glabels, b_gbboxes, b_gdifficults, b_gbbox_img, b_gclasses, b_glocalizations, b_gscores) = reshape_list(r, batch_shape)

        # SSD network + output decoding
        arg_scope = ssd_net.arg_scope(data_format= data_format)
        with slim.arg_scope(arg_scope):
            predictions, localizations, logits, _ = ssd_net.net(b_image, is_training=False)
        # Called for its side effect; the return value is not used. The loss
        # collections are read back further down.
        ssd_net.losses(logits, localizations, b_gclasses, b_glocalizations, b_gscores)

        # Detection post-processing and TP/FP matching are pinned to the CPU.
        with tf.device('/device:CPU:0'):
            localizations = ssd_net.bboxes_decode(localizations, ssd_anchors)
            rscores, rbboxes = ssd_net.detected_bboxes(predictions, localizations, select_threshold=0.01, nms_threshold=0.45, clipping_bbox=None, top_k=400, keep_top_k=200)
            num_gbboxes, tp, fp, rscores = tfe.bboxes_matching_batch(rscores.keys(), rscores, rbboxes, b_glabels, b_gbboxes, b_gdifficults, matching_threshold= 0.5)

        variables_to_restore = slim.get_variables_to_restore()

        with tf.device('/device:CPU:0'):
            dict_metrics = {}
            # Streaming mean of every registered loss ...
            for loss in tf.get_collection(tf.GraphKeys.LOSSES):
                dict_metrics[loss.op.name] = slim.metrics.streaming_mean(loss)
            # ... and of everything in the 'EXTRA_LOSSES' collection.
            for loss in tf.get_collection('EXTRA_LOSSES'):
                dict_metrics[loss.op.name] = slim.metrics.streaming_mean(loss)

            # One scalar summary per loss metric (metric[0] is the value op).
            for name, metric in dict_metrics.items():
                summary_name = name
                op = tf.summary.scalar(summary_name, metric[0], collections=[])
                tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

            # Streaming TP/FP arrays, stored per key `c` as (value, update) pairs.
            tp_fp_metric = tfe.streaming_tp_fp_arrays(num_gbboxes, tp, fp, rscores)
            for c in tp_fp_metric[0].keys():
                dict_metrics['tp_fp_%s' % c] = (tp_fp_metric[0][c], tp_fp_metric[1][c])

            aps_VOC07 = {}
            aps_voc12 = {}
            for c in tp_fp_metric[0].keys():
                # precision and recall values
                pre, rec = tfe.precision_recall(*tp_fp_metric[0][c])

                # average precision VOC07
                v = tfe.average_precision_voc07(pre,rec)
                summary_name = 'AP_VOC07/%s' % c
                op = tf.summary.scalar(summary_name, v, collections=[])
                tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)
                aps_VOC07[c] = v

                # Average precision VOC12.
                v = tfe.average_precision_voc12(pre, rec)
                summary_name = 'AP_VOC12/%s' % c
                op = tf.summary.scalar(summary_name, v, collections=[])
                tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)
                aps_voc12[c] = v

            # Mean average Precision VOC07
            summary_name = 'AP_VOC07/mAP'
            mAP = tf.add_n(list(aps_VOC07.values()))/len(aps_VOC07)
            op = tf.summary.scalar(summary_name, mAP, collections=[])
            # tf.Print wraps the summary op so the mAP is also printed when
            # the summary is evaluated.
            op = tf.Print(op, [mAP], summary_name)
            tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

            # Mean average precision VOC12.
            summary_name = 'AP_VOC12/mAP'
            mAP = tf.add_n(list(aps_voc12.values())) / len(aps_voc12)
            op = tf.summary.scalar(summary_name, mAP, collections=[])
            op = tf.Print(op, [mAP], summary_name)
            tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

        # Only the update ops are used below (as the eval op).
        names_to_values, names_to_updates = slim.metrics.aggregate_metric_map(dict_metrics)

        # Evaluation Loop
        gpu_options = tf.GPUOptions(allow_growth=True, per_process_gpu_memory_fraction=0.9)
        config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)

        # Enough batches to cover every sample once (last batch may be partial).
        num_batches = math.ceil(dataset.num_samples / float(batch_size))
        tf.logging.info('Evaluating %s' % ckpt_filename)
        start = time.time()
        slim.evaluation.evaluate_once(master= '', checkpoint_path = ckpt_filename, logdir= eval_dir, num_evals= num_batches, eval_op= flatten(list(names_to_updates.values())), variables_to_restore= variables_to_restore, session_config = config)

        # log time spent
        elapsed = time.time() - start
        print('Time Spent: %.3f' % elapsed)
        print('Time Spent per batch: %.3f seconds' % (elapsed/num_batches))
def __init__(self, figsize=(10, 10)):
    """Create the TF session, build and restore SSD-300, and wire up ROS I/O.

    Args:
        figsize: Not used anywhere in this constructor.
    """
    # TensorFlow session that grows GPU memory on demand.
    config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
    self._sess = tf.Session(config=config)
    self._cv_bridge = CvBridge()

    # Initial values of the node state; these attributes are only initialised
    # here, their use is outside this method.
    self.distance = []
    self.center = 0.0
    self.depth_horizon = 59
    self.robot_x = 0.0
    self.robot_y = 0.0
    self.banana_data = []
    self.apple_data = []
    self.cable_data = []
    self.latest_cable_x = -100.0
    self.latest_cable_y = -100.0
    self.r = 0.3
    self.data_count = []
    self._tf_listener = rostf.TransformListener()
    self._latest_point = None

    # Input placeholder: a single uint8 image with 3 channels.
    net_shape = (300, 300)
    data_format = 'NHWC'
    self.img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
    # Evaluation pre-processing: resize to SSD net shape.
    self.image_pre, self.labels_pre, self.bboxes_pre, self.bbox_img = \
        ssd_vgg_preprocessing.preprocess_for_eval(
            self.img_input, None, None, net_shape, data_format,
            resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
    # Add a leading batch dimension of size 1.
    self.image_4d = tf.expand_dims(self.image_pre, 0)

    # Define the SSD model.
    self.ssd_net = ssd_vgg_300.SSDNet()
    with tf.contrib.slim.arg_scope(
            self.ssd_net.arg_scope(data_format=data_format)):
        self.predictions, self.localisations, _, _ = self.ssd_net.net(
            self.image_4d, is_training=False)

    # Restore SSD model: initialise all variables, then overwrite them from
    # the checkpoint (path is relative to the working directory).
    ckpt_filename = 'checkpoints/ssd_300_vgg.ckpt'
    # ckpt_filename = '../checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt'
    self._sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(self._sess, ckpt_filename)

    # SSD default anchor boxes.
    self._ssd_anchors = self.ssd_net.anchors(net_shape)

    # ROS publishers.
    #self._sub = rospy.Subscriber('camera/rgb/image_color/compressed', CompressedImage, self.callback, queue_size=1)
    self._pub_img = rospy.Publisher('ssd_image', Image, queue_size=1)
    self._pub_rslt = rospy.Publisher('ssd_result', Float32MultiArray, queue_size=1)
    self._pub_detect = rospy.Publisher('detect', Float32MultiArray, queue_size=1)
    self._pub_banana_point = rospy.Publisher('banana_point', Float32MultiArray, queue_size=1)
    self._pub_apple_point = rospy.Publisher('apple_point', Float32MultiArray, queue_size=1)
    self._pub_cable_point = rospy.Publisher('cable_point', Float32MultiArray, queue_size=1)

    # RGB and depth images are subscribed through message_filters and
    # delivered together to self.callback.
    sub_rgb = message_filters.Subscriber(
        "camera/rgb/image_color/compressed",
        CompressedImage,
        queue_size=1,
        buff_size=2**24)
    sub_depth = message_filters.Subscriber("camera/depth/image",
                                           Image,
                                           queue_size=1,
                                           buff_size=2**24)
    # Author's note (translated): 2nd argument is the queue size, 3rd is the
    # allowed range for data synchronisation.
    self.mf = message_filters.ApproximateTimeSynchronizer(
        [sub_rgb, sub_depth], 1, 10.0)
    self.mf.registerCallback(self.callback)

    # Marker publishers.
    self.marker_pub = rospy.Publisher("test_text", Marker, queue_size=10)
    self.marker_voting_cable_pub = rospy.Publisher("voting_cable_point", Marker, queue_size=10)
    self.marker_voting_banana_pub = rospy.Publisher("voting_banana_point", Marker, queue_size=10)
    self.marker_voting_apple_pub = rospy.Publisher("voting_apple_point", Marker, queue_size=10)

    # 21 random colours, each a 3-tuple of ints in 0..255.
    # NOTE(review): 21 matches num_classes=21 used elsewhere with this model - confirm it is one colour per class.
    self._colors = []
    for i in xrange(21):
        _color = (random.randint(0, 255), random.randint(0, 255),
                  random.randint(0, 255))
        self._colors.append(_color)
def ssd_test(path):
    """Run SSD-300 on every file in a directory and plot the detections.

    The network is built in its own ``tf.Graph`` and run in a session that is
    closed on exit, so the function can be called more than once in the same
    process. (Building into the shared default graph with ``reuse=None``
    fails on the second call because the variables already exist.)

    Reads ``net_shape``, ``data_format`` and ``ckpt_filename`` from the
    enclosing (module) scope.

    Args:
        path: Directory whose entries are all read with ``mpimg.imread`` and
            passed through the detector.
    """
    # TensorFlow session: grow memory when needed, do not grab the whole GPU.
    gpu_options = tf.GPUOptions(allow_growth=True)
    config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)

    with tf.Graph().as_default(), tf.Session(config=config) as isess:
        # Input placeholder.
        img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))

        # Evaluation pre-processing: resize to SSD net shape.
        image_pre, labels_pre, bbox_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
            img_input, None, None, net_shape, data_format,
            resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
        image_4d = tf.expand_dims(image_pre, axis=0)

        # Define the SSD model. (`reuse` was always None here: the old
        # locals() check could never be True inside a function.)
        ssd_net = ssd_vgg_300.SSDNet()
        with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
            predictions, localizations, _, _ = ssd_net.net(
                image_4d, is_training=False, reuse=None)

        # SSD default anchor boxes.
        ssd_anchors = ssd_net.anchors(net_shape)

        # Restore the SSD model.
        isess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(isess, ckpt_filename)

        def post_process(img, select_thresh=0.5, nms_thresh=0.45):
            """Run the network on one image and return (classes, scores, bboxes)."""
            rimg, rpredictions, rlocalizations, rbbox_img = isess.run(
                [image_4d, predictions, localizations, bbox_img],
                feed_dict={img_input: img})

            # Get the classes and bboxes from the output.
            rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
                rpredictions, rlocalizations, ssd_anchors,
                select_threshold=select_thresh, img_shape=net_shape,
                num_classes=21, decode=True)
            rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
            rclasses, rscores, rbboxes = np_methods.bboxes_sort(
                rclasses, rscores, rbboxes, top_k=400)
            rclasses, rscores, rbboxes = np_methods.bboxes_nms(
                rclasses, rscores, rbboxes, nms_threshold=nms_thresh)

            # Resize the bboxes to the original image size (useless for Resize.WARP).
            rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
            return rclasses, rscores, rbboxes

        # Iterate the directory entries directly instead of indexing by position.
        for img_name in os.listdir(path):
            img = mpimg.imread(os.path.join(path, img_name))
            rclasses, rscores, rbboxes = post_process(img)
            visualization.plt_bboxes(img, rclasses, rscores, rbboxes)
def get_object_center(q, detect_class):
    """Detect an object with SSD, track it with SiamRPN and queue its centre.

    Frames are read from camera 0. SSD is run until a detection of class
    ``detect_class`` is found; that box initialises a SiamRPN tracker. While
    tracking, the centre of the tracked target, divided by the frame width and
    height, is put on ``q`` as a dict with the keys ``'x'`` and ``'y'``. The
    tracker is re-initialised from a fresh SSD detection whenever ``state``
    is falsy or ``count_number % 40 == 3``. Every frame is shown mirrored in a
    window; pressing ESC ends the loop.

    Args:
        q: Object with a ``put`` method receiving the centre dicts.
        detect_class: Class id compared against the SSD class of each
            detection (see the list below).
    """
    # classes:
    # 1.Aeroplanes 2.Bicycles 3.Birds 4.Boats 5.Bottles
    # 6.Buses 7.Cars 8.Cats 9.Chairs 10.Cows
    # 11.Dining tables 12.Dogs 13.Horses 14.Motorbikes 15.People
    # 16.Potted plants 17.Sheep 18.Sofas 19.Trains 20.TV/Monitors
    slim = tf.contrib.slim

    # TensorFlow session: grow memory when needed. TF, DO NOT USE ALL MY GPU MEMORY!!!
    gpu_options = tf.GPUOptions(allow_growth=True)
    config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)
    isess = tf.InteractiveSession(config=config)

    # Input placeholder.
    net_shape = (300, 300)
    data_format = 'NHWC'
    img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
    # Evaluation pre-processing: resize to SSD net shape.
    image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
        img_input, None, None, net_shape, data_format, resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
    image_4d = tf.expand_dims(image_pre, 0)

    # Define the SSD model.
    # Inside a function 'ssd_net' is not bound yet at this point, so this
    # expression always evaluates to None.
    reuse = True if 'ssd_net' in locals() else None
    ssd_net = ssd_vgg_300.SSDNet()
    with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
        predictions, localisations, _, _ = ssd_net.net(image_4d, is_training=False, reuse=reuse)

    # Restore SSD model.
    # ckpt_filename = 'checkpoints/ssd_300_vgg.ckpt'
    ckpt_filename = '../SSD-Tensorflow/checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt'
    isess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(isess, ckpt_filename)

    # SSD default anchor boxes.
    ssd_anchors = ssd_net.anchors(net_shape)

    # Main image processing routine.
    def process_image(img, select_threshold=0.5, nms_threshold=.45, net_shape=(300, 300)):
        """Run SSD on one image and return (classes, scores, bboxes)."""
        # Run SSD network.
        rimg, rpredictions, rlocalisations, rbbox_img = isess.run(
            [image_4d, predictions, localisations, bbox_img], feed_dict={img_input: img})

        # Get classes and bboxes from the net outputs.
        rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
            rpredictions, rlocalisations, ssd_anchors,
            select_threshold=select_threshold, img_shape=net_shape, num_classes=21, decode=True)

        rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
        rclasses, rscores, rbboxes = np_methods.bboxes_sort(rclasses, rscores, rbboxes, top_k=400)
        rclasses, rscores, rbboxes = np_methods.bboxes_nms(
            rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
        # Resize bboxes to original image shape. Note: useless for Resize.WARP!
        rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
        return rclasses, rscores, rbboxes

    def get_bboxes(rclasses, rbboxes):
        """Return one dict per detection with its index, class and box corners."""
        # Box columns are read as 0: y_min, 1: x_min, 2: y_max, 3: x_max.
        number_classes = rclasses.shape[0]
        object_bboxes = []
        for i in range(number_classes):
            object_bbox = dict()
            object_bbox['i'] = i
            object_bbox['class'] = rclasses[i]
            object_bbox['y_min'] = rbboxes[i, 0]
            object_bbox['x_min'] = rbboxes[i, 1]
            object_bbox['y_max'] = rbboxes[i, 2]
            object_bbox['x_max'] = rbboxes[i, 3]
            object_bboxes.append(object_bbox)
        return object_bboxes

    # load net: the SiamRPN tracker weights are loaded from a path relative to
    # this file, and the model is switched to eval mode.
    net = SiamRPNvot()
    net.load_state_dict(
        torch.load(
            join(realpath(dirname(__file__)), '../DaSiamRPN-master/code/SiamRPNVOT.model')))
    net.eval()

    # open video capture
    video = cv2.VideoCapture(0)
    if not video.isOpened():
        print("Could not open video")
        sys.exit()

    # Initial detection: keep reading frames until SSD reports an object of
    # the requested class; `index` is the loop flag.
    index = True
    while index:
        # Read first frame.
        ok, frame = video.read()
        if not ok:
            print('Cannot read video file')
            sys.exit()

        # Define an initial bounding box
        height = frame.shape[0]
        width = frame.shape[1]
        rclasses, rscores, rbboxes = process_image(frame)
        bboxes = get_bboxes(rclasses, rbboxes)
        for bbox in bboxes:
            if bbox['class'] == detect_class:
                print(bbox)
                # The box corners are multiplied by the frame size to get
                # pixel coordinates.
                ymin = int(bbox['y_min'] * height)
                xmin = int((bbox['x_min']) * width)
                ymax = int(bbox['y_max'] * height)
                xmax = int((bbox['x_max']) * width)
                # Convert corners to centre/size form for the tracker.
                cx = (xmin + xmax) / 2
                cy = (ymin + ymax) / 2
                h = ymax - ymin
                w = xmax - xmin
                new_bbox = (cx, cy, w, h)
                print(new_bbox)
                index = False
                break

    # tracker init
    target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
    state = SiamRPN_init(frame, target_pos, target_sz, net)

    # tracking and visualization
    toc = 0  # accumulated tracker ticks; not read again in this function
    count_number = 0
    while True:
        # Read a new frame
        ok, frame = video.read()
        if not ok:
            break

        # Start timer
        tic = cv2.getTickCount()

        # Update tracker
        state = SiamRPN_track(state, frame)  # track
        # print(state)
        toc += cv2.getTickCount() - tic

        # NOTE(review): the extent of this `if` body was reconstructed from
        # whitespace-collapsed source; the queue update is assumed to belong
        # to it because it indexes `state` - confirm against the original.
        if state:
            # Draw the tracked box on the frame.
            res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
            res = [int(l) for l in res]
            cv2.rectangle(frame, (res[0], res[1]), (res[0] + res[2], res[1] + res[3]), (0, 255, 255), 3)
            count_number += 1

            # set object_center: target centre divided by the frame size
            object_center = dict()
            object_center['x'] = state['target_pos'][0] / width
            object_center['y'] = state['target_pos'][1] / height
            q.put(object_center)

        # Re-detect with SSD when tracking failed, and also periodically
        # (whenever count_number % 40 == 3).
        if (not state) or count_number % 40 == 3:
            # Tracking failure
            # NOTE(review): this text is drawn on a frame that is replaced by
            # the next read below, so it is unlikely to ever be displayed.
            cv2.putText(frame, "Tracking failure detected", (100, 80), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 255), 2)
            index = True
            while index:
                # NOTE(review): `ok` is not checked here; a failed read would
                # pass frame=None into process_image.
                ok, frame = video.read()
                rclasses, rscores, rbboxes = process_image(frame)
                bboxes = get_bboxes(rclasses, rbboxes)
                for bbox in bboxes:
                    if bbox['class'] == detect_class:
                        ymin = int(bbox['y_min'] * height)
                        xmin = int(bbox['x_min'] * width)
                        ymax = int(bbox['y_max'] * height)
                        xmax = int(bbox['x_max'] * width)
                        cx = (xmin + xmax) / 2
                        cy = (ymin + ymax) / 2
                        h = ymax - ymin
                        w = xmax - xmin
                        new_bbox = (cx, cy, w, h)
                        # Re-initialise the tracker on the new detection.
                        target_pos, target_sz = np.array(
                            [cx, cy]), np.array([w, h])
                        state = SiamRPN_init(frame, target_pos, target_sz, net)

                        # Draw the detection box.
                        p1 = (int(xmin), int(ymin))
                        p2 = (int(xmax), int(ymax))
                        cv2.rectangle(frame, p1, p2, (0, 255, 0), 2, 1)
                        index = 0
                        break

        # Resize the image.
        resized_frame = cv2.resize(frame, None, fx=0.65, fy=0.65, interpolation=cv2.INTER_AREA)
        # Flip the image horizontally (for a mirrored display).
        horizontal = cv2.flip(resized_frame, 1, dst=None)
        # Show the image.
        cv2.namedWindow("SSD+SiamRPN", cv2.WINDOW_NORMAL)
        cv2.imshow('SSD+SiamRPN', horizontal)

        # Exit if ESC pressed
        k = cv2.waitKey(1) & 0xff
        if k == 27:
            break

    video.release()
    cv2.destroyAllWindows()
def cmd(image_name):
    """Return a steering command from the horizontal position of a detection.

    The image is run through SSD-300 and ``visualization.get_center_x`` is
    asked for a centre x coordinate, which is compared with 40 % and 60 % of
    the image width.

    The network is built in its own ``tf.Graph`` and run in a session that is
    closed on exit, so the function can be called repeatedly in one process.
    (Building into the shared default graph with ``reuse=None`` fails on the
    second call because the variables already exist.)

    Args:
        image_name: Path of the image file, read with ``mpimg.imread``.

    Returns:
        ``"stop"`` when the centre is negative, ``"left"`` for (0, 40 %],
        ``"middle"`` for (40 %, 60 %), ``"right"`` for [60 %, 100 %) and
        ``"danger"`` for anything else (including exactly 0 or >= width).
    """
    #path = '../demo/'
    #image_names = sorted(os.listdir(path))
    #image_name="car2.jpg"
    #img = mpimg.imread(path + image_names[-5])
    img = mpimg.imread(image_name)
    print("get image. Ready to process")

    # TensorFlow session: grow memory when needed. TF, DO NOT USE ALL MY GPU MEMORY!!!
    gpu_options = tf.GPUOptions(allow_growth=True)
    config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)

    net_shape = (300, 300)
    data_format = 'NHWC'
    select_threshold = 0.5
    nms_threshold = .45

    with tf.Graph().as_default(), tf.Session(config=config) as isess:
        # Input placeholder.
        img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
        # Evaluation pre-processing: resize to SSD net shape.
        image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
            img_input, None, None, net_shape, data_format,
            resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
        image_4d = tf.expand_dims(image_pre, 0)

        # Define the SSD model. (`reuse` was always None here: the old
        # locals() check could never be True inside a function.)
        ssd_net = ssd_vgg_300.SSDNet()
        with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
            predictions, localisations, _, _ = ssd_net.net(
                image_4d, is_training=False, reuse=None)

        # Restore SSD model.
        ckpt_filename = '../checkpoints/ssd_300_vgg.ckpt'
        # ckpt_filename = '../checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt'
        isess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(isess, ckpt_filename)

        # SSD default anchor boxes.
        ssd_anchors = ssd_net.anchors(net_shape)

        # Run SSD network.
        rimg, rpredictions, rlocalisations, rbbox_img = isess.run(
            [image_4d, predictions, localisations, bbox_img],
            feed_dict={img_input: img})

    # Everything below works on the NumPy results only, so the session can
    # already be closed.
    # Get classes and bboxes from the net outputs.
    rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
        rpredictions, rlocalisations, ssd_anchors,
        select_threshold=select_threshold, img_shape=net_shape,
        num_classes=21, decode=True)

    rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
    rclasses, rscores, rbboxes = np_methods.bboxes_sort(rclasses, rscores, rbboxes, top_k=400)
    rclasses, rscores, rbboxes = np_methods.bboxes_nms(
        rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
    # Resize bboxes to original image shape. Note: useless for Resize.WARP!
    rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)

    # visualization.bboxes_draw_on_img(img, rclasses, rscores, rbboxes, visualization.colors_plasma)
    centerx = visualization.get_center_x(img, rclasses, rscores, rbboxes)
    width = img.shape[1]
    # left in (0, 40%], mid in (40%, 60%), right in [60%, 100%)
    left = width * 0.4
    right = width * 0.6

    if centerx < 0:
        print("nothing in sight")
        return "stop"
    if 0 < centerx <= left:
        return "left"
    if left < centerx < right:
        return "middle"
    if right <= centerx < width:
        return "right"

    # Reached for centerx == 0 or centerx >= width. The values are numbers,
    # so they must be converted before concatenation (this branch used to
    # raise TypeError instead of returning "danger").
    print("danger centerx, out of picture")
    print("centerx: " + str(centerx))
    print("left: " + str(left))
    print("right: " + str(right))
    print("width: " + str(width))
    return "danger"
from preprocessing import ssd_vgg_preprocessing # TensorFlow session: grow memory when needed. TF, DO NOT USE ALL MY GPU MEMORY!!! gpu_options = tf.GPUOptions(allow_growth=True) config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options) isess = tf.InteractiveSession(config=config) # Input placeholder. net_shape = (300, 300) data_format = 'NHWC' img_input = tf.placeholder(tf.uint8, shape=(None, None, 3)) # Evaluation pre-processing: resize to SSD net shape. image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval( img_input, None, None, net_shape, data_format, resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE) image_4d = tf.expand_dims(image_pre, 0) # Define the SSD model. reuse = True if 'ssd_300_vgg' in locals() else None ssd_params = ssd_vgg_300.SSDNet.default_params._replace(num_classes=4) ssd_net = ssd_vgg_300.SSDNet(ssd_params) with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)): predictions, localisations, _, _ = ssd_net.net(image_4d, is_training=False, reuse=reuse) summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES)) summaries.add(tf.summary.image("Image", image_4d))
p2 = (int(bbox[2] * shape[0]), int(bbox[3] * shape[1])) cv2.rectangle(img, p1[::-1], p2[::-1], color, thickness) # Draw text... s = '%s/%.3f' % (classes[i], scores[i]) p1 = (p1[0] - 5, p1[1]) cv2.putText(img, s, p1[::-1], cv2.FONT_HERSHEY_DUPLEX, 0.4, color, 1) reuse = True if 'ssd' in locals() else None # Input placeholder. net_shape = (512, 512) img_input = tf.placeholder(tf.uint8, shape=(None, None, 3)) image_pre, labels_pre, bboxes_pre, bbox_img = ssd_preprocessing.preprocess_for_eval( img_input, None, None, net_shape, resize=ssd_preprocessing.Resize.PAD_AND_RESIZE) image_4d = tf.expand_dims(image_pre, 0) params = ssd_vgg_512.SSDNet.default_params ssd = ssd_vgg_512.SSDNet(params) layers_anchors = ssd.anchors(net_shape, dtype=np.float32) # Re-define the model with slim.arg_scope(ssd.arg_scope(weight_decay=0.0005)): predictions, localisations, logits, end_points = ssd.net(image_4d, is_training=False, reuse=reuse) localisations = ssd.bboxes_decode(localisations, layers_anchors)
# ## SSD 300 Model # # The SSD 300 network takes 300x300 image inputs. In order to feed any image, the latter is resize to this input shape (i.e.`Resize.WARP_RESIZE`). Note that even though it may change the ratio width / height, the SSD model performs well on resized images (and it is the default behaviour in the original Caffe implementation). # # SSD anchors correspond to the default bounding boxes encoded in the network. The SSD net output provides offset on the coordinates and dimensions of these anchors. # Input placeholder. #net_shape = (300, 300) net_shape = (shapeWidth, shapeHeight) data_format = 'NHWC' #'NHWC' #'NCHW' img_input = tf.placeholder(tf.uint8, shape=(None, None, 3)) # Evaluation pre-processing: resize to SSD net shape. image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval( img_input, None, None, net_shape, data_format, resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE) image_4d = tf.expand_dims(image_pre, 0) # Define the SSD model. reuse = True if 'ssd_net' in locals() else None if shapeWidth==300: ssd_net = ssd_vgg_300.SSDNet() else: ssd_net = ssd_vgg_512.SSDNet() with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)): predictions, localisations, _, _ = ssd_net.net(image_4d, is_training=False, reuse=reuse) # Restore SSD model. if shapeWidth==300:
def _recv_exact(sock, size):
    """Read up to `size` bytes from `sock`, looping over short reads.

    A single `recv` call may return fewer bytes than requested, so keep
    reading until `size` bytes have arrived. The returned bytes object is
    shorter than `size` only when the peer closed the connection early.
    """
    chunks = []
    remaining = size
    while remaining > 0:
        chunk = sock.recv(remaining)
        if not chunk:
            # Empty read: the peer closed the connection.
            break
        chunks.append(chunk)
        remaining -= len(chunk)
    return b''.join(chunks)


def _receive_upload(client_socket):
    """Receive one uploaded file from `client_socket` and save it locally.

    The client first sends a header packed as '128sl' (NUL-padded file name
    followed by the payload size), then the raw file bytes.
    NOTE(review): '128sl' uses native size/alignment, so the header length
    depends on the platform; the client must pack it the same way.

    Returns the path of the saved file, or None when the header or payload
    is incomplete or invalid.
    """
    header_format = '128sl'
    fileinfo_size = struct.calcsize(header_format)
    buf = _recv_exact(client_socket, fileinfo_size)
    if len(buf) < fileinfo_size:
        print('incomplete file header, dropping connection')
        return None

    filename, filesize = struct.unpack(header_format, buf)
    try:
        fn = filename.decode().strip('\00')
    except UnicodeDecodeError:
        print('undecodable file name, dropping connection')
        return None
    # The name comes from the network: keep only its final component so a
    # client cannot write outside the working directory.
    fn = os.path.basename(fn)
    if not fn or filesize < 0:
        print('invalid file header, dropping connection')
        return None

    new_filename = os.path.join('./', fn)
    print(fn)
    print('file new name is ' + new_filename + ', filesize is ' + str(filesize))

    recvd_size = 0  # Number of payload bytes received so far.
    with open(new_filename, 'wb') as fp:
        print('start receiving...')
        while recvd_size < filesize:
            data = client_socket.recv(min(1024, filesize - recvd_size))
            if not data:
                # Peer closed before sending the announced number of bytes.
                break
            fp.write(data)
            # Count what actually arrived, not what was requested.
            recvd_size += len(data)

    if recvd_size != filesize:
        print('incomplete file received: ' + str(recvd_size) + ' of ' + str(filesize) + ' bytes')
        return None
    print('end receive...')
    return new_filename


def main():
    """Restore the SSD-300 network, then serve detections over TCP.

    Builds the evaluation graph, restores the checkpoint into an interactive
    session, and listens on port 12345. For each connection one uploaded
    image file is received, passed to `get_cmd`, and the resulting string is
    sent back before the connection is closed. Runs until interrupted.
    """
    # TensorFlow session: grow GPU memory on demand instead of claiming it all.
    gpu_options = tf.GPUOptions(allow_growth=True)
    config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)
    isess = tf.InteractiveSession(config=config)

    # Input placeholder.
    net_shape = (300, 300)
    data_format = 'NHWC'
    img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))

    # Evaluation pre-processing: resize to SSD net shape.
    image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
        img_input, None, None, net_shape, data_format,
        resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
    image_4d = tf.expand_dims(image_pre, 0)

    # Define the SSD model.
    # `ssd_net` is not bound yet at this point inside the function, so this
    # evaluates to None here (the idiom is carried over from notebook code).
    reuse = True if 'ssd_net' in locals() else None
    ssd_net = ssd_vgg_300.SSDNet()
    with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
        predictions, localisations, _, _ = ssd_net.net(
            image_4d, is_training=False, reuse=reuse)

    # Restore SSD model.
    ckpt_filename = '../checkpoints/ssd_300_vgg.ckpt'
    # ckpt_filename = '../checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt'
    isess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(isess, ckpt_filename)

    # SSD default anchor boxes.
    ssd_anchors = ssd_net.anchors(net_shape)
    print("init ready. net restored")

    # Serve detection requests: one uploaded image per TCP connection.
    tcp_socket = socket(AF_INET, SOCK_STREAM)
    try:
        tcp_socket.setsockopt(SOL_SOCKET, SO_REUSEADDR, 1)
        # Fixed port number.
        tcp_socket.bind(("", 12345))
        # Switch the socket from active to passive (listening) mode.
        tcp_socket.listen(128)
        print("ready to receive")
        while True:
            client_socket, client_addr = tcp_socket.accept()
            print('Accept new connection from ', client_addr)
            try:
                new_filename = _receive_upload(client_socket)
                if new_filename is None:
                    # Nothing usable was received; wait for the next client.
                    continue
                cmd_str = get_cmd(isess, ssd_anchors, image_4d, predictions,
                                  localisations, bbox_img, img_input, new_filename)
                # sendall keeps writing until the whole reply is transmitted.
                client_socket.sendall(cmd_str.encode())
                print("end sending")
            finally:
                # Always release the connection, even if processing failed.
                client_socket.close()
    finally:
        tcp_socket.close()