Example #1
def load_ssd_net(sess, checkpoint, net_shape=(300, 300), data_format='NHWC'):

    # Input placeholder.
    img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))

    # Evaluation pre-processing: resize to SSD net shape.
    image_pre, labels_pre, bboxes_pre, bbox_img = \
        ssd_vgg_preprocessing.preprocess_for_eval(
            img_input, None, None, net_shape, data_format,
            resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
    image_4d = tf.expand_dims(image_pre, 0)

    # Define the SSD model.
    reuse = True if 'ssd_net' in locals() else None
    ssd_net_dict = {300: ssd_vgg_300.SSDNet, 512: ssd_vgg_512.SSDNet}
    ssd_net = ssd_net_dict[net_shape[0]]()

    with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
        predictions, localisations, _, _ = ssd_net.net(
            image_4d, is_training=False, reuse=reuse)

    # Restore SSD model into the provided session.
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint)

    # SSD default anchor boxes.
    ssd_anchors = ssd_net.anchors(net_shape)

    # Return dictionary.
    return_dict = {'img_input': img_input,
                   'image_4d': image_4d,
                   'predictions': predictions,
                   'localisations': localisations,
                   'bbox_img': bbox_img,
                   'ssd_anchors': ssd_anchors}
    return return_dict
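
A minimal usage sketch for the helper above (hedged: the session setup and the checkpoint path are assumptions, following the other examples on this page):

# Hypothetical usage of load_ssd_net (checkpoint path is an assumption).
gpu_options = tf.GPUOptions(allow_growth=True)
sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
net = load_ssd_net(sess, './checkpoints/ssd_300_vgg.ckpt')
# Feed any HxWx3 uint8 image through the restored graph:
# rimg, rpred, rloc, rbbox = sess.run(
#     [net['image_4d'], net['predictions'], net['localisations'], net['bbox_img']],
#     feed_dict={net['img_input']: img})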
Example #2
from struct import *

##GPU, SSD network, TF Session: Restore a checkpoint and keep a Session for all threads...##
# TensorFlow session: grow memory when needed. TF, DO NOT USE ALL MY GPU MEMORY!!!
gpu_options = tf.GPUOptions(allow_growth=True)
config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options, device_count={'GPU': 2})
isess = tf.InteractiveSession(config=config)
# Input placeholder.
net_shape = (300, 300)
data_format = 'NHWC'
img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
# Evaluation pre-processing: resize to SSD net shape.
image_pre, labels_pre, bboxes_pre, bbox_img = \
    ssd_vgg_preprocessing.preprocess_for_eval(img_input,
                                              None,
                                              None,
                                              net_shape,
                                              data_format,
                                              resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
image_4d = tf.expand_dims(image_pre, 0)

# Define the SSD model.
ssd_time = time.time()
reuse = True if 'ssd_net' in locals() else None
ssd_net = ssd_vgg_300.SSDNet()
with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
    predictions, localisations, _, _ = ssd_net.net(image_4d, is_training=False, reuse=reuse)
print('ssd_net_time: ', time.time() - ssd_time)
restore_time = time.time()
#TODO: ckpt_filename is from sys.argv
# Restore SSD model.
# ckpt_filename = './checkpoints/ssd_300_vgg.ckpt'
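
The example is truncated here. A hedged completion of the restore step, following the restore pattern in the other examples; reading ckpt_filename from sys.argv matches the TODO above but is an assumption:

import sys  # assumed; this snippet's other imports are truncated
ckpt_filename = sys.argv[1] if len(sys.argv) > 1 else './checkpoints/ssd_300_vgg.ckpt'
isess.run(tf.global_variables_initializer())
saver = tf.train.Saver()
saver.restore(isess, ckpt_filename)
print('restore_time: ', time.time() - restore_time)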
Example #3
def main(argv):
    # TensorFlow session: grow memory when needed. TF, DO NOT USE ALL MY GPU MEMORY!!!
    gpu_options = tf.GPUOptions(allow_growth=True)
    config = tf.ConfigProto(log_device_placement=False,
                            gpu_options=gpu_options)
    sess = tf.Session(config=config)
    # isess = tf.InteractiveSession(config=config)

    # Input placeholder.
    net_shape = (300, 300)
    data_format = 'NHWC'
    img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
    # Evaluation pre-processing: resize to SSD net shape.
    image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
        img_input,
        None,
        None,
        net_shape,
        data_format,
        resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
    image_4d = tf.expand_dims(image_pre, 0)

    # Define the SSD model.
    reuse = True if 'ssd_net' in locals() else None
    ssd_net = ssd_vgg_300.SSDNet()
    with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
        predictions, localisations, _, _ = ssd_net.net(image_4d,
                                                       is_training=False,
                                                       reuse=reuse)

    # Restore SSD model.
    ckpt_filename = '../checkpoints/ssd_300_vgg.ckpt'
    # ckpt_filename = '../checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt'
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(sess, ckpt_filename)

    # SSD default anchor boxes.
    ssd_anchors = ssd_net.anchors(net_shape)

    # Test on a demo video and save per-frame detections.
    if argv.output_dir is None:
        output_p_dir = os.path.dirname(argv.video_path)
        file_name = os.path.basename(argv.video_path).split('.')[0]
        output_dir = os.path.join(output_p_dir, file_name)
        if not os.path.isdir(output_dir):
            os.mkdir(output_dir)
    else:
        if not os.path.isdir(argv.output_dir):
            assert 0, "output_dir {} does not exist.".format(argv.output_dir)
        output_dir = argv.output_dir
    cap = cv2.VideoCapture(argv.video_path)
    num_frame = cap.get(cv2.CAP_PROP_FRAME_COUNT)
    count = 0
    name_template = 'frame-%06d.txt'
    while cap.isOpened():
        ret, img = cap.read()
        if not ret:  # end of video
            break
        rclasses, rscores, rbboxes = process_image(sess, img, image_4d,
                                                   predictions, localisations,
                                                   bbox_img, img_input,
                                                   ssd_anchors)
        name = os.path.join(output_dir, name_template % count)
        visualization.save_bboxes_imgs_to_file(name, img, rclasses, rscores,
                                               rbboxes)
        count += 1
        print("Detection frame [%d/%d]\r" % (count, num_frame), end="")
Example #4
import matplotlib.image as mpimg
import sys
sys.path.append('../')
from nets import ssd_vgg_300, ssd_common, np_methods
from preprocessing import ssd_vgg_preprocessing
from notebooks import visualization
# TensorFlow session: grow memory when needed. TF, DO NOT USE ALL MY GPU MEMORY!!!
gpu_options = tf.GPUOptions(allow_growth=True)
config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)
isess = tf.InteractiveSession(config=config)
# Input placeholder.
net_shape = (300, 300)
data_format = 'NHWC'
img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
# Evaluation pre-processing: resize to SSD net shape.
image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
    img_input, None, None, (None, None), resize=ssd_vgg_preprocessing.Resize.NONE)
# Whitens the image (subtracts the per-channel means), resizes it, and adjusts the bbox accordingly.
image_4d = tf.expand_dims(image_pre, 0)  # [1, height, width, channels]

# Define the SSD model.
reuse = True if 'ssd_net' in locals() else None
ssd_net = ssd_vgg_300.SSDNet()
with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
    predictions, localisations, _, _ = ssd_net.net(image_4d, is_training=False, reuse=reuse)

# Restore SSD model.
ckpt_filename = '/home/sarah/SSD-Tensorflow/checkpoints/model.ckpt-150000'
# ckpt_filename = '../checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt'
isess.run(tf.global_variables_initializer())
saver = tf.train.Saver()
saver.restore(isess, ckpt_filename)
Example #5
                   [2, .5, 3, 1. / 3], [2, .5], [2, .5]],
    anchor_steps=[8, 16, 32, 64],
    anchor_offset=0.5,
    normalizations=[20, -1, -1, -1, -1, -1],
    prior_scaling=[0.1, 0.1, 0.2, 0.2])

data_format = 'NHWC'
img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
large = tf.placeholder(tf.bool)
medium = tf.placeholder(tf.bool)

# Evaluation pre-processing: resize to SSD net shape.
image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
    img_input,
    None,
    None, (300, 300),
    data_format,
    resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE,
    large=large,
    medium=medium)
image_4d = tf.expand_dims(image_pre, 0)

# Define the SSD model.
reuse = True if 'ssd_net' in locals() else None
ssd_net = ssd_vgg_300.SSDNet(default_params)
with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
    predictions, localisations, _, _ = ssd_net.net(image_4d,
                                                   is_training=False,
                                                   reuse=tf.AUTO_REUSE,
                                                   large=large,
                                                   medium=medium)
def set_centers():

    print("开启线程:将object_centers放入queue")

    # TensorFlow session: grow memory when needed. TF, DO NOT USE ALL MY GPU MEMORY!!!
    gpu_options = tf.GPUOptions(allow_growth=True)
    config = tf.ConfigProto(log_device_placement=False,
                            gpu_options=gpu_options)
    isess = tf.InteractiveSession(config=config)

    # Input placeholder.
    net_shape = (300, 300)
    data_format = 'NHWC'
    img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
    # Evaluation pre-processing: resize to SSD net shape.
    image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
        img_input,
        None,
        None,
        net_shape,
        data_format,
        resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
    image_4d = tf.expand_dims(image_pre, 0)

    # Define the SSD model.
    reuse = True if 'ssd_net' in locals() else None
    ssd_net = ssd_vgg_300.SSDNet()
    with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
        predictions, localisations, _, _ = ssd_net.net(image_4d,
                                                       is_training=False,
                                                       reuse=reuse)

    # Restore SSD model.
    # ckpt_filename = 'checkpoints/ssd_300_vgg.ckpt'
    ckpt_filename = 'checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt'

    isess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(isess, ckpt_filename)

    # SSD default anchor boxes.
    ssd_anchors = ssd_net.anchors(net_shape)

    # Main image processing routine.
    def process_image(img,
                      select_threshold=0.5,
                      nms_threshold=.45,
                      net_shape=(300, 300)):
        # Run SSD network.
        rimg, rpredictions, rlocalisations, rbbox_img = isess.run(
            [image_4d, predictions, localisations, bbox_img],
            feed_dict={img_input: img})

        # Get classes and bboxes from the net outputs.
        rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
            rpredictions,
            rlocalisations,
            ssd_anchors,
            select_threshold=select_threshold,
            img_shape=net_shape,
            num_classes=21,
            decode=True)

        rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
        rclasses, rscores, rbboxes = np_methods.bboxes_sort(rclasses,
                                                            rscores,
                                                            rbboxes,
                                                            top_k=400)
        rclasses, rscores, rbboxes = np_methods.bboxes_nms(
            rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
        # Resize bboxes to original image shape. Note: useless for Resize.WARP!
        rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
        return rclasses, rscores, rbboxes

    def get_centers(rclasses, rbboxes):
        # Get the center location of each detected object.

        num_detections = rclasses.shape[0]
        object_centers = []
        for i in range(num_detections):
            object_center = dict()
            object_center['i'] = i
            object_center['class'] = rclasses[i]
            object_center['x'] = (rbboxes[i, 1] +
                                  rbboxes[i, 3]) / 2  # x-coordinate of the object center
            object_center['y'] = (rbboxes[i, 0] +
                                  rbboxes[i, 2]) / 2  # y-coordinate of the object center
            object_centers.append(object_center)
        return object_centers

    count = 0
    cap = cv2.VideoCapture(0)

    while count < 100:
        # Grab a frame from the camera.
        ret, img = cap.read()
        rclasses, rscores, rbboxes = process_image(img)
        '''
        classes:
        1.Aeroplanes     2.Bicycles   3.Birds       4.Boats           5.Bottles
        6.Buses          7.Cars       8.Cats        9.Chairs          10.Cows
        11.Dining tables 12.Dogs      13.Horses     14.Motorbikes     15.People
        16.Potted plants 17.Sheep     18.Sofas      19.Trains         20.TV/Monitors
        '''
        object_centers = get_centers(rclasses, rbboxes)
        # print("put object centers: " + str(object_centers))
        for object_center in object_centers:
            if object_center['class'] == 5 or object_center['class'] == 7:
                new_object_center = object_center
                q.put(new_object_center)
                count += 1
                break
    print("完成输入")
    cap.release()
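
A hedged sketch of how set_centers might be driven; the shared queue q consumed inside the function is an assumption:

# Hypothetical driver for set_centers (the module-level queue q is an assumption).
import threading
import queue

q = queue.Queue()
t = threading.Thread(target=set_centers, daemon=True)
t.start()
for _ in range(100):  # set_centers puts at most 100 centers
    print('object center:', q.get())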
Example #7
from preprocessing import ssd_vgg_preprocessing

ckpt_filename = '/mogu/liubang/mytf/SSD-Tensorflow/logs2/model.ckpt-122449'
NUM = 7

# SSD object.
reuse = True if 'ssd' in locals() else None
params = ssd_vgg_512.SSDNet.default_params
ssd_params = params._replace(num_classes=NUM)
ssd = ssd_vgg_512.SSDNet(ssd_params)

# Image pre-processing.
out_shape = ssd.params.img_shape
img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
image_pre, labels_pre, bboxes_pre, bbox_img = \
    ssd_vgg_preprocessing.preprocess_for_eval(img_input, None, None, out_shape,
                                              resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)

image_4d = tf.expand_dims(image_pre, 0)

# SSD construction.
with slim.arg_scope(ssd.arg_scope(weight_decay=0.0005)):
    predictions, localisations, logits, end_points = ssd.net(image_4d,
                                                             is_training=False,
                                                             reuse=reuse)

# SSD default anchor boxes.
img_shape = out_shape
layers_anchors = ssd.anchors(img_shape, dtype=np.float32)

for k in sorted(end_points.keys()):
    print(k, end_points[k].get_shape())
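
The checkpoint path defined above is never restored in this excerpt; a hedged restore sketch following the pattern of the other examples (the session setup is an assumption):

isess = tf.InteractiveSession()
isess.run(tf.global_variables_initializer())
saver = tf.train.Saver()
saver.restore(isess, ckpt_filename)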
Example #8
def demo6():
    ttf_img, _, _, _ = ssd_vgg_preprocessing.preprocess_for_eval(
        tf.constant(misc.imread('dog.jpg'), dtype=tf.uint8),
        None, None, (300, 300), 'NHWC',
        resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
    isess = tf.Session()
    rtfimg = isess.run([ttf_img])
    print(rtfimg[0])
Example #9
def ssd_eval(dataset_name, dataset_dir, batch_size, eval_dir):
    
    tf.logging.set_verbosity(tf.logging.INFO)

    with tf.Graph().as_default():

        tf_global_step = slim.get_or_create_global_step()
        
        # Dataset + SSD Model + Pre-processing
        dataset = dataset_factory.get_dataset(dataset_name, 'test', dataset_dir)
        
        ssd_net = ssd_vgg_300.SSDNet()
        ssd_shape = net_shape  # net_shape is expected at module scope, e.g. (300, 300)
        ssd_anchors = ssd_net.anchors(ssd_shape)

        # Create a dataset provider and batches.
        with tf.device('/cpu:0'):
            with tf.name_scope(dataset_name + '_data_provider'):
                provider = slim.dataset_data_provider.DatasetDataProvider(
                    dataset,
                    common_queue_capacity=2 * batch_size,
                    common_queue_min=batch_size,
                    shuffle=False)

            [image, shape, glabels, gbboxes] = provider.get(
                ['image', 'shape', 'object/label', 'object/bbox'])
            [gdifficults] = provider.get(['object/difficult'])

            image, glabels, gbboxes, gbbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
                image, glabels, gbboxes, ssd_shape,
                data_format=data_format,
                resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)

            gclasses, glocalizations, gscores = ssd_net.bboxes_encode(
                glabels, gbboxes, ssd_anchors)
            batch_shape = [1] * 5 + [len(ssd_anchors)] * 3

            # Evaluation batch.
            r = tf.train.batch(
                reshape_list([image, glabels, gbboxes, gdifficults, gbbox_img,
                              gclasses, glocalizations, gscores]),
                batch_size=batch_size,
                num_threads=1,
                capacity=5 * batch_size,
                dynamic_pad=True)
            (b_image, b_glabels, b_gbboxes, b_gdifficults, b_gbbox_img,
             b_gclasses, b_glocalizations, b_gscores) = reshape_list(r, batch_shape)

        # SSD network + output decoding.
        arg_scope = ssd_net.arg_scope(data_format=data_format)
        with slim.arg_scope(arg_scope):
            predictions, localizations, logits, _ = ssd_net.net(b_image, is_training=False)

        ssd_net.losses(logits, localizations, b_gclasses, b_glocalizations, b_gscores)

        with tf.device('/device:CPU:0'):
            localizations = ssd_net.bboxes_decode(localizations, ssd_anchors)
            rscores, rbboxes = ssd_net.detected_bboxes(predictions, localizations,
                                                       select_threshold=0.01,
                                                       nms_threshold=0.45,
                                                       clipping_bbox=None,
                                                       top_k=400,
                                                       keep_top_k=200)

            num_gbboxes, tp, fp, rscores = tfe.bboxes_matching_batch(
                rscores.keys(), rscores, rbboxes,
                b_glabels, b_gbboxes, b_gdifficults,
                matching_threshold=0.5)
            
        variables_to_restore = slim.get_variables_to_restore()

   
        with tf.device('/device:CPU:0'):

            dict_metrics = {}
            
            for loss in tf.get_collection(tf.GraphKeys.LOSSES):
                dict_metrics[loss.op.name] = slim.metrics.streaming_mean(loss)

            for loss in tf.get_collection('EXTRA_LOSSES'):
                dict_metrics[loss.op.name] = slim.metrics.streaming_mean(loss)
            
            for name, metric in dict_metrics.items():
                summary_name = name
                op = tf.summary.scalar(summary_name, metric[0], collections=[])
                tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)
            
            tp_fp_metric = tfe.streaming_tp_fp_arrays(num_gbboxes, tp, fp, rscores)
            for c in tp_fp_metric[0].keys():
                dict_metrics['tp_fp_%s' % c] = (tp_fp_metric[0][c], tp_fp_metric[1][c])

            aps_VOC07 = {}
            aps_voc12 = {}
            
            for c in tp_fp_metric[0].keys():
                # precision and recall values
                pre, rec = tfe.precision_recall(*tp_fp_metric[0][c])
                
                # average precision VOC07
                v = tfe.average_precision_voc07(pre,rec)
                summary_name = 'AP_VOC07/%s' % c
                op = tf.summary.scalar(summary_name, v, collections=[])
                tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)
                aps_VOC07[c] = v

                # Average precision VOC12.
                v = tfe.average_precision_voc12(pre, rec)
                summary_name = 'AP_VOC12/%s' % c
                op = tf.summary.scalar(summary_name, v, collections=[])
                tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)
                aps_voc12[c] = v
            
            # Mean average Precision VOC07
            summary_name = 'AP_VOC07/mAP'
            mAP = tf.add_n(list(aps_VOC07.values()))/len(aps_VOC07)
            op = tf.summary.scalar(summary_name, mAP, collections=[])
            op = tf.Print(op, [mAP], summary_name)
            tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

            # Mean average precision VOC12.
            summary_name = 'AP_VOC12/mAP'
            mAP = tf.add_n(list(aps_voc12.values())) / len(aps_voc12)
            op = tf.summary.scalar(summary_name, mAP, collections=[])
            op = tf.Print(op, [mAP], summary_name)
            tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)


        names_to_values, names_to_updates = slim.metrics.aggregate_metric_map(dict_metrics)

        # Evaluation Loop

        gpu_options = tf.GPUOptions(allow_growth=True, per_process_gpu_memory_fraction=0.9)
        config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)


        num_batches = math.ceil(dataset.num_samples / float(batch_size))
        # ckpt_filename is expected at module scope.
        tf.logging.info('Evaluating %s' % ckpt_filename)
        start = time.time()
        slim.evaluation.evaluate_once(master='',
                                      checkpoint_path=ckpt_filename,
                                      logdir=eval_dir,
                                      num_evals=num_batches,
                                      eval_op=flatten(list(names_to_updates.values())),
                                      variables_to_restore=variables_to_restore,
                                      session_config=config)
        # log time spent
        elapsed = time.time() - start
        print('Time Spent: %.3f' % elapsed)
        print('Time Spent per batch: %.3f seconds' % (elapsed/num_batches))
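
A hypothetical invocation of ssd_eval; the function also reads net_shape, data_format, ckpt_filename and helpers such as reshape_list and flatten from module scope, so all values below are assumptions:

net_shape = (300, 300)
data_format = 'NHWC'
ckpt_filename = './checkpoints/ssd_300_vgg.ckpt'
ssd_eval('pascalvoc_2007', '/data/voc2007_tfrecords', batch_size=8, eval_dir='./logs/eval')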
Example #10
    def __init__(self, figsize=(10, 10)):
        config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
        self._sess = tf.Session(config=config)
        self._cv_bridge = CvBridge()

        self.distance = []
        self.center = 0.0

        self.depth_horizon = 59

        self.robot_x = 0.0
        self.robot_y = 0.0
        self.banana_data = []
        self.apple_data = []
        self.cable_data = []
        self.latest_cable_x = -100.0
        self.latest_cable_y = -100.0
        self.r = 0.3
        self.data_count = []

        self._tf_listener = rostf.TransformListener()
        self._latest_point = None

        net_shape = (300, 300)
        data_format = 'NHWC'

        self.img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
        # Evaluation pre-processing: resize to SSD net shape.
        self.image_pre, self.labels_pre, self.bboxes_pre, self.bbox_img = \
            ssd_vgg_preprocessing.preprocess_for_eval(
                self.img_input, None, None, net_shape, data_format,
                resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
        self.image_4d = tf.expand_dims(self.image_pre, 0)

        self.ssd_net = ssd_vgg_300.SSDNet()
        with tf.contrib.slim.arg_scope(
                self.ssd_net.arg_scope(data_format=data_format)):
            self.predictions, self.localisations, _, _ = self.ssd_net.net(
                self.image_4d, is_training=False)

        # Restore SSD model.
        ckpt_filename = 'checkpoints/ssd_300_vgg.ckpt'
        # ckpt_filename = '../checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt'
        self._sess.run(tf.global_variables_initializer())

        saver = tf.train.Saver()
        saver.restore(self._sess, ckpt_filename)

        # SSD default anchor boxes.
        self._ssd_anchors = self.ssd_net.anchors(net_shape)

        #self._sub = rospy.Subscriber('camera/rgb/image_color/compressed', CompressedImage, self.callback, queue_size=1)
        self._pub_img = rospy.Publisher('ssd_image', Image, queue_size=1)
        self._pub_rslt = rospy.Publisher('ssd_result',
                                         Float32MultiArray,
                                         queue_size=1)
        self._pub_detect = rospy.Publisher('detect',
                                           Float32MultiArray,
                                           queue_size=1)
        self._pub_banana_point = rospy.Publisher('banana_point',
                                                 Float32MultiArray,
                                                 queue_size=1)
        self._pub_apple_point = rospy.Publisher('apple_point',
                                                Float32MultiArray,
                                                queue_size=1)
        self._pub_cable_point = rospy.Publisher('cable_point',
                                                Float32MultiArray,
                                                queue_size=1)
        sub_rgb = message_filters.Subscriber(
            "camera/rgb/image_color/compressed",
            CompressedImage,
            queue_size=1,
            buff_size=2**24)
        sub_depth = message_filters.Subscriber("camera/depth/image",
                                               Image,
                                               queue_size=1,
                                               buff_size=2**24)
        self.mf = message_filters.ApproximateTimeSynchronizer(
            [sub_rgb, sub_depth], 1,
            10.0)  # arg 2: queue size; arg 3: allowed time difference for synchronization
        self.mf.registerCallback(self.callback)
        self.marker_pub = rospy.Publisher("test_text", Marker, queue_size=10)
        self.marker_voting_cable_pub = rospy.Publisher("voting_cable_point",
                                                       Marker,
                                                       queue_size=10)
        self.marker_voting_banana_pub = rospy.Publisher("voting_banana_point",
                                                        Marker,
                                                        queue_size=10)
        self.marker_voting_apple_pub = rospy.Publisher("voting_apple_point",
                                                       Marker,
                                                       queue_size=10)

        self._colors = []
        for i in xrange(21):
            _color = (random.randint(0, 255), random.randint(0, 255),
                      random.randint(0, 255))
            self._colors.append(_color)
Example #11
def ssd_test(path):

    # Input placeholder.
    img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))

    # Evaluation pre-processing: resize to SSD net shape.
    # net_shape and data_format are expected at module scope.
    image_pre, labels_pre, bbox_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
        img_input, None, None, net_shape, data_format,
        resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
    image_4d = tf.expand_dims(image_pre, axis=0)

    # Define the SSD model.
    reuse = True if 'ssd_net' in locals() else None
    ssd_net = ssd_vgg_300.SSDNet()
    with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
        predictions, localizations, _, _ = ssd_net.net(image_4d, is_training=False, reuse=reuse)


    # SSD default anchor boxes
    ssd_anchors = ssd_net.anchors(net_shape)

    # Main image processing pipeline

    # TensorFlow session: grow memory when needed; do not grab all the GPU memory.
    gpu_options = tf.GPUOptions(allow_growth=True)
    config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)

    isess = tf.InteractiveSession(config=config)

    # Restore the SSD model (ckpt_filename is expected at module scope).
    isess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(isess, ckpt_filename)

    # Run the SSD network
    def post_process(img, select_thresh=0.5, nms_thresh=0.45):
        rimg, rpredictions, rlocalizations, rbbox_img = isess.run(
            [image_4d, predictions, localizations, bbox_img],
            feed_dict={img_input: img})

        # Get the classes and bboxes from the output.
        rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
            rpredictions, rlocalizations, ssd_anchors,
            select_threshold=select_thresh, img_shape=net_shape,
            num_classes=21, decode=True)
        
        rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
        rclasses, rscores, rbboxes = np_methods.bboxes_sort(rclasses, rscores, rbboxes, top_k=400)
        rclasses, rscores, rbboxes = np_methods.bboxes_nms(rclasses, rscores, rbboxes, nms_threshold=nms_thresh)

        # Resize the bboxes to the original image sizes, but useless for Resize.WARP
        rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)

        return rclasses, rscores, rbboxes
    
    
    imgs = os.listdir(path)
    for i in range(len(imgs)):
        img_path = os.path.join(path, imgs[i])
        img = mpimg.imread(img_path)
        rclasses, rscores, rbboxes = post_process(img)
        visualization.plt_bboxes(img, rclasses, rscores, rbboxes)
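
A hypothetical call to ssd_test; net_shape, data_format and ckpt_filename are read from module scope, so the values below are assumptions:

net_shape = (300, 300)
data_format = 'NHWC'
ckpt_filename = './checkpoints/ssd_300_vgg.ckpt'
ssd_test('../demo/')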
def get_object_center(q, detect_class):

    # classes:
    # 1.Aeroplanes     2.Bicycles   3.Birds       4.Boats           5.Bottles
    # 6.Buses          7.Cars       8.Cats        9.Chairs          10.Cows
    # 11.Dining tables 12.Dogs      13.Horses     14.Motorbikes     15.People
    # 16.Potted plants 17.Sheep     18.Sofas      19.Trains         20.TV/Monitors

    slim = tf.contrib.slim

    # TensorFlow session: grow memory when needed. TF, DO NOT USE ALL MY GPU MEMORY!!!
    gpu_options = tf.GPUOptions(allow_growth=True)
    config = tf.ConfigProto(log_device_placement=False,
                            gpu_options=gpu_options)
    isess = tf.InteractiveSession(config=config)

    # Input placeholder.
    net_shape = (300, 300)
    data_format = 'NHWC'
    img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
    # Evaluation pre-processing: resize to SSD net shape.
    image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
        img_input,
        None,
        None,
        net_shape,
        data_format,
        resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
    image_4d = tf.expand_dims(image_pre, 0)

    # Define the SSD model.
    reuse = True if 'ssd_net' in locals() else None
    ssd_net = ssd_vgg_300.SSDNet()
    with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
        predictions, localisations, _, _ = ssd_net.net(image_4d,
                                                       is_training=False,
                                                       reuse=reuse)

    # Restore SSD model.
    # ckpt_filename = 'checkpoints/ssd_300_vgg.ckpt'
    ckpt_filename = '../SSD-Tensorflow/checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt'

    isess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(isess, ckpt_filename)

    # SSD default anchor boxes.
    ssd_anchors = ssd_net.anchors(net_shape)

    # Main image processing routine.
    def process_image(img,
                      select_threshold=0.5,
                      nms_threshold=.45,
                      net_shape=(300, 300)):
        # Run SSD network.
        rimg, rpredictions, rlocalisations, rbbox_img = isess.run(
            [image_4d, predictions, localisations, bbox_img],
            feed_dict={img_input: img})

        # Get classes and bboxes from the net outputs.
        rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
            rpredictions,
            rlocalisations,
            ssd_anchors,
            select_threshold=select_threshold,
            img_shape=net_shape,
            num_classes=21,
            decode=True)

        rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
        rclasses, rscores, rbboxes = np_methods.bboxes_sort(rclasses,
                                                            rscores,
                                                            rbboxes,
                                                            top_k=400)
        rclasses, rscores, rbboxes = np_methods.bboxes_nms(
            rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
        # Resize bboxes to original image shape. Note: useless for Resize.WARP!
        rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
        return rclasses, rscores, rbboxes

    def get_bboxes(rclasses, rbboxes):
        # Get the bounding box of each detected object.

        num_detections = rclasses.shape[0]
        object_bboxes = []
        for i in range(num_detections):
            object_bbox = dict()
            object_bbox['i'] = i
            object_bbox['class'] = rclasses[i]
            object_bbox['y_min'] = rbboxes[i, 0]
            object_bbox['x_min'] = rbboxes[i, 1]
            object_bbox['y_max'] = rbboxes[i, 2]
            object_bbox['x_max'] = rbboxes[i, 3]
            object_bboxes.append(object_bbox)
        return object_bboxes

    # load net
    net = SiamRPNvot()
    net.load_state_dict(
        torch.load(
            join(realpath(dirname(__file__)),
                 '../DaSiamRPN-master/code/SiamRPNVOT.model')))

    net.eval()

    # open video capture
    video = cv2.VideoCapture(0)

    if not video.isOpened():
        print("Could not open video")
        sys.exit()

    index = True
    while index:

        # Read first frame.
        ok, frame = video.read()
        if not ok:
            print('Cannot read video file')
            sys.exit()

        # Define an initial bounding box
        height = frame.shape[0]
        width = frame.shape[1]

        rclasses, rscores, rbboxes = process_image(frame)

        bboxes = get_bboxes(rclasses, rbboxes)
        for bbox in bboxes:
            if bbox['class'] == detect_class:
                print(bbox)
                ymin = int(bbox['y_min'] * height)
                xmin = int((bbox['x_min']) * width)
                ymax = int(bbox['y_max'] * height)
                xmax = int((bbox['x_max']) * width)
                cx = (xmin + xmax) / 2
                cy = (ymin + ymax) / 2
                h = ymax - ymin
                w = xmax - xmin
                new_bbox = (cx, cy, w, h)
                print(new_bbox)
                index = False
                break

    # tracker init
    target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
    state = SiamRPN_init(frame, target_pos, target_sz, net)

    # tracking and visualization
    toc = 0
    count_number = 0

    while True:

        # Read a new frame
        ok, frame = video.read()
        if not ok:
            break

        # Start timer
        tic = cv2.getTickCount()

        # Update tracker
        state = SiamRPN_track(state, frame)  # track
        # print(state)

        toc += cv2.getTickCount() - tic

        if state:

            res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
            res = [int(l) for l in res]

            cv2.rectangle(frame, (res[0], res[1]),
                          (res[0] + res[2], res[1] + res[3]), (0, 255, 255), 3)

            count_number += 1
            # set object_center
            object_center = dict()
            object_center['x'] = state['target_pos'][0] / width
            object_center['y'] = state['target_pos'][1] / height
            q.put(object_center)

            if (not state) or count_number % 40 == 3:
                # Tracking failure
                cv2.putText(frame, "Tracking failure detected", (100, 80),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 255), 2)
                index = True
                while index:
                    ok, frame = video.read()
                    rclasses, rscores, rbboxes = process_image(frame)
                    bboxes = get_bboxes(rclasses, rbboxes)
                    for bbox in bboxes:
                        if bbox['class'] == detect_class:
                            ymin = int(bbox['y_min'] * height)
                            xmin = int(bbox['x_min'] * width)
                            ymax = int(bbox['y_max'] * height)
                            xmax = int(bbox['x_max'] * width)
                            cx = (xmin + xmax) / 2
                            cy = (ymin + ymax) / 2
                            h = ymax - ymin
                            w = xmax - xmin
                            new_bbox = (cx, cy, w, h)
                            target_pos, target_sz = np.array(
                                [cx, cy]), np.array([w, h])
                            state = SiamRPN_init(frame, target_pos, target_sz,
                                                 net)

                            p1 = (int(xmin), int(ymin))
                            p2 = (int(xmax), int(ymax))
                            cv2.rectangle(frame, p1, p2, (0, 255, 0), 2, 1)

                            index = 0

                            break

        # Resize the frame.
        resized_frame = cv2.resize(frame,
                                   None,
                                   fx=0.65,
                                   fy=0.65,
                                   interpolation=cv2.INTER_AREA)
        # Flip horizontally for a mirrored display.
        horizontal = cv2.flip(resized_frame, 1, dst=None)

        # Show the frame.
        cv2.namedWindow("SSD+SiamRPN", cv2.WINDOW_NORMAL)
        cv2.imshow('SSD+SiamRPN', horizontal)

        # Exit if ESC pressed
        k = cv2.waitKey(1) & 0xff
        if k == 27:
            break

    video.release()
    cv2.destroyAllWindows()
def cmd(image_name):
    #path = '../demo/'
    #image_names = sorted(os.listdir(path))
    #image_name="car2.jpg"

    #img = mpimg.imread(path + image_names[-5])
    img = mpimg.imread(image_name)
    print("get image. Ready to process")
    # TensorFlow session: grow memory when needed. TF, DO NOT USE ALL MY GPU MEMORY!!!
    gpu_options = tf.GPUOptions(allow_growth=True)
    config = tf.ConfigProto(log_device_placement=False,
                            gpu_options=gpu_options)
    isess = tf.InteractiveSession(config=config)

    # Input placeholder.
    net_shape = (300, 300)
    data_format = 'NHWC'
    img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
    # Evaluation pre-processing: resize to SSD net shape.
    image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
        img_input,
        None,
        None,
        net_shape,
        data_format,
        resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
    image_4d = tf.expand_dims(image_pre, 0)

    # Define the SSD model.
    reuse = True if 'ssd_net' in locals() else None
    ssd_net = ssd_vgg_300.SSDNet()
    with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
        predictions, localisations, _, _ = ssd_net.net(image_4d,
                                                       is_training=False,
                                                       reuse=reuse)

    # Restore SSD model.
    ckpt_filename = '../checkpoints/ssd_300_vgg.ckpt'
    # ckpt_filename = '../checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt'
    isess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(isess, ckpt_filename)

    # SSD default anchor boxes.
    ssd_anchors = ssd_net.anchors(net_shape)
    # Main image processing routine.
    select_threshold = 0.5
    nms_threshold = .45
    net_shape = (300, 300)
    # Run SSD network.
    rimg, rpredictions, rlocalisations, rbbox_img = isess.run(
        [image_4d, predictions, localisations, bbox_img],
        feed_dict={img_input: img})

    # Get classes and bboxes from the net outputs.
    rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
        rpredictions,
        rlocalisations,
        ssd_anchors,
        select_threshold=select_threshold,
        img_shape=net_shape,
        num_classes=21,
        decode=True)

    rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
    rclasses, rscores, rbboxes = np_methods.bboxes_sort(rclasses,
                                                        rscores,
                                                        rbboxes,
                                                        top_k=400)
    rclasses, rscores, rbboxes = np_methods.bboxes_nms(
        rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
    # Resize bboxes to original image shape. Note: useless for Resize.WARP!
    rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)

    # visualization.bboxes_draw_on_img(img, rclasses, rscores, rbboxes, visualization.colors_plasma)
    centerx = visualization.get_center_x(img, rclasses, rscores, rbboxes)
    width = img.shape[1]
    #left in (0, 40%], mid in (40%, 60%), right in [60%, 100%)
    left = width * 0.4
    right = width * 0.6

    if centerx < 0:
        print("nothing in sight")
        return "stop"
    elif 0 < centerx <= left:
        return "left"
    elif left < centerx < right:
        return "middle"
    elif right <= centerx < width:
        return "right"
    else:
        print("danger: centerx out of picture")
        print("centerx: %s, left: %s, right: %s, width: %s"
              % (centerx, left, right, width))
        return "danger"
Example #14
from preprocessing import ssd_vgg_preprocessing

# TensorFlow session: grow memory when needed. TF, DO NOT USE ALL MY GPU MEMORY!!!
gpu_options = tf.GPUOptions(allow_growth=True)
config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)
isess = tf.InteractiveSession(config=config)

# Input placeholder.
net_shape = (300, 300)
data_format = 'NHWC'
img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
# Evaluation pre-processing: resize to SSD net shape.
image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
    img_input,
    None,
    None,
    net_shape,
    data_format,
    resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
image_4d = tf.expand_dims(image_pre, 0)

# Define the SSD model.
reuse = True if 'ssd_300_vgg' in locals() else None
ssd_params = ssd_vgg_300.SSDNet.default_params._replace(num_classes=4)
ssd_net = ssd_vgg_300.SSDNet(ssd_params)
with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
    predictions, localisations, _, _ = ssd_net.net(image_4d,
                                                   is_training=False,
                                                   reuse=reuse)
    summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))
    summaries.add(tf.summary.image("Image", image_4d))
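
A hedged sketch of exporting the collected summaries for TensorBoard; the log directory is an assumption:

# Merge and write the summaries collected above.
summary_op = tf.summary.merge(list(summaries))
writer = tf.summary.FileWriter('./logs/ssd_graph', isess.graph)
writer.flush()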
Example #15
        p2 = (int(bbox[2] * shape[0]), int(bbox[3] * shape[1]))
        cv2.rectangle(img, p1[::-1], p2[::-1], color, thickness)
        # Draw text...
        s = '%s/%.3f' % (classes[i], scores[i])
        p1 = (p1[0] - 5, p1[1])
        cv2.putText(img, s, p1[::-1], cv2.FONT_HERSHEY_DUPLEX, 0.4, color, 1)


reuse = True if 'ssd' in locals() else None
# Input placeholder.
net_shape = (512, 512)
img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))

image_pre, labels_pre, bboxes_pre, bbox_img = ssd_preprocessing.preprocess_for_eval(
    img_input,
    None,
    None,
    net_shape,
    resize=ssd_preprocessing.Resize.PAD_AND_RESIZE)
image_4d = tf.expand_dims(image_pre, 0)

params = ssd_vgg_512.SSDNet.default_params
ssd = ssd_vgg_512.SSDNet(params)

layers_anchors = ssd.anchors(net_shape, dtype=np.float32)

# Re-define the model
with slim.arg_scope(ssd.arg_scope(weight_decay=0.0005)):
    predictions, localisations, logits, end_points = ssd.net(image_4d,
                                                             is_training=False,
                                                             reuse=reuse)
    localisations = ssd.bboxes_decode(localisations, layers_anchors)
        # ## SSD 300 Model
        #
        # The SSD 300 network takes 300x300 image inputs. In order to feed any image,
        # the latter is resized to this input shape (i.e. Resize.WARP_RESIZE). Note
        # that even though it may change the width/height ratio, the SSD model
        # performs well on resized images (and this is the default behaviour in the
        # original Caffe implementation).
        #
        # SSD anchors correspond to the default bounding boxes encoded in the network.
        # The SSD net output provides offsets on the coordinates and dimensions of
        # these anchors.


        # Input placeholder.
        #net_shape = (300, 300)
        net_shape = (shapeWidth, shapeHeight)

        data_format = 'NHWC'  # or 'NCHW'
        img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
        # Evaluation pre-processing: resize to SSD net shape.
        image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
            img_input, None, None, net_shape, data_format, resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
        image_4d = tf.expand_dims(image_pre, 0)


        # Define the SSD model.
        reuse = True if 'ssd_net' in locals() else None
        if shapeWidth==300:
            ssd_net = ssd_vgg_300.SSDNet()
        else:
            ssd_net = ssd_vgg_512.SSDNet()

        with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
            predictions, localisations, _, _ = ssd_net.net(image_4d, is_training=False, reuse=reuse)

        # Restore SSD model.
        if shapeWidth==300:
def main():
    # TensorFlow session: grow memory when needed. TF, DO NOT USE ALL MY GPU MEMORY!!!
    gpu_options = tf.GPUOptions(allow_growth=True)
    config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)
    isess = tf.InteractiveSession(config=config)

    # Input placeholder.
    net_shape = (300, 300)
    data_format = 'NHWC'
    img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
    # Evaluation pre-processing: resize to SSD net shape.
    image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
        img_input, None, None, net_shape, data_format, resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
    image_4d = tf.expand_dims(image_pre, 0)

    # Define the SSD model.
    reuse = True if 'ssd_net' in locals() else None
    ssd_net = ssd_vgg_300.SSDNet()
    with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
        predictions, localisations, _, _ = ssd_net.net(image_4d, is_training=False, reuse=reuse)

    # Restore SSD model.
    ckpt_filename = '../checkpoints/ssd_300_vgg.ckpt'
    # ckpt_filename = '../checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt'
    isess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(isess, ckpt_filename)

    # SSD default anchor boxes.
    ssd_anchors = ssd_net.anchors(net_shape)

    # Main image processing routine.
    print("init ready. net restored")

    # Test on some demo image and visualize output.
    tcp_socket = socket(AF_INET, SOCK_STREAM)
    tcp_socket.setsockopt(SOL_SOCKET, SO_REUSEADDR, 1)
    # Bind to a fixed port.
    tcp_socket.bind(("", 12345))
    # Turn the active socket into a passive (listening) socket.
    tcp_socket.listen(128)

    print("ready to receive")
    
    while True:
        client_socket, client_addr = tcp_socket.accept()
        print('Accepted new connection from', client_addr)
        new_filename = ""
        while True:
            fileinfo_size = struct.calcsize('128sl')
            buf = client_socket.recv(fileinfo_size)
            if buf:
                filename, filesize = struct.unpack('128sl', buf)
                #print(filename)
                fn = filename.decode().strip('\00')
                new_filename = os.path.join('./', fn)
                print(fn)
                print('file new name is ' + new_filename + ', filesize is ' + str(filesize))

                recvd_size = 0  # number of bytes received so far
                fp = open(new_filename, 'wb')
                print('start receiving...')

                while recvd_size != filesize:
                    if filesize - recvd_size > 1024:
                        data = client_socket.recv(1024)
                        recvd_size += len(data)
                    else:
                        data = client_socket.recv(filesize - recvd_size)
                        recvd_size = filesize
                    fp.write(data)
                fp.close()
                print('end receive...')

            cmd_str = get_cmd(isess, ssd_anchors, image_4d, predictions,
                              localisations, bbox_img, img_input, new_filename)
            client_socket.send(cmd_str.encode())
            print("end sending")
            client_socket.close()
            break
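
For reference, a minimal client matching the '128sl' header protocol of the receiver above; the host, port and reply handling are assumptions:

# Hypothetical client: send a struct-packed '128sl' header, then the raw file bytes.
import os
import struct
from socket import socket, AF_INET, SOCK_STREAM

def send_file(path, host='127.0.0.1', port=12345):
    s = socket(AF_INET, SOCK_STREAM)
    s.connect((host, port))
    header = struct.pack('128sl', os.path.basename(path).encode(),
                         os.path.getsize(path))
    s.send(header)
    with open(path, 'rb') as f:
        s.sendall(f.read())
    print(s.recv(1024).decode())  # direction command sent back by the server
    s.close()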