def __init__(self, NUM_DETECTIONS):
        threading.Thread.__init__(self)

        self.NUM = NUM_DETECTIONS
        self.isess = tf.InteractiveSession()
        net_shape = (300, 300)
        data_format = 'NHWC'
        self.img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
        image_pre, labels_pre, bboxes_pre, self.bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
            self.img_input,
            None,
            None,
            net_shape,
            data_format,
            resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
        self.image_4d = tf.expand_dims(image_pre, 0)
        reuse = None
        ssd_net = ssd_vgg_300.SSDNet()
        with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
            self.predictions, self.localisations, _, _ = ssd_net.net(
                self.image_4d, is_training=False, reuse=reuse)

        #ckpt_filename = './checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt'
        ckpt = tf.train.get_checkpoint_state(
            './checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt')
        #ckpt_filename = './checkpoints/ssd_300_vgg.ckpt/ssd_300_vgg.ckpt'
        self.isess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(self.isess, ckpt.model_checkpoint_path)
        self.ssd_anchors = ssd_net.anchors(net_shape)
        self.detections = []
        self.iDetections = []
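
# Usage sketch (hedged): only __init__ is shown above, so the enclosing class
# name "SSDDetectorThread" and what its run() method does are assumptions.
# detector = SSDDetectorThread(NUM_DETECTIONS=10)
# detector.start()  # threading.Thread API; run() presumably fills detector.iDetections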
# Example 2
def get_ssd_model_params(ckpt_filename):
    """获取ssd model"""

    slim = tf.contrib.slim

    # TensorFlow session: grow memory when needed. TF, DO NOT USE ALL MY GPU MEMORY!!!
    gpu_options = tf.GPUOptions(allow_growth=True)
    config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)
    isess = tf.InteractiveSession(config=config)

    # Input placeholder.
    net_shape = (300, 300)
    data_format = 'NHWC'
    img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
    # Evaluation pre-processing: resize to SSD net shape.
    image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
        img_input, None, None, net_shape, data_format, resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
    image_4d = tf.expand_dims(image_pre, 0)

    # Define the SSD model.
    reuse = True if 'ssd_net' in locals() else None
    ssd_net = ssd_vgg_300.SSDNet()
    with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
        predictions, localisations, _, _ = ssd_net.net(image_4d, is_training=False, reuse=reuse)

    # Restore SSD model.
    isess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(isess, ckpt_filename)

    # SSD default anchor boxes.
    ssd_anchors = ssd_net.anchors(net_shape)

    return {'isess': isess, 'image_4d': image_4d, 'predictions': predictions, 'localisations': localisations,
            'bbox_img': bbox_img, 'img_input': img_input, 'ssd_anchors': ssd_anchors}
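
# Usage sketch: run one image through the tensors returned above and decode
# with the np_methods pipeline used throughout this file (the checkpoint and
# image paths are assumptions).
import matplotlib.image as mpimg

model = get_ssd_model_params('./checkpoints/ssd_300_vgg.ckpt')  # assumed path
img = mpimg.imread('demo.jpg')  # assumed image
rimg, rpredictions, rlocalisations, rbbox_img = model['isess'].run(
    [model['image_4d'], model['predictions'], model['localisations'], model['bbox_img']],
    feed_dict={model['img_input']: img})
# rpredictions/rlocalisations can then go through np_methods.ssd_bboxes_select,
# bboxes_clip, bboxes_sort and bboxes_nms, as in the later examples.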
# Example 3
    def __init__(self, weights='../checkpoints/ssd_300_vgg.ckpt', mem_frac=1):
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=mem_frac)

        self.sess = sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)
        self.isess = isess = tf.InteractiveSession(config=config)
        self.net_shape = net_shape = (300, 300)
        data_format = 'NHWC'
        img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
        image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
            img_input, None, None, net_shape, data_format, resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
        image_4d = tf.expand_dims(image_pre, 0)
        reuse = True if 'ssd_net' in locals() else None
        ssd_net = ssd_vgg_300.SSDNet()
        with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
            predictions, localisations, _, _ = ssd_net.net(image_4d, is_training=False, reuse=reuse)
        isess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(isess, weights)

        # SSD default anchor boxes.
        self.ssd_anchors = ssd_net.anchors(net_shape)
        self.bbox_img = bbox_img
        self.img_input = img_input
        self.image_4d = image_4d
        self.ssd_net = ssd_net
        self.predictions = predictions
        self.localisations = localisations
    def load(self,
             net_shape=(300, 300),
             data_format='NHWC',
             ckpt_filename=PERSON_MODEL):
        self.img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
        # Evaluation pre-processing: resize to SSD net shape.
        self.image_pre, self.labels_pre, self.bboxes_pre, self.bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
            self.img_input,
            None,
            None,
            net_shape,
            data_format,
            resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
        self.image_4d = tf.expand_dims(self.image_pre, 0)

        # Define the SSD model.
        ssd_net = ssd_vgg_300.SSDNet()

        with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
            self.predictions, self.localisations, _, _ = ssd_net.net(
                self.image_4d, is_training=False, reuse=False)

        self.sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(self.sess, ckpt_filename)

        # SSD default anchor boxes.
        self.ssd_anchors = ssd_net.anchors(net_shape)
# Example 5
def get_tensors(input=None, reuse=None, bbox=False):

    # Input placeholder.
    if input is not None:
        assert input.get_shape().ndims == 4
        # Subtract the VGG per-channel mean so the batch matches the net's input.
        image_4d = input - np.array([123.6800, 116.7790, 103.9390]).reshape((1, 1, 1, 3))
    else:
        image_4d = tf.placeholder(tf.uint8, shape=(None, None, None, 3))
    if bbox:
        image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval_multi(
            image_4d, None, None, (None, None), resize=ssd_vgg_preprocessing.Resize.NONE)
    if input is None and bbox:  # image_pre only exists on the bbox path
        image_4d = tf.expand_dims(image_pre, 0)

# Network parameters.
    params = ssd_vgg_300.SSDNet.default_params
    params = params._replace(num_classes=8)

# SSD network construction.
    ssd = ssd_vgg_300.SSDNet(params)
    with slim.arg_scope(ssd.arg_scope(weight_decay=0.0005)):
        predictions, localisations, logits, end_points = ssd.net(image_4d, is_training=False)

    # Rebuild end_points from the graph: keep the input plus every conv*/Relu
    # and the pool5 output matched by the regex below.
    end_points = {}
    end_points["input"] = image_4d
    rs = r"(.*\/conv[0-9]\/conv[0-9]_[0-9]/Relu$|.*ssd_300_vgg\/pool5\/MaxPool$)"
    rc = re.compile(rs)
    for op in tf.get_default_graph().as_graph_def().node:
        gr = rc.match(op.name)
        if gr:
            end_points[op.name.split("/")[-2]] = tf.get_default_graph().get_tensor_by_name(op.name+":0")


    if input is not None and not bbox:
        return end_points
    elif bbox:
        return predictions, localisations, bbox_img, logits, end_points, image_4d, ssd
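
# Usage sketch for get_tensors: pass a pre-normalized 4-D float batch and
# inspect the collected VGG feature maps ('input' plus the conv*/Relu and
# pool5 tensors matched by the regex above). The placeholder shape is an
# assumption.
batch = tf.placeholder(tf.float32, shape=(1, 300, 300, 3))
feature_maps = get_tensors(input=batch)
for name, tensor in feature_maps.items():
    print(name, tensor.get_shape())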
# Example 6
 def __init__(self, params=None):
     """Init the SSD net with some parameters. Use the default ones
     if none provided.
     """
     if isinstance(params, SSDParams):
         self.params = params
     else:
         self.params = SSDNet.default_params
     self.student_model = ssd_mobilenetv1_300.SSDNet(self.params)
     self.teacher_model = ssd_vgg_300.SSDNet()
     self.teacher_kd_layer = {}
     self.student_kd_layer = {}
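
# A minimal construction sketch for the distillation wrapper above. The
# enclosing class is assumed to be named SSDNet and to define default_params,
# which this fragment references but does not show.
kd_net = SSDNet()  # falls back to SSDNet.default_params
print(kd_net.teacher_model, kd_net.student_model)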
# Example 7
def main():
    # TensorFlow session: grow memory when needed. TF, DO NOT USE ALL MY GPU MEMORY!!!
    gpu_options = tf.GPUOptions(allow_growth=True)
    config = tf.ConfigProto(log_device_placement=False,
                            gpu_options=gpu_options)
    sess = tf.Session(config=config)
    # isess = tf.InteractiveSession(config=config)

    # Input placeholder.
    net_shape = (300, 300)
    data_format = 'NHWC'
    img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
    # Evaluation pre-processing: resize to SSD net shape.
    image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
        img_input,
        None,
        None,
        net_shape,
        data_format,
        resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
    image_4d = tf.expand_dims(image_pre, 0)

    # Define the SSD model.
    reuse = True if 'ssd_net' in locals() else None
    ssd_net = ssd_vgg_300.SSDNet()
    with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
        predictions, localisations, _, _ = ssd_net.net(image_4d,
                                                       is_training=False,
                                                       reuse=reuse)

    # Restore SSD model.
    ckpt_filename = '../checkpoints/ssd_300_vgg.ckpt'
    # ckpt_filename = '../checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt'
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(sess, ckpt_filename)

    # SSD default anchor boxes.
    ssd_anchors = ssd_net.anchors(net_shape)

    # Test on some demo image and visualize output.
    path = '../demo/'
    image_names = sorted(os.listdir(path))

    img = mpimg.imread(path + image_names[-1])
    rclasses, rscores, rbboxes = process_image(sess, img, image_4d,
                                               predictions, localisations,
                                               bbox_img, img_input,
                                               ssd_anchors)

    # visualization.bboxes_draw_on_img(img, rclasses, rscores, rbboxes, visualization.colors_plasma)
    visualization.plt_bboxes(img, rclasses, rscores, rbboxes)
    def __init__(self, params=None):
        # TensorFlow session: grow memory when needed. TF, DO NOT USE ALL MY GPU MEMORY!!!
        self.gpu_options = tf.GPUOptions(allow_growth=True)
        self.config = tf.ConfigProto(log_device_placement=False,
                                     gpu_options=self.gpu_options)
        # Input placeholder.
        self.net_shape = (512, 512) if MODEL == 512 else (300, 300)
        self.data_format = 'NHWC'
        self.img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
        # Evaluation pre-processing: resize to SSD net shape.
        self.image_pre, self.labels_pre, self.bboxes_pre, self.bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
            self.img_input,
            None,
            None,
            self.net_shape,
            self.data_format,
            resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
        self.image_4d = tf.expand_dims(self.image_pre, 0)
        # Define the SSD model.
        try:
            tf.get_variable('ssd_512_vgg/conv1/conv1_1/weights'
                            ) if MODEL == 512 else tf.get_variable(
                                'ssd_300_vgg/conv1/conv1_1/weights')
            self.reuse = True  # if tf.variable_scope('ssd_300_vgg/conv1/conv1_1/weights') else None
        except ValueError:
            print('SSD variables not found; building the graph without reuse')
            self.reuse = None
        self.ssd_net = ssd_vgg_512.SSDNet(
        ) if MODEL == 512 else ssd_vgg_300.SSDNet()
        with slim.arg_scope(
                self.ssd_net.arg_scope(data_format=self.data_format)):
            self.predictions, self.localisations, _, _ = self.ssd_net.net(
                self.image_4d, is_training=False, reuse=self.reuse)
        # Restore SSD model.
        self.ckpt_filename = './checkpoints/VGG_VOC0712_SSD_512x512_ft_iter_120000.ckpt' if MODEL == 512 else './checkpoints/ssd_300_vgg.ckpt'
        # SSD default anchor boxes.
        self.ssd_anchors = self.ssd_net.anchors(self.net_shape)

        self.isess = tf.InteractiveSession(config=self.config)
        # Load Model
        self.isess.run(tf.global_variables_initializer())
        self.saver = tf.train.Saver()
        self.saver.restore(self.isess, self.ckpt_filename)
    def load(self):
        try:
            data_format = 'NHWC'
            img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
            # Evaluation pre-processing: resize to SSD net shape.
            image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
                img_input,
                None,
                None,
                self.net_shape,
                data_format,
                resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
            image_4d = tf.expand_dims(image_pre, 0)

            # Define the SSD model.
            reuse = True if 'ssd_net' in locals() else None
            if self.net_to_use == 'ssd-300':
                ssd_net = ssd_vgg_300.SSDNet()
            else:
                ssd_net = ssd_vgg_512.SSDNet()

            with tf.contrib.slim.arg_scope(
                    ssd_net.arg_scope(data_format=data_format)):
                predictions, localisations, _, _ = ssd_net.net(
                    image_4d, is_training=False, reuse=reuse)

            # Restore SSD model.
            self.isess.run(tf.global_variables_initializer())
            saver = tf.train.Saver()
            saver.restore(self.isess, self.ckpt_filename)

            # SSD default anchor boxes.
            ssd_anchors = ssd_net.anchors(self.net_shape)
            cropper_model = SSD_Bundle(ssd_net, img_input, predictions,
                                       localisations, bbox_img, image_4d,
                                       ssd_anchors)
            return cropper_model
        except Exception as e:
            message = "Could not load model."
            print(message + str(e))
            raise Exception(message)
# Example 10
    def __init__(self,
                 select_threshold=0.5,
                 nms_threshold=0.45,
                 net_shape=(300, 300)):
        self.select_threshold = select_threshold
        self.nms_threshold = nms_threshold
        self.net_shape = net_shape

        slim = tf.contrib.slim
        gpu_options = tf.GPUOptions(allow_growth=True)
        config = tf.ConfigProto(log_device_placement=False,
                                gpu_options=gpu_options)
        self.isess = tf.InteractiveSession(config=config)

        # Input placeholder (net_shape comes from the constructor argument).
        data_format = 'NHWC'
        self.img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
        self.image_pre, self.labels_pre, self.bboxes_pre, self.bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
            self.img_input,
            None,
            None,
            net_shape,
            data_format,
            resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
        self.image_4d = tf.expand_dims(self.image_pre, 0)

        # Define the SSD model
        reuse = True if 'ssd_net' in locals() else None
        ssd_net = ssd_vgg_300.SSDNet()
        with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
            self.predictions, self.localisations, _, _ = ssd_net.net(
                self.image_4d, is_training=False, reuse=reuse)

        # Restore SSD Model
        ckpt_filename = 'checkpoints/ssd_300_vgg.ckpt'

        self.isess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(self.isess, ckpt_filename)
        self.ssd_anchors = ssd_net.anchors(net_shape)
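
# Usage sketch (hedged): only __init__ is shown, so the class name "Detector"
# is an assumption; the run call mirrors the decode pipeline used elsewhere
# in this file.
detector = Detector(select_threshold=0.5, nms_threshold=0.45)  # hypothetical class name
img = np.zeros((480, 640, 3), dtype=np.uint8)  # stand-in HxWx3 frame
rimg, rpredictions, rlocalisations, rbbox_img = detector.isess.run(
    [detector.image_4d, detector.predictions, detector.localisations, detector.bbox_img],
    feed_dict={detector.img_input: img})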
# Example 11
 def __init__(self):
     self.isess = tf.InteractiveSession()
     self.num_classes = 3
     self.ckpt_filename = '/home/hanxy/catkin_ws/src/icra_firefly/scripts/ckpts/model.ckpt-38804'
     params = ssd_vgg_300.SSDNet.default_params._replace(
         num_classes=self.num_classes)
     ssd = ssd_vgg_300.SSDNet(params)
     self.net_shape = (300, 300)
     self.img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
     self.layers_anchors = ssd.anchors(self.net_shape, dtype=np.float32)
     image_pre, labels_pre, bboxes_pre, self.bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
         self.img_input, None, None, self.net_shape)
     self.image_4d = tf.expand_dims(image_pre, 0)
     arg_scope = ssd.arg_scope(weight_decay=0.00004)
     with slim.arg_scope(arg_scope):
         # Define the SSD network.
         self.predictions, self.localisations, logits, end_points = \
             ssd.net(self.image_4d, is_training=False)
     init_op = tf.global_variables_initializer()
     self.isess.run(init_op)
     # Restore SSD model.
     saver = tf.train.Saver()
     saver.restore(self.isess, self.ckpt_filename)
# Example 12
def init_model(ckpt_path):

    l_VOC_CLASS = [name for name, tup in pascalvoc_common.VOC_LABELS.items()]

    # TensorFlow session: grow memory when needed. TF, DO NOT USE ALL MY GPU MEMORY!!!
    gpu_options = tf.GPUOptions(allow_growth=True)
    config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)
    isess = tf.InteractiveSession(config=config)
    
    # Input placeholder.
    net_shape = (300, 300)
    data_format = 'NHWC'
    img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
    
    # Evaluation pre-processing: resize to SSD net shape.
    image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(img_input, None, None, net_shape, data_format, resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
    image_4d = tf.expand_dims(image_pre, 0)

    
    # Define the SSD model.
    reuse = True if 'ssd_net' in locals() else None
    ssd_net = ssd_vgg_300.SSDNet()
    with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
        predictions, localisations, _, _ = ssd_net.net(image_4d, is_training=False, reuse=reuse)

    # Restore SSD model.
    ckpt_filename = ckpt_path
    # ckpt_filename = '../checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt'
    isess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(isess, ckpt_filename)

    # SSD default anchor boxes.
    ssd_anchors = ssd_net.anchors(net_shape)

    return {'l_VOC_CLASS': l_VOC_CLASS,
            'ssd_anchors': ssd_anchors,
            'img_input': img_input,
            'isess': isess,
            'image_4d': image_4d,
            'predictions': predictions,
            'localisations': localisations,
            'bbox_img': bbox_img}
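
# Usage sketch: init_model bundles everything needed for inference, and
# l_VOC_CLASS maps predicted class indices back to VOC label names (the
# checkpoint path below is an assumption).
model = init_model('../checkpoints/ssd_300_vgg.ckpt')
print(len(model['l_VOC_CLASS']), 'VOC label names')
# After np_methods decoding (see the other examples), a class id c becomes
# readable via model['l_VOC_CLASS'][c].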
# Example 13
def main(argv):
    # TensorFlow session: grow memory when needed. TF, DO NOT USE ALL MY GPU MEMORY!!!
    gpu_options = tf.GPUOptions(allow_growth=True)
    config = tf.ConfigProto(log_device_placement=False,
                            gpu_options=gpu_options)
    sess = tf.Session(config=config)
    # isess = tf.InteractiveSession(config=config)

    # Input placeholder.
    net_shape = (300, 300)
    data_format = 'NHWC'
    img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
    # Evaluation pre-processing: resize to SSD net shape.
    image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
        img_input,
        None,
        None,
        net_shape,
        data_format,
        resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
    image_4d = tf.expand_dims(image_pre, 0)

    # Define the SSD model.
    reuse = True if 'ssd_net' in locals() else None
    ssd_net = ssd_vgg_300.SSDNet()
    with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
        predictions, localisations, _, _ = ssd_net.net(image_4d,
                                                       is_training=False,
                                                       reuse=reuse)

    # Restore SSD model.
    ckpt_filename = '../checkpoints/ssd_300_vgg.ckpt'
    # ckpt_filename = '../checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt'
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(sess, ckpt_filename)

    # SSD default anchor boxes.
    ssd_anchors = ssd_net.anchors(net_shape)

    # Test on some demo image and visualize output.
    # import ipdb; ipdb.set_trace()
    if argv.output_dir is None:
        output_p_dir = os.path.dirname(argv.video_path)
        file_name = os.path.basename(argv.video_path).split('.')[0]
        output_dir = os.path.join(output_p_dir, file_name)
        if not os.path.isdir(output_dir):
            os.mkdir(output_dir)
    else:
        if not os.path.isdir(argv.output_dir):
            assert 0, "output_dir: {} is not exist.".format(argv.output_dir)
        output_dir = argv.output_dir
    cap = cv2.VideoCapture(argv.video_path)
    num_frame = cap.get(cv2.CAP_PROP_FRAME_COUNT)
    count = 0
    name_template = 'frame-%06d.txt'
    while (cap.isOpened()):
        ret, img = cap.read()
        if not ret:
            break  # end of video
        rclasses, rscores, rbboxes = process_image(sess, img, image_4d,
                                                   predictions, localisations,
                                                   bbox_img, img_input,
                                                   ssd_anchors)
        name = os.path.join(output_dir, name_template % count)
        visualization.save_bboxes_imgs_to_file(name, img, rclasses, rscores,
                                               rbboxes)
        count += 1
        print("Detection frame [%d/%d]\r" % (count, num_frame), end="")
def set_centers():

    print("开启线程:将object_centers放入queue")

    # TensorFlow session: grow memory when needed. TF, DO NOT USE ALL MY GPU MEMORY!!!
    gpu_options = tf.GPUOptions(allow_growth=True)
    config = tf.ConfigProto(log_device_placement=False,
                            gpu_options=gpu_options)
    isess = tf.InteractiveSession(config=config)

    # Input placeholder.
    net_shape = (300, 300)
    data_format = 'NHWC'
    img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
    # Evaluation pre-processing: resize to SSD net shape.
    image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
        img_input,
        None,
        None,
        net_shape,
        data_format,
        resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
    image_4d = tf.expand_dims(image_pre, 0)

    # Define the SSD model.
    reuse = True if 'ssd_net' in locals() else None
    ssd_net = ssd_vgg_300.SSDNet()
    with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
        predictions, localisations, _, _ = ssd_net.net(image_4d,
                                                       is_training=False,
                                                       reuse=reuse)

    # Restore SSD model.
    # ckpt_filename = 'checkpoints/ssd_300_vgg.ckpt'
    ckpt_filename = 'checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt'

    isess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(isess, ckpt_filename)

    # SSD default anchor boxes.
    ssd_anchors = ssd_net.anchors(net_shape)

    # Main image processing routine.
    def process_image(img,
                      select_threshold=0.5,
                      nms_threshold=.45,
                      net_shape=(300, 300)):
        # Run SSD network.
        rimg, rpredictions, rlocalisations, rbbox_img = isess.run(
            [image_4d, predictions, localisations, bbox_img],
            feed_dict={img_input: img})

        # Get classes and bboxes from the net outputs.
        rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
            rpredictions,
            rlocalisations,
            ssd_anchors,
            select_threshold=select_threshold,
            img_shape=net_shape,
            num_classes=21,
            decode=True)

        rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
        rclasses, rscores, rbboxes = np_methods.bboxes_sort(rclasses,
                                                            rscores,
                                                            rbboxes,
                                                            top_k=400)
        rclasses, rscores, rbboxes = np_methods.bboxes_nms(
            rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
        # Resize bboxes to original image shape. Note: useless for Resize.WARP!
        rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
        return rclasses, rscores, rbboxes

    def get_centers(rclasses, rbboxes):
        # get center location of object

        num_detections = rclasses.shape[0]
        object_centers = []
        for i in range(num_detections):
            object_center = dict()
            object_center['i'] = i
            object_center['class'] = rclasses[i]
            object_center['x'] = (rbboxes[i, 1] +
                                  rbboxes[i, 3]) / 2  # x coordinate of the object center
            object_center['y'] = (rbboxes[i, 0] +
                                  rbboxes[i, 2]) / 2  # y coordinate of the object center
            object_centers.append(object_center)
        return object_centers

    count = 0
    cap = cv2.VideoCapture(0)

    while count < 100:
        # Read a frame from the camera.
        ret, img = cap.read()
        rclasses, rscores, rbboxes = process_image(img)
        '''
        classes:
        1.Aeroplanes     2.Bicycles   3.Birds       4.Boats           5.Bottles
        6.Buses          7.Cars       8.Cats        9.Chairs          10.Cows
        11.Dining tables 12.Dogs      13.Horses     14.Motorbikes     15.People
        16.Potted plants 17.Sheep     18.Sofas      19.Trains         20.TV/Monitors
        '''
        object_centers = get_centers(rclasses, rbboxes)
        # print("put object centers: " + str(object_centers))
        for object_center in object_centers:
            if object_center['class'] == 5 or object_center['class'] == 7:
                new_object_center = object_center
                q.put(new_object_center)
                count += 1
                break
    print("完成输入")
    cap.release()
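
# Consumer-side sketch (assumed pairing): set_centers is written as a producer
# that pushes up to 100 bottle/car centers into the shared queue q; a consumer
# thread drains it like this.
import threading

producer = threading.Thread(target=set_centers)
producer.start()
for _ in range(100):
    center = q.get()  # blocks until a detection is available
    print("class %d at (%.2f, %.2f)" % (center['class'], center['x'], center['y']))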
def ssd_test(path):

    # Input Placeholder
    img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))

    # Evaluation pre-processing: resize to SSD net shape
    image_pre, labels_pre, bbox_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
        img_input, None, None, net_shape, data_format,
        resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
    image_4d = tf.expand_dims(image_pre, axis=0)

    # Define the SSD model
    reuse = True if 'ssd_net' in locals() else None
    ssd_net = ssd_vgg_300.SSDNet()
    with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
        predictions, localizations, _, _ = ssd_net.net(image_4d, is_training=False, reuse=reuse)


    # SSD default anchor boxes
    ssd_anchors = ssd_net.anchors(net_shape)

    # Main image processing pipeline

    # TensorFlow session: grow memory when needed; do not let TF grab all GPU memory.
    gpu_options = tf.GPUOptions(allow_growth=True)
    config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)

    isess = tf.InteractiveSession(config=config)

    # Restore the SSD model
    isess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(isess, ckpt_filename)

    # Run the SSD network
    def post_process(img, select_thresh=0.5, nms_thresh=0.45):
        rimg, rpredictions, rlocalizations, rbbox_img = isess.run(
            [image_4d, predictions, localizations, bbox_img],
            feed_dict={img_input: img})

        # Get the classes and bboxes from the output.
        rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
            rpredictions, rlocalizations, ssd_anchors,
            select_threshold=select_thresh, img_shape=net_shape,
            num_classes=21, decode=True)

        rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
        rclasses, rscores, rbboxes = np_methods.bboxes_sort(rclasses, rscores, rbboxes, top_k=400)
        rclasses, rscores, rbboxes = np_methods.bboxes_nms(rclasses, rscores, rbboxes, nms_threshold=nms_thresh)

        # Resize the bboxes to the original image size (a no-op for Resize.WARP).
        rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)

        return rclasses, rscores, rbboxes
    
    
    imgs = os.listdir(path)
    for i in range(len(imgs)):
        img_path = os.path.join(path, imgs[i])
        img = mpimg.imread(img_path)
        rclasses, rscores, rbboxes = post_process(img)
        visualization.plt_bboxes(img, rclasses, rscores, rbboxes)
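
# ssd_test reads net_shape, data_format and ckpt_filename from module scope;
# a hedged setup sketch before calling it (paths are assumptions):
net_shape = (300, 300)
data_format = 'NHWC'
ckpt_filename = '../checkpoints/ssd_300_vgg.ckpt'
ssd_test('../demo/')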
# Example 16
    return objects


data_format = 'NHWC'
img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
large = tf.placeholder(tf.bool)
medium = tf.placeholder(tf.bool)

# Evaluation pre-processing: resize to SSD net shape.
image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
    img_input, None, None, (300, 300), data_format,
    resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE, large=large, medium=medium)
image_4d = tf.expand_dims(image_pre, 0)

# Define the SSD model.
reuse = True if 'ssd_net' in locals() else None
#reuse =True
ssd_net = ssd_vgg_300.SSDNet(default_params)
with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
    predictions, localisations, _, _ = ssd_net.net(
        image_4d, is_training=False, reuse=tf.AUTO_REUSE, large=large, medium=medium)

# Restore SSD model.
#ckpt_filename = '../checkpoints/VGG_ILSVRC2016_SSD_300x300_iter_440000.ckpt'
ckpt_filename = '../checkpoints/ssd_300_vgg.ckpt'
isess.run(tf.global_variables_initializer())
saver = tf.train.Saver()
saver.restore(isess, ckpt_filename)

# SSD default anchor boxes.
ssd_anchors1 = ssd_net.anchors((300,300))
ssd_anchors2 = ssd_vgg_300.ssd_anchors_all_layers(
    (75, 75), fovea_params.feat_shapes, fovea_params.anchor_sizes,
    fovea_params.anchor_ratios, fovea_params.anchor_steps, offset=0.5, dtype=np.float32)
ssd_anchors3 = ssd_vgg_300.ssd_anchors_all_layers(
    (150, 150), medium_params.feat_shapes, medium_params.anchor_sizes,
    medium_params.anchor_ratios, medium_params.anchor_steps, offset=0.5, dtype=np.float32)
# Example 17
data_format = 'NHWC'
img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
# Evaluation pre-processing: resize to SSD net shape.
image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
    img_input,
    None,
    None,
    net_shape,
    data_format,
    resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
image_4d = tf.expand_dims(image_pre, 0)

# Define the SSD model.
reuse = True if 'ssd_300_vgg' in locals() else None
ssd_params = ssd_vgg_300.SSDNet.default_params._replace(num_classes=4)
ssd_net = ssd_vgg_300.SSDNet(ssd_params)
with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
    predictions, localisations, _, _ = ssd_net.net(image_4d,
                                                   is_training=False,
                                                   reuse=reuse)
    summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))
    summaries.add(tf.summary.image("Image", image_4d))
    f_i = 0
    # for predict_map in predictions:
    # predict_map = predict_map[:, :, :, :, 1:]
    # predict_map = tf.reduce_max(predict_map, axis=4)
    # if f_i < 3:
    # predict_list = tf.split(predict_map, 6, axis=3)
    # anchor_index = 1
    # for anchor in predict_list:
    # summaries.add(tf.summary.image("predicte_map_%d_anchor%d" % (f_i,anchor_index), tf.cast(anchor, tf.float32)))
def main(_):
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.DEBUG)
    with tf.Graph().as_default():

        global_step = slim.create_global_step()

        # Get the SSD network and its anchors.
        #ssd_params = ssd_class.default_params._replace(num_classes=FLAGS.num_classes)
        ssd_net = ssd_vgg_300.SSDNet()
        ssd_shape = ssd_net.params.img_shape
        ssd_anchors = ssd_net.anchors(ssd_shape)


        b_image, b_gclasses, b_glocalisations, b_gscores = \
         load_batch.get_batch(FLAGS.dataset_dir,
             FLAGS.num_readers,
             FLAGS.batch_size,
             ssd_shape,
             ssd_net,
             ssd_anchors,
             FLAGS.num_preprocessing_threads,
             is_training = True)

        with tf.device(FLAGS.gpu_train):
            arg_scope = ssd_net.arg_scope(weight_decay=FLAGS.weight_decay)
            with slim.arg_scope(arg_scope):
                predictions, localisations, logits, end_points = \
                 ssd_net.net(b_image, is_training=True)
            # Add loss function.
            total_loss = ssd_net.losses(logits,
                                        localisations,
                                        b_gclasses,
                                        b_glocalisations,
                                        b_gscores,
                                        match_threshold=FLAGS.match_threshold,
                                        negative_ratio=FLAGS.negative_ratio,
                                        alpha=FLAGS.loss_alpha,
                                        label_smoothing=FLAGS.label_smoothing)

        # Gather initial summaries.

        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))
        for variable in slim.get_model_variables():
            summaries.add(tf.summary.histogram(variable.op.name, variable))

        for loss in tf.get_collection('EXTRA_LOSSES'):
            summaries.add(tf.summary.scalar(loss.op.name, loss))

        with tf.device(FLAGS.gpu_train):
            learning_rate = tf_utils.configure_learning_rate(
                FLAGS, FLAGS.num_samples, global_step)
            # Configure the optimization procedure
            optimizer = tf_utils.configure_optimizer(FLAGS, learning_rate)
            summaries.add(tf.summary.scalar('learning_rate', learning_rate))

            ## Training

            train_op = slim.learning.create_train_op(total_loss, optimizer)

        # =================================================================== #
        # Kicks off the training.
        # =================================================================== #
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
        config = tf.ConfigProto(gpu_options=gpu_options,
                                log_device_placement=False,
                                allow_soft_placement=True)
        saver = tf.train.Saver(max_to_keep=1,
                               keep_checkpoint_every_n_hours=1.0,
                               write_version=2,
                               pad_step_number=False)
        slim.learning.train(train_op,
                            logdir=FLAGS.train_dir,
                            master='',
                            is_chief=True,
                            init_fn=tf_utils.get_init_fn(FLAGS),
                            number_of_steps=FLAGS.max_number_of_steps,
                            log_every_n_steps=FLAGS.log_every_n_steps,
                            save_summaries_secs=FLAGS.save_summaries_secs,
                            saver=saver,
                            save_interval_secs=FLAGS.save_interval_secs,
                            session_config=config,
                            sync_optimizer=None)
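
# The training entry point above reads its configuration from tf.app.flags; a
# partial definition sketch (flag names are taken from the code, the default
# values are illustrative assumptions):
tf.app.flags.DEFINE_string('dataset_dir', None, 'Directory with the TFRecord dataset.')
tf.app.flags.DEFINE_integer('batch_size', 32, 'Samples per training batch.')
tf.app.flags.DEFINE_float('weight_decay', 0.00004, 'L2 regularization weight.')
tf.app.flags.DEFINE_string('train_dir', './logs', 'Where checkpoints and summaries go.')
tf.app.flags.DEFINE_string('gpu_train', '/device:GPU:0', 'Device used for the training ops.')
FLAGS = tf.app.flags.FLAGS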
def cmd(image_name):
    #path = '../demo/'
    #image_names = sorted(os.listdir(path))
    #image_name="car2.jpg"

    #img = mpimg.imread(path + image_names[-5])
    img = mpimg.imread(image_name)
    print("get image. Ready to process")
    # TensorFlow session: grow memory when needed. TF, DO NOT USE ALL MY GPU MEMORY!!!
    gpu_options = tf.GPUOptions(allow_growth=True)
    config = tf.ConfigProto(log_device_placement=False,
                            gpu_options=gpu_options)
    isess = tf.InteractiveSession(config=config)

    # Input placeholder.
    net_shape = (300, 300)
    data_format = 'NHWC'
    img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
    # Evaluation pre-processing: resize to SSD net shape.
    image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
        img_input,
        None,
        None,
        net_shape,
        data_format,
        resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
    image_4d = tf.expand_dims(image_pre, 0)

    # Define the SSD model.
    reuse = True if 'ssd_net' in locals() else None
    ssd_net = ssd_vgg_300.SSDNet()
    with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
        predictions, localisations, _, _ = ssd_net.net(image_4d,
                                                       is_training=False,
                                                       reuse=reuse)

    # Restore SSD model.
    ckpt_filename = '../checkpoints/ssd_300_vgg.ckpt'
    # ckpt_filename = '../checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt'
    isess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(isess, ckpt_filename)

    # SSD default anchor boxes.
    ssd_anchors = ssd_net.anchors(net_shape)
    # Main image processing routine.
    select_threshold = 0.5
    nms_threshold = .45
    net_shape = (300, 300)
    # Run SSD network.
    rimg, rpredictions, rlocalisations, rbbox_img = isess.run(
        [image_4d, predictions, localisations, bbox_img],
        feed_dict={img_input: img})

    # Get classes and bboxes from the net outputs.
    rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
        rpredictions,
        rlocalisations,
        ssd_anchors,
        select_threshold=select_threshold,
        img_shape=net_shape,
        num_classes=21,
        decode=True)

    rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
    rclasses, rscores, rbboxes = np_methods.bboxes_sort(rclasses,
                                                        rscores,
                                                        rbboxes,
                                                        top_k=400)
    rclasses, rscores, rbboxes = np_methods.bboxes_nms(
        rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
    # Resize bboxes to original image shape. Note: useless for Resize.WARP!
    rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)

    # visualization.bboxes_draw_on_img(img, rclasses, rscores, rbboxes, visualization.colors_plasma)
    centerx = visualization.get_center_x(img, rclasses, rscores, rbboxes)
    width = img.shape[1]
    #left in (0, 40%], mid in (40%, 60%), right in [60%, 100%)
    left = width * 0.4
    right = width * 0.6

    if centerx < 0:
        print("nothing in sight")
        return "stop"
    elif 0 < centerx <= left:
        return "left"
    elif left < centerx < right:
        return "middle"
    elif right <= centerx < width:
        return "right"
    else:
        print("danger centerx, out of picture")
        print("centerx: " + centerx)
        print("left: " + left)
        print("right: " + right)
        print("width: " + width)
        return "danger"
# Example 20
#net_shape = (512, 512)
data_format = 'NHWC'
img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
# Evaluation pre-processing: resize to SSD net shape.
image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
    img_input,
    None,
    None,
    net_shape,
    data_format,
    resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
image_4d = tf.expand_dims(image_pre, 0)

# Define the SSD model.
reuse = True if 'ssd_net' in locals() else None
ssd_net = ssd_vgg_300.SSDNet()  # choose net here!!!
#ssd_net = ssd_vgg_512.SSDNet()
with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
    predictions, localisations, _, _ = ssd_net.net(image_4d,
                                                   is_training=False,
                                                   reuse=reuse)

# Restore SSD model.
ckpt_filename = '/root/train/model.ckpt-1666'
isess.run(tf.global_variables_initializer())
saver = tf.train.Saver()
saver.restore(isess, ckpt_filename)

# SSD default anchor boxes.
ssd_anchors = ssd_net.anchors(net_shape)
def ssd_eval(dataset_name, dataset_dir, batch_size, eval_dir):
    
    tf.logging.set_verbosity(tf.logging.INFO)

    with tf.Graph().as_default():

        tf_global_step = slim.get_or_create_global_step()
        
        # Dataset + SSD Model + Pre-processing
        dataset = dataset_factory.get_dataset(dataset_name, 'test', dataset_dir)
        
        ssd_net = ssd_vgg_300.SSDNet()
        ssd_shape = net_shape
        ssd_anchors = ssd_net.anchors(ssd_shape)

        # Create a dataset provider and batches
        with tf.device('/cpu:0'):
            with tf.name_scope(dataset_name + '_data_provider'):
                provider = slim.dataset_data_provider.DatasetDataProvider(
                    dataset,
                    common_queue_capacity = 2 * batch_size,
                    common_queue_min = batch_size,
                    shuffle = False
                )

            [image, shape, glabels, gbboxes] = provider.get(['image', 'shape', 'object/label', 'object/bbox'])
            [gdifficults] = provider.get(['object/difficult'])

            image, glabels, gbboxes, gbbox_img = ssd_vgg_preprocessing.preprocess_for_eval(image, glabels, gbboxes, ssd_shape,
                                                                    data_format = data_format, 
                                                                    resize = ssd_vgg_preprocessing.Resize.WARP_RESIZE)

            gclasses, glocalizations, gscores = ssd_net.bboxes_encode(glabels, gbboxes, ssd_anchors)
            batch_shape = [1] * 5 + [len(ssd_anchors)] * 3

            # Evaluation Batch
            r = tf.train.batch(reshape_list([image, glabels, gbboxes, gdifficults, gbbox_img, gclasses, glocalizations, gscores]),
                                batch_size = batch_size, 
                                num_threads = 1,
                                capacity = 5 * batch_size, 
                                dynamic_pad = True)
            (b_image, b_glabels, b_gbboxes, b_gdifficults, b_gbbox_img, b_gclasses, b_glocalizations,
                b_gscores) = reshape_list(r, batch_shape)

        # SSD network + output decoding
        arg_scope = ssd_net.arg_scope(data_format= data_format)
        with slim.arg_scope(arg_scope):
            predictions, localizations, logits, _ = ssd_net.net(b_image, is_training=False)
            
        ssd_net.losses(logits, localizations, b_gclasses, b_glocalizations, b_gscores)

        with tf.device('/device:CPU:0'):
            localizations = ssd_net.bboxes_decode(localizations, ssd_anchors)
            rscores, rbboxes = ssd_net.detected_bboxes(predictions, localizations,
                                                        select_threshold=0.01,
                                                        nms_threshold=0.45,
                                                        clipping_bbox=None,
                                                        top_k=400,
                                                        keep_top_k=200)
            
            num_gbboxes, tp, fp, rscores = tfe.bboxes_matching_batch(rscores.keys(), rscores, rbboxes,
                                                                    b_glabels, b_gbboxes, b_gdifficults, 
                                                                    matching_threshold= 0.5)
            
        variables_to_restore = slim.get_variables_to_restore()

   
        with tf.device('/device:CPU:0'):

            dict_metrics = {}
            
            for loss in tf.get_collection(tf.GraphKeys.LOSSES):
                dict_metrics[loss.op.name] = slim.metrics.streaming_mean(loss)

            for loss in tf.get_collection('EXTRA_LOSSES'):
                dict_metrics[loss.op.name] = slim.metrics.streaming_mean(loss)
            
            for name, metric in dict_metrics.items():
                summary_name = name
                op = tf.summary.scalar(summary_name, metric[0], collections=[])
                tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)
            
            tp_fp_metric = tfe.streaming_tp_fp_arrays(num_gbboxes, tp, fp, rscores)
            for c in tp_fp_metric[0].keys():
                dict_metrics['tp_fp_%s' % c] = (tp_fp_metric[0][c], tp_fp_metric[1][c])

            aps_VOC07 = {}
            aps_voc12 = {}
            
            for c in tp_fp_metric[0].keys():
                # precision and recall values
                pre, rec = tfe.precision_recall(*tp_fp_metric[0][c])
                
                # average precision VOC07
                v = tfe.average_precision_voc07(pre,rec)
                summary_name = 'AP_VOC07/%s' % c
                op = tf.summary.scalar(summary_name, v, collections=[])
                tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)
                aps_VOC07[c] = v

                # Average precision VOC12.
                v = tfe.average_precision_voc12(pre, rec)
                summary_name = 'AP_VOC12/%s' % c
                op = tf.summary.scalar(summary_name, v, collections=[])
                tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)
                aps_voc12[c] = v
            
            # Mean average Precision VOC07
            summary_name = 'AP_VOC07/mAP'
            mAP = tf.add_n(list(aps_VOC07.values()))/len(aps_VOC07)
            op = tf.summary.scalar(summary_name, mAP, collections=[])
            op = tf.Print(op, [mAP], summary_name)
            tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

            # Mean average precision VOC12.
            summary_name = 'AP_VOC12/mAP'
            mAP = tf.add_n(list(aps_voc12.values())) / len(aps_voc12)
            op = tf.summary.scalar(summary_name, mAP, collections=[])
            op = tf.Print(op, [mAP], summary_name)
            tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)


        names_to_values, names_to_updates = slim.metrics.aggregate_metric_map(dict_metrics)

        # Evaluation Loop

        gpu_options = tf.GPUOptions(allow_growth=True, per_process_gpu_memory_fraction=0.9)
        config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)


        num_batches = math.ceil(dataset.num_samples / float(batch_size))
        tf.logging.info('Evaluating %s' % ckpt_filename)
        start = time.time()
        slim.evaluation.evaluate_once(master= '', 
                                      checkpoint_path = ckpt_filename,
                                      logdir= eval_dir, 
                                      num_evals= num_batches,
                                      eval_op= flatten(list(names_to_updates.values())),
                                      variables_to_restore= variables_to_restore,
                                      session_config = config)
        # log time spent
        elapsed = time.time() - start
        print('Time Spent: %.3f' % elapsed)
        print('Time Spent per batch: %.3f seconds' % (elapsed/num_batches))
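
# ssd_eval also leans on module-level globals (net_shape, data_format,
# ckpt_filename); a hedged invocation sketch with assumed values:
net_shape = (300, 300)
data_format = 'NHWC'
ckpt_filename = './checkpoints/ssd_300_vgg.ckpt'
ssd_eval('pascalvoc_2007', './datasets/VOC_SSD', batch_size=8, eval_dir='./eval_logs')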
# Example 22
def main_train():
    dirname = os.path.dirname(__file__)
    dataset_dir = '{}/datasets/VOC_SSD'.format(dirname)
    ssd_net = ssd_vgg_300.SSDNet()

    train_params = TrainParams(weight_decay=0.00004,
                               batch_size=32,
                               optimizer='adam',
                               adam_beta1=0.9,
                               adam_beta2=0.999,
                               opt_epsilon=1.0)

    ssd_params = SSDParams(loss_alpha=1.,
                           negative_ratio=3.,
                           match_threshold=0.5,
                           label_smoothing=0.0)

    learning_rate_params = LearningRateParams(learning_rate=0.01,
                                              end_learning_rate=0.0001,
                                              learning_rate_decay_factor=0.94,
                                              num_epochs_per_decay=2.0)

    graph = tf.Graph()
    with graph.as_default():
        with tf.device('/device:CPU:0'):
            global_step = tf.train.create_global_step()

        dataset = voc_dataset.get_dataset(dataset_dir, 'train')
        ssd_shape = ssd_net.params.img_shape

        print('---- enter anchors ----')

        ssd_anchors = ssd_net.anchors(ssd_shape)

        print('---- end anchors ----')

        image_preprocessing_fn = ssd_vgg_preprocessing.preprocess_for_train

        provider = voc_dataset.get_dataset_provider(dataset, 4, train_params.batch_size)

        [image, shape, glabels, gbboxes] = provider.get(['image', 'shape',
                                                         'object/label',
                                                         'object/bbox'])

        print('---- from provider ----')
        print(image)
        print(shape)
        print(glabels)
        print(gbboxes)
        print('---- end provider ----')

        print('---- enter preprocessing ----')
        image, glabels, gbboxes = image_preprocessing_fn(image, glabels, gbboxes, ssd_shape, 'NHWC')
        print('---- end preprocessing ----')

        print('---- encode bbox ----')
        gclasses, glocalisations, gscores = ssd_net.bboxes_encode(glabels, gbboxes, ssd_anchors)
        print('---- end encode ----')

        batch_shape = [1] + [len(ssd_anchors)] * 3

        print(batch_shape)

        tensors = reshape_list([image, gclasses, glocalisations, gscores])

        print('reshape 1 ---', tensors, len(tensors))

        r = tf.train.batch(tensors, train_params.batch_size, num_threads=4, capacity=5 * train_params.batch_size)

        print('train batch ---', r, len(r))

        b_image, b_gclasses, b_glocalisations, b_gscores = reshape_list(r, batch_shape)

        print(b_image)
        print(b_gclasses)
        print(b_glocalisations)
        print(b_gscores)

        tensors = reshape_list([b_image, b_gclasses, b_glocalisations, b_gscores])

        print('reshape 2 ---', tensors, len(tensors))

        batch_queue = slim.prefetch_queue.prefetch_queue(tensors, capacity=2)

        print('batch queue ---', batch_queue)

        b_image, b_gclasses, b_glocalisations, b_gscores = reshape_list(batch_queue.dequeue(), batch_shape)
        arg_scope = ssd_net.arg_scope(weight_decay=train_params.weight_decay)

        with slim.arg_scope(arg_scope):
            predictions, localisations, logits, end_points = ssd_net.net(b_image, is_training=True)

            ssd_net.losses(logits, localisations, b_gclasses, b_glocalisations, b_gscores,
                           match_threshold=ssd_params.match_threshold,
                           negative_ratio=ssd_params.negative_ratio,
                           loss_alpha=ssd_params.loss_alpha,
                           label_smoothing=ssd_params.label_smoothing)

        # moving_average_variables = slim.get_model_variables()
        # variable_averages = tf.train.ExponentialMovingAverage(0.999, global_step)
        decay_steps = int(dataset.num_samples / train_params.batch_size * learning_rate_params.num_epochs_per_decay)

        learning_rate = tf.train.exponential_decay(learning_rate_params.learning_rate,
                                                   global_step,
                                                   decay_steps,
                                                   learning_rate_params.learning_rate_decay_factor,
                                                   staircase=True)

        optimizer = tf.train.AdamOptimizer(learning_rate,
                                           train_params.adam_beta1,
                                           train_params.adam_beta2,
                                           train_params.opt_epsilon)

        print('---- loss ----')
        losses = tf.get_collection(tf.GraphKeys.LOSSES)
        print(losses)
        loss = tf.add_n(losses, name='loss')
        print(loss)
        regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        print(regularization_losses)
        regularization_loss = tf.add_n(regularization_losses, name='regularization_loss')
        print(regularization_loss)
        sum_loss = tf.add_n([loss, regularization_loss])
        print(sum_loss)
        print('---- end ----')

        grad = optimizer.compute_gradients(sum_loss)

        grad_updates = optimizer.apply_gradients(grad, global_step)

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

        print('update ops', update_ops)

        update_ops.append(grad_updates)

        update_op = tf.group(*update_ops)

        train_tensor = control_flow_ops.with_dependencies([update_op], sum_loss, name='train_op')

        gpu_options = tf.GPUOptions(allow_growth=True)
        config = tf.ConfigProto(gpu_options=gpu_options)
        saver = tf.train.Saver(max_to_keep=10,
                               keep_checkpoint_every_n_hours=1.0)

        import sys
        # with tf.Session(config=config) as sess:
            # epochs = 10
            # sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
            # for epoch in xrange(epochs):
                # print '%s/%s' % (epoch + 1, epochs)
                # tf.train.start_queue_runners(sess=sess)
                # steps = int(dataset.num_samples / train_params.batch_size)
                # for step in xrange(steps):
                    # sys.stdout.write('\r%s/%s' % (step + 1, steps))
                    # sys.stdout.flush()
                    # _, loss = sess.run([train_tensor, sum_loss])
                # print
        slim.learning.train(train_tensor,
                            '{}/trained_model'.format(dirname),
                            is_chief=True,
                            init_fn=None,
                            saver=saver,
                            session_config=config)
class SSDServer:
    NET_SHAPE = (300, 300)
    SELECT_THRESHOLD = 0.5

    NMS_THRESHOLD = 0.45
    TOP_K = 400

    ssd_net = ssd_vgg_300.SSDNet()
    ssd_anchors = ssd_net.anchors(NET_SHAPE)

    VOC_MAP = {
        0: 'none',
        1: 'aeroplane',
        2: 'bicycle',
        3: 'bird',
        4: 'boat',
        5: 'bottle',
        6: 'bus',
        7: 'car',
        8: 'cat',
        9: 'chair',
        10: 'cow',
        11: 'diningtable',
        12: 'dog',
        13: 'horse',
        14: 'motorbike',
        15: 'person',
        16: 'pottedplant',
        17: 'sheep',
        18: 'sofa',
        19: 'train',
        20: 'tvmonitor'
    }

    def __init__(self, session, inputs, outputs):
        self.session = session
        self.inputs = inputs
        self.outputs = outputs

    @staticmethod
    def initialize(model_path, signature_name):
        session = tf.Session()
        meta_graph = tf.saved_model.loader.load(
            session, [tf.saved_model.tag_constants.SERVING], model_path)

        signature = meta_graph.signature_def[signature_name]
        inputs = {}
        for inp_name, inp in signature.inputs.items():
            inputs[inp_name] = session.graph.get_tensor_by_name(inp.name)

        outputs = {}
        for out_name, out in signature.outputs.items():
            outputs[out_name] = session.graph.get_tensor_by_name(out.name)

        return SSDServer(session, inputs, outputs)

    def preprocess(self, b64_string):
        decoded = base64.b64decode(b64_string)
        jpeg_image = io.BytesIO(decoded)
        return mpimg.imread(jpeg_image, format='JPG')

    def run(self, pic_matrix):
        outputs = self.session.run(
            self.outputs, feed_dict={self.inputs["img_input"]: pic_matrix})
        rpredictions = [
            outputs['ssd_300_vgg/softmax/Reshape_1:0'],
            outputs['ssd_300_vgg/softmax_1/Reshape_1:0'],
            outputs['ssd_300_vgg/softmax_2/Reshape_1:0'],
            outputs['ssd_300_vgg/softmax_3/Reshape_1:0'],
            outputs['ssd_300_vgg/softmax_4/Reshape_1:0'],
            outputs['ssd_300_vgg/softmax_5/Reshape_1:0']
        ]
        rlocalisations = [
            outputs['ssd_300_vgg/block4_box/Reshape:0'],
            outputs['ssd_300_vgg/block7_box/Reshape:0'],
            outputs['ssd_300_vgg/block8_box/Reshape:0'],
            outputs['ssd_300_vgg/block9_box/Reshape:0'],
            outputs['ssd_300_vgg/block10_box/Reshape:0'],
            outputs['ssd_300_vgg/block11_box/Reshape:0']
        ]
        rbbox_img = outputs['bbox_img']

        return rpredictions, rlocalisations, rbbox_img

    def postprocess(self, rpredictions, rlocalisations, rbbox_img):
        rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
            rpredictions,
            rlocalisations,
            SSDServer.ssd_anchors,
            select_threshold=SSDServer.SELECT_THRESHOLD,
            img_shape=SSDServer.NET_SHAPE,
            num_classes=21,
            decode=True)

        rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)

        rclasses, rscores, rbboxes = np_methods.bboxes_sort(
            rclasses, rscores, rbboxes, top_k=SSDServer.TOP_K)

        rclasses, rscores, rbboxes = np_methods.bboxes_nms(
            rclasses, rscores, rbboxes, nms_threshold=SSDServer.NMS_THRESHOLD)
        rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)

        rclasses = list(map(lambda c: SSDServer.VOC_MAP.get(c, "NA"),
                            rclasses))

        class_arr = [bytes(x, "utf-8") for x in rclasses]
        classes_tensor = hs.TensorProto(
            dtype=hs.DT_STRING,
            tensor_shape=hs.TensorShapeProto(
                dim=[hs.TensorShapeProto.Dim(size=-1)]),
            string_val=class_arr)

        scores_tensor = hs.TensorProto(
            dtype=hs.DT_DOUBLE,
            tensor_shape=hs.TensorShapeProto(
                dim=[hs.TensorShapeProto.Dim(size=-1)]),
            double_val=rscores)

        bboxes_tensor = hs.TensorProto(dtype=hs.DT_DOUBLE,
                                       tensor_shape=hs.TensorShapeProto(dim=[
                                           hs.TensorShapeProto.Dim(size=-1),
                                           hs.TensorShapeProto.Dim(size=4)
                                       ]),
                                       double_val=rbboxes.flatten())

        return classes_tensor, scores_tensor, bboxes_tensor
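
# Usage sketch (hedged, not from the original): wiring the four SSDServer steps
# together. Assumes a SavedModel exported under './models/ssd' with a signature
# named 'serving_default' (both names hypothetical) whose tensors match the
# names used above.
#
# server = SSDServer.initialize('./models/ssd', 'serving_default')
# matrix = server.preprocess(b64_string)              # base64 JPEG -> ndarray
# preds, locs, bbox_img = server.run(matrix)          # raw SSD outputs
# classes, scores, bboxes = server.postprocess(preds, locs, bbox_img)
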
def get_object_center(q, detect_class):

    # classes:
    # 1.Aeroplanes     2.Bicycles   3.Birds       4.Boats           5.Bottles
    # 6.Buses          7.Cars       8.Cats        9.Chairs          10.Cows
    # 11.Dining tables 12.Dogs      13.Horses     14.Motorbikes     15.People
    # 16.Potted plants 17.Sheep     18.Sofas      19.Trains         20.TV/Monitors

    slim = tf.contrib.slim

    # TensorFlow session: grow memory when needed. TF, DO NOT USE ALL MY GPU MEMORY!!!
    gpu_options = tf.GPUOptions(allow_growth=True)
    config = tf.ConfigProto(log_device_placement=False,
                            gpu_options=gpu_options)
    isess = tf.InteractiveSession(config=config)

    # Input placeholder.
    net_shape = (300, 300)
    data_format = 'NHWC'
    img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
    # Evaluation pre-processing: resize to SSD net shape.
    image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
        img_input,
        None,
        None,
        net_shape,
        data_format,
        resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
    image_4d = tf.expand_dims(image_pre, 0)

    # Define the SSD model.
    reuse = True if 'ssd_net' in locals() else None
    ssd_net = ssd_vgg_300.SSDNet()
    with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
        predictions, localisations, _, _ = ssd_net.net(image_4d,
                                                       is_training=False,
                                                       reuse=reuse)

    # Restore SSD model.
    # ckpt_filename = 'checkpoints/ssd_300_vgg.ckpt'
    ckpt_filename = '../SSD-Tensorflow/checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt'

    isess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(isess, ckpt_filename)

    # SSD default anchor boxes.
    ssd_anchors = ssd_net.anchors(net_shape)

    # Main image processing routine.
    def process_image(img,
                      select_threshold=0.5,
                      nms_threshold=.45,
                      net_shape=(300, 300)):
        # Run SSD network.
        rimg, rpredictions, rlocalisations, rbbox_img = isess.run(
            [image_4d, predictions, localisations, bbox_img],
            feed_dict={img_input: img})

        # Get classes and bboxes from the net outputs.
        rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
            rpredictions,
            rlocalisations,
            ssd_anchors,
            select_threshold=select_threshold,
            img_shape=net_shape,
            num_classes=21,
            decode=True)

        rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
        rclasses, rscores, rbboxes = np_methods.bboxes_sort(rclasses,
                                                            rscores,
                                                            rbboxes,
                                                            top_k=400)
        rclasses, rscores, rbboxes = np_methods.bboxes_nms(
            rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
        # Resize bboxes to original image shape. Note: useless for Resize.WARP!
        rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
        return rclasses, rscores, rbboxes

    def get_bboxes(rclasses, rbboxes):
        # Collect the detected bounding boxes as a list of dicts.

        num_detections = rclasses.shape[0]
        object_bboxes = []
        for i in range(num_detections):
            object_bbox = dict()
            object_bbox['i'] = i
            object_bbox['class'] = rclasses[i]
            object_bbox['y_min'] = rbboxes[i, 0]
            object_bbox['x_min'] = rbboxes[i, 1]
            object_bbox['y_max'] = rbboxes[i, 2]
            object_bbox['x_max'] = rbboxes[i, 3]
            object_bboxes.append(object_bbox)
        return object_bboxes

    # load net
    net = SiamRPNvot()
    net.load_state_dict(
        torch.load(
            join(realpath(dirname(__file__)),
                 '../DaSiamRPN-master/code/SiamRPNVOT.model')))

    net.eval()

    # open video capture
    video = cv2.VideoCapture(0)

    if not video.isOpened():
        print("Could not open video")
        sys.exit()

    searching = True
    while searching:

        # Read first frame.
        ok, frame = video.read()
        if not ok:
            print('Cannot read video file')
            sys.exit()

        # Define an initial bounding box
        height = frame.shape[0]
        width = frame.shape[1]

        rclasses, rscores, rbboxes = process_image(frame)

        bboxes = get_bboxes(rclasses, rbboxes)
        for bbox in bboxes:
            if bbox['class'] == detect_class:
                print(bbox)
                ymin = int(bbox['y_min'] * height)
                xmin = int(bbox['x_min'] * width)
                ymax = int(bbox['y_max'] * height)
                xmax = int(bbox['x_max'] * width)
                # Convert to the center-based (cx, cy, w, h) box format that
                # SiamRPN_init expects.
                cx = (xmin + xmax) / 2
                cy = (ymin + ymax) / 2
                h = ymax - ymin
                w = xmax - xmin
                new_bbox = (cx, cy, w, h)
                print(new_bbox)
                searching = False
                break

    # tracker init
    target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
    state = SiamRPN_init(frame, target_pos, target_sz, net)

    # tracking and visualization
    toc = 0
    count_number = 0

    while True:

        # Read a new frame
        ok, frame = video.read()
        if not ok:
            break

        # Start timer
        tic = cv2.getTickCount()

        # Update tracker
        state = SiamRPN_track(state, frame)  # track
        # print(state)

        toc += cv2.getTickCount() - tic

        if state:

            res = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
            res = [int(l) for l in res]

            cv2.rectangle(frame, (res[0], res[1]),
                          (res[0] + res[2], res[1] + res[3]), (0, 255, 255), 3)

            count_number += 1
            # set object_center
            object_center = dict()
            object_center['x'] = state['target_pos'][0] / width
            object_center['y'] = state['target_pos'][1] / height
            q.put(object_center)

            # Periodically (every 40 frames) re-run SSD detection and
            # re-initialize the tracker. Note: `not state` can never be True
            # inside this `if state:` block, so the cadence check does the work.
            if (not state) or count_number % 40 == 3:
                cv2.putText(frame, "Tracking failure detected", (100, 80),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 255), 2)
                searching = True
                while searching:
                    ok, frame = video.read()
                    rclasses, rscores, rbboxes = process_image(frame)
                    bboxes = get_bboxes(rclasses, rbboxes)
                    for bbox in bboxes:
                        if bbox['class'] == detect_class:
                            ymin = int(bbox['y_min'] * height)
                            xmin = int(bbox['x_min'] * width)
                            ymax = int(bbox['y_max'] * height)
                            xmax = int(bbox['x_max'] * width)
                            cx = (xmin + xmax) / 2
                            cy = (ymin + ymax) / 2
                            h = ymax - ymin
                            w = xmax - xmin
                            new_bbox = (cx, cy, w, h)
                            target_pos, target_sz = np.array(
                                [cx, cy]), np.array([w, h])
                            state = SiamRPN_init(frame, target_pos, target_sz,
                                                 net)

                            p1 = (int(xmin), int(ymin))
                            p2 = (int(xmax), int(ymax))
                            cv2.rectangle(frame, p1, p2, (0, 255, 0), 2, 1)

                            searching = False

                            break

        # Resize the frame for display.
        resized_frame = cv2.resize(frame,
                                   None,
                                   fx=0.65,
                                   fy=0.65,
                                   interpolation=cv2.INTER_AREA)
        # Flip horizontally so the display is mirrored.
        horizontal = cv2.flip(resized_frame, 1, dst=None)

        # Show the frame.
        cv2.namedWindow("SSD+SiamRPN", cv2.WINDOW_NORMAL)
        cv2.imshow('SSD+SiamRPN', horizontal)

        # Exit if ESC pressed
        k = cv2.waitKey(1) & 0xff
        if k == 27:
            break

    video.release()
    cv2.destroyAllWindows()
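
# Hypothetical caller sketch: run get_object_center in a worker process and
# consume normalized centers from the queue (class id 15 is 'person' in VOC).
#
# from multiprocessing import Process, Queue
# q = Queue()
# Process(target=get_object_center, args=(q, 15)).start()
# center = q.get()   # {'x': ..., 'y': ...}, both normalized to the frame size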
Exemple #25
#ckpt_filename = '../logs/ssd_300_kitti_13/model.ckpt-149757'

# In[18]:
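# NOTE: this notebook fragment assumes `image`, `labels`, `bboxes`, `sess`, and
# `ckpt_filename` were defined in earlier cells that are not shown here.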

# Image pre-processing
out_shape = (None, None)
image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
    image, labels, bboxes, out_shape, resize=ssd_vgg_preprocessing.Resize.NONE)

image_4d = tf.expand_dims(image_pre, 0)

# SSD construction.
reuse = True if 'ssd' in locals() else None
params = ssd_vgg_300.SSDNet.default_params
params = params._replace(num_classes=8)
ssd = ssd_vgg_300.SSDNet(params)
with slim.arg_scope(ssd.arg_scope(weight_decay=0.0005)):
    predictions, localisations, logits, end_points = ssd.net(image_4d,
                                                             is_training=False,
                                                             reuse=reuse)

# In[20]:

# Initialize variables.
init_op = tf.global_variables_initializer()
sess.run(init_op)
# Restore SSD model.
saver = tf.train.Saver()
saver.restore(sess, ckpt_filename)

# In[24]:
Exemple #26
    def __init__(self, figsize=(10, 10)):
        config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
        self._sess = tf.Session(config=config)
        self._cv_bridge = CvBridge()

        self.distance = []
        self.center = 0.0

        self.depth_horizon = 59

        self.robot_x = 0.0
        self.robot_y = 0.0
        self.banana_data = []
        self.apple_data = []
        self.cable_data = []
        self.latest_cable_x = -100.0
        self.latest_cable_y = -100.0
        self.r = 0.3
        self.data_count = []

        self._tf_listener = rostf.TransformListener()
        self._latest_point = None

        net_shape = (300, 300)
        data_format = 'NHWC'

        self.img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
        # Evaluation pre-processing: resize to SSD net shape.
        self.image_pre, self.labels_pre, self.bboxes_pre, self.bbox_img = \
            ssd_vgg_preprocessing.preprocess_for_eval(
                self.img_input, None, None, net_shape, data_format,
                resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
        self.image_4d = tf.expand_dims(self.image_pre, 0)

        self.ssd_net = ssd_vgg_300.SSDNet()
        with tf.contrib.slim.arg_scope(
                self.ssd_net.arg_scope(data_format=data_format)):
            self.predictions, self.localisations, _, _ = self.ssd_net.net(
                self.image_4d, is_training=False)

        # Restore SSD model.
        ckpt_filename = 'checkpoints/ssd_300_vgg.ckpt'
        # ckpt_filename = '../checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt'
        self._sess.run(tf.global_variables_initializer())

        saver = tf.train.Saver()
        saver.restore(self._sess, ckpt_filename)

        # SSD default anchor boxes.
        self._ssd_anchors = self.ssd_net.anchors(net_shape)

        #self._sub = rospy.Subscriber('camera/rgb/image_color/compressed', CompressedImage, self.callback, queue_size=1)
        self._pub_img = rospy.Publisher('ssd_image', Image, queue_size=1)
        self._pub_rslt = rospy.Publisher('ssd_result',
                                         Float32MultiArray,
                                         queue_size=1)
        self._pub_detect = rospy.Publisher('detect',
                                           Float32MultiArray,
                                           queue_size=1)
        self._pub_banana_point = rospy.Publisher('banana_point',
                                                 Float32MultiArray,
                                                 queue_size=1)
        self._pub_apple_point = rospy.Publisher('apple_point',
                                                Float32MultiArray,
                                                queue_size=1)
        self._pub_cable_point = rospy.Publisher('cable_point',
                                                Float32MultiArray,
                                                queue_size=1)
        sub_rgb = message_filters.Subscriber(
            "camera/rgb/image_color/compressed",
            CompressedImage,
            queue_size=1,
            buff_size=2**24)
        sub_depth = message_filters.Subscriber("camera/depth/image",
                                               Image,
                                               queue_size=1,
                                               buff_size=2**24)
        self.mf = message_filters.ApproximateTimeSynchronizer(
            [sub_rgb, sub_depth], 1,
            10.0)  # args: queue size, allowed time difference (slop) in seconds
        self.mf.registerCallback(self.callback)
        self.marker_pub = rospy.Publisher("test_text", Marker, queue_size=10)
        self.marker_voting_cable_pub = rospy.Publisher("voting_cable_point",
                                                       Marker,
                                                       queue_size=10)
        self.marker_voting_banana_pub = rospy.Publisher("voting_banana_point",
                                                        Marker,
                                                        queue_size=10)
        self.marker_voting_apple_pub = rospy.Publisher("voting_apple_point",
                                                       Marker,
                                                       queue_size=10)

        self._colors = []
        for i in range(21):
            _color = (random.randint(0, 255), random.randint(0, 255),
                      random.randint(0, 255))
            self._colors.append(_color)
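
# Hypothetical startup sketch for this ROS node (the enclosing class name is
# not shown in this excerpt; `SSDDetectorNode` is assumed):
#
# rospy.init_node('ssd_detector')
# node = SSDDetectorNode()
# rospy.spin()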
Exemple #27
def inference(input=0, inputType=1):
    slim = tf.contrib.slim
    sys.path.append('../')
    from nets import ssd_vgg_300, ssd_common, np_methods
    from preprocessing import ssd_vgg_preprocessing
    from notebooks import visualization
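    # NOTE: `category` (the class-id -> name mapping used below) is assumed to
    # be defined at module level; it is not part of this excerpt.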
    # TensorFlow session: grow memory when needed. TF, DO NOT USE ALL MY GPU MEMORY!!!
    gpu_options = tf.GPUOptions(allow_growth=True)
    config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)
    isess = tf.InteractiveSession(config=config)

    # Input placeholder.
    net_shape = (300, 300)
    data_format = 'NHWC'
    img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
    # Evaluation pre-processing: resize to SSD net shape.
    image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
        img_input, None, None, net_shape, data_format, resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
    image_4d = tf.expand_dims(image_pre, 0)

    # Define the SSD model.
    reuse = True if 'ssd_net' in locals() else None
    ssd_net = ssd_vgg_300.SSDNet()
    with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
        predictions, localisations, _, _ = ssd_net.net(image_4d, is_training=False, reuse=reuse)

    # Restore SSD model.
    ckpt_filename = '../checkpoints/ssd_300_vgg.ckpt'
    # ckpt_filename = '../checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt'
    isess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(isess, ckpt_filename)

    # SSD default anchor boxes.
    ssd_anchors = ssd_net.anchors(net_shape)

    # Main image processing routine.
    def process_image(img, select_threshold=0.5, nms_threshold=.45, net_shape=(300, 300)):
        # Run SSD network.
        rimg, rpredictions, rlocalisations, rbbox_img = isess.run([image_4d, predictions, localisations, bbox_img],
                                                                  feed_dict={img_input: img})

        # Get classes and bboxes from the net outputs.
        rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
            rpredictions, rlocalisations, ssd_anchors,
            select_threshold=select_threshold, img_shape=net_shape, num_classes=21, decode=True)

        rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
        rclasses, rscores, rbboxes = np_methods.bboxes_sort(rclasses, rscores, rbboxes, top_k=400)
        rclasses, rscores, rbboxes = np_methods.bboxes_nms(rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
        # Resize bboxes to original image shape. Note: useless for Resize.WARP!
        rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
        return rclasses, rscores, rbboxes

    # input is a image
    inputType = int(inputType)
    if inputType == 1:
        if input == 0:
            print("You must provide an input image")
            exit(-1)
        # Test on some demo image and visualize output.
        img = mpimg.imread(input)
        rclasses, rscores, rbboxes = process_image(img)

        # Find the name of the category num
        print(list(map(lambda i:"{}:{}".format(i,category[i]),list(rclasses))))
        rclasses = np.array(list(map(lambda i:"{}:{}".format(i,category[i]),list(rclasses))))

        # visualization.bboxes_draw_on_img(img, rclasses, rscores, rbboxes, visualization.colors_plasma)
        # plot the image directly
        visualization.plt_bboxes(img, rclasses, rscores, rbboxes)
    elif inputType == 2:
        # input is the video
        # plot the boxes into the image
        cap = cv2.VideoCapture(input)
        fps = cap.get(cv2.CAP_PROP_FPS)
        size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        fourcc = cap.get(cv2.CAP_PROP_FOURCC)
        #fourcc = cv2.CAP_PROP_FOURCC(*'CVID')
        print('fps=%d,size=%r,fourcc=%r'%(fps,size,fourcc))
        delay = int(10 / fps)
        print(delay)
        if delay <= 1:
            delay = 1
        while cap.isOpened():
            ret, frame = cap.read()
            print(ret)
            if ret:
                image_np = frame
                # Actual detection; process_image resizes to the SSD input
                # shape internally, so no manual expand_dims is needed here.
                rclasses, rscores, rbboxes = process_image(image_np)

                #print(list(map(lambda i: "{}:{}".format(i, category[i]), list(rclasses))))
                rclasses = np.array(list(map(lambda i: "{}:{}".format(i, category[i]), list(rclasses))))

                # Visualization of the results of a detection.
                visualization.bboxes_draw_on_img(image_np, rclasses, rscores, rbboxes)
                cv2.imshow('frame', image_np)
                #cv2.waitKey(np.uint(delay))
                if cv2.waitKey(delay) & 0xFF == ord('q'):
                    break
                print('Ongoing...')
            else:
                break
        cap.release()
        cv2.destroyAllWindows()
    elif inputType == 3:
        print("save video")
        if input == 0:
            print("You must provide an input video")
            exit(-1)
        def save_image(image_np):
            rclasses, rscores, rbboxes = process_image(image_np)
            # print(list(map(lambda i: "{}:{}".format(i, category[i]), list(rclasses))))
            rclasses = np.array(list(map(lambda i: "{}:{}".format(i, category[i]), list(rclasses))))
            visualization.bboxes_draw_on_img(image_np, rclasses, rscores, rbboxes)
            return image_np

        from moviepy.editor import VideoFileClip
        cap = cv2.VideoCapture(input)
        fps = cap.get(cv2.CAP_PROP_FPS)
        cap.release()
        cv2.destroyAllWindows()

        video = VideoFileClip(input)
        result = video.fl_image(save_image)
        output = os.path.join("./videos/output_{}".format(input.split("/")[-1]))
        result.write_videofile(output, fps=fps)
    else:
        cap = cv2.VideoCapture(0)

        while True:
            # Capture frame-by-frame
            ret, frame = cap.read()

            #cv2.imshow('frame', frame)
            # Actual detection; process_image resizes to the SSD input shape
            # internally, so no manual expand_dims is needed here.
            rclasses, rscores, rbboxes = process_image(frame)

            # print(list(map(lambda i: "{}:{}".format(i, category[i]), list(rclasses))))
            rclasses = np.array(list(map(lambda i: "{}:{}".format(i, category[i]), list(rclasses))))
            # Visualization of the results of a detection.
            visualization.bboxes_draw_on_img(frame, rclasses, rscores, rbboxes)
            cv2.imshow('frame', frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        # When everything done, release the capture
        cap.release()
        cv2.destroyAllWindows()
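
# Usage sketch for inference() above (paths hypothetical):
#
# inference('demo.jpg', 1)    # single image, plotted via visualization.plt_bboxes
# inference('demo.mp4', 2)    # play a video with boxes drawn on each frame
# inference('demo.mp4', 3)    # re-encode the video with boxes via moviepy
# inference(inputType=4)      # any other type falls through to the webcam loop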
def main(_):
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        tf_global_step = slim.get_or_create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = dataset_factory.get_dataset(FLAGS.dataset_name,
                                              FLAGS.dataset_split_name,
                                              FLAGS.dataset_dir)

        ####################
        # Select the model #
        ####################
        ssd_model = ssd_vgg_300.SSDNet()
        ssd_model.set_batch_size(FLAGS.batch_size)
        network_fn = nets_factory.get_network_fn(ssd_model, is_training=False)

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            common_queue_capacity=20 * FLAGS.batch_size,
            common_queue_min=10 * FLAGS.batch_size)
        [image, labels,
         bboxes] = provider.get(['image', 'object/label', 'object/bbox'])
        labels -= FLAGS.labels_offset

        if FLAGS.remove_difficult:
            [difficults_gt] = provider.get(['object/difficult'])
        else:
            difficults_gt = tf.zeros(tf.shape(labels), dtype=tf.int64)

        #####################################
        # Select the preprocessing function #
        #####################################
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name)

        eval_image_size_height = (FLAGS.eval_image_size_height
                                  or ssd_model.ssd_params.image_size[0])
        eval_image_size_width = (FLAGS.eval_image_size_width
                                 or ssd_model.ssd_params.image_size[1])

        image, labels_gt, bboxes_gt = image_preprocessing_fn(
            image,
            labels,
            bboxes,
            eval_image_size_height,
            eval_image_size_width,
            data_format=DATA_FORMAT,
            is_training=False)

        anchors = ssd_model.anchors_for_all_layer()
        labels_en, scores_en, bboxes_en = ssd_model.bboxes_encode(
            anchors, labels_gt, bboxes_gt)

        images, labels_gt, bboxes_gt, difficults_gt, labels_en, scores_en, bboxes_en = \
            tf.train.batch(
                [image, labels_gt, bboxes_gt, difficults_gt, labels_en, scores_en, bboxes_en],
                batch_size=FLAGS.batch_size,
                num_threads=FLAGS.num_preprocessing_threads,
                capacity=5 * FLAGS.batch_size,
                dynamic_pad=True)

        ################################
        # SSD Model + outputs decoding #
        ################################
        logits, locs, endpoints = network_fn(images)
        ssd_model.ssd_class_and_loc_losses(logits, locs, labels_en, bboxes_en,
                                           scores_en)

        # Performing post_processing on CPU: loop-intensive, usually more efficient.
        with tf.device('/device:CPU:0'):
            # Detect objects from SSD Model outputs
            locs_aggr = ssd_model.bboxes_decode(locs, anchors)
            scores_nms, bboxes_nms = ssd_model.detected_bboxes(
                logits, locs_aggr, FLAGS.select_threshold, FLAGS.nms_threshold,
                FLAGS.select_top_k, FLAGS.keep_top_k)

            num_bboxes_gt, tp, fp = bboxes_matching_batch(
                scores_nms.keys(),
                scores_nms,
                bboxes_nms,
                labels_gt,
                bboxes_gt,
                difficults_gt,
                matching_threshold=FLAGS.matching_threshold)

        if FLAGS.moving_average_decay:
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, tf_global_step)
            variables_to_restore = variable_averages.variables_to_restore(
                slim.get_model_variables())
            variables_to_restore[tf_global_step.op.name] = tf_global_step
        else:
            variables_to_restore = slim.get_variables_to_restore()

        # Define the metrics:
        with tf.device('/device:CPU:0'):
            dict_metrics = {}
            # First add all losses.
            for loss in tf.get_collection(tf.GraphKeys.LOSSES):
                dict_metrics[loss.op.name] = slim.metrics.streaming_mean(loss)
            # Extra losses as well.
            for loss in tf.get_collection('EXTRA_LOSSES'):
                dict_metrics[loss.op.name] = slim.metrics.streaming_mean(loss)

            # Add metrics to summaries and Print on screen.
            for name, metric in dict_metrics.items():
                # summary_name = 'eval/%s' % name
                summary_name = name
                op = tf.summary.scalar(summary_name, metric[0], collections=[])
                # op = tf.Print(op, [metric[0]], summary_name)
                tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

            # FP and TP metrics.
            tp_fp_metric = streaming_tp_fp_arrays(num_bboxes_gt, tp, fp,
                                                  scores_nms)
            for c in tp_fp_metric[0].keys():
                dict_metrics['tp_fp_%s' % c] = (tp_fp_metric[0][c],
                                                tp_fp_metric[1][c])

            # Add to summaries precision/recall values.
            aps_voc12 = {}
            for c in tp_fp_metric[0].keys():
                # Precision and recall values.
                prec, rec = precision_recall(*tp_fp_metric[0][c])

                # Average precision VOC12.
                v = average_precision_voc12(prec, rec)
                summary_name = 'AP_VOC12/%s' % c
                op = tf.summary.scalar(summary_name, v, collections=[])
                # op = tf.Print(op, [v], summary_name)
                tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)
                aps_voc12[c] = v

            # Mean average precision VOC12.
            summary_name = 'AP_VOC12/mAP'
            mAP = tf.add_n(list(aps_voc12.values())) / len(aps_voc12)
            op = tf.summary.scalar(summary_name, mAP, collections=[])
            op = tf.Print(op, [mAP], summary_name)
            tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

        # Split into values and updates ops.
        names_to_values, names_to_updates = slim.metrics.aggregate_metric_map(
            dict_metrics)

        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
        config = tf.ConfigProto(log_device_placement=False,
                                gpu_options=gpu_options)

        # TODO(sguada) use num_epochs=1
        if FLAGS.max_num_batches:
            num_batches = FLAGS.max_num_batches
        else:
            # This ensures that we make a single pass over all of the data.
            num_batches = math.ceil(dataset.num_samples /
                                    float(FLAGS.batch_size))

        if tf.gfile.IsDirectory(FLAGS.checkpoint_path):
            checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
        else:
            checkpoint_path = FLAGS.checkpoint_path

        tf.logging.info('Evaluating %s' % checkpoint_path)

        slim.evaluation.evaluate_once(
            master=FLAGS.master,
            checkpoint_path=checkpoint_path,
            logdir=FLAGS.eval_dir,
            num_evals=num_batches,
            eval_op=list(names_to_updates.values()),
            variables_to_restore=variables_to_restore,
            session_config=config)
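
# Hypothetical command line for this evaluation script (script name and paths
# assumed; the flags are the ones referenced above):
#
# python eval_ssd_network.py \
#     --dataset_dir=./tfrecords \
#     --dataset_name=pascalvoc_2007 \
#     --dataset_split_name=test \
#     --checkpoint_path=./logs \
#     --eval_dir=./logs/eval \
#     --batch_size=1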
Exemple #29
net_shape = (300, 300)
data_format = 'NHWC'
img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
# Evaluation pre-processing: resize to SSD net shape.
image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
    img_input,
    None,
    None,
    net_shape,
    data_format,
    resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
image_4d = tf.expand_dims(image_pre, 0)

# Define the SSD model.
reuse = True if 'ssd_net' in locals() else None
ssd_net = ssd_vgg_300.SSDNet()
with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
    predictions, localisations, _, _ = ssd_net.net(image_4d,
                                                   is_training=False,
                                                   reuse=reuse)

# Restore SSD model.
#ckpt_filename = 'finetune_log/model.ckpt-41278'
ckpt_filename = '../checkpoints/ssd_300_vgg.ckpt'
isess.run(tf.global_variables_initializer())  # `isess` is assumed to be created earlier in this example
saver = tf.train.Saver()
saver.restore(isess, ckpt_filename)

# SSD default anchor boxes.
ssd_anchors = ssd_net.anchors(net_shape)
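
# Minimal inference sketch with the graph restored above (image path is
# hypothetical; assumes matplotlib.image is imported as mpimg):
#
# img = mpimg.imread('demo.jpg')
# rimg, rpreds, rlocs, rbbox_img = isess.run(
#     [image_4d, predictions, localisations, bbox_img],
#     feed_dict={img_input: img})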
Exemple #30
    def __init__(self):
        tf.reset_default_graph()

        # TensorFlow session: grow memory when needed. TF, DO NOT USE ALL MY GPU MEMORY!!!
        gpu_options = tf.GPUOptions(allow_growth=True)
        #gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)
        config = tf.ConfigProto(log_device_placement=False,
                                gpu_options=gpu_options)
        graph = self.load_graph(
            os.path.join(self.MODEL_DIR, self.FROZEN_MODEL_NAME))
        graph_def = graph.as_graph_def()

        # print operations
        self.print_graph_operations(graph)

        # print nodes
        #print_graph_nodes(graph_def)

        ####################
        self.input_x = graph.get_tensor_by_name('prefix/input_x:0')
        # The very coarse conv10_2 and conv11_2 predictions could be dropped here.
        self.predictions = [
            graph.get_tensor_by_name(
                'prefix/ssd_300_vgg/block4_cls_pred/softmax/Reshape_1:0'),
            graph.get_tensor_by_name(
                'prefix/ssd_300_vgg/block7_cls_pred/softmax/Reshape_1:0'),
            graph.get_tensor_by_name(
                'prefix/ssd_300_vgg/block8_cls_pred/softmax/Reshape_1:0'),
            graph.get_tensor_by_name(
                'prefix/ssd_300_vgg/block9_cls_pred/softmax/Reshape_1:0'),
            graph.get_tensor_by_name(
                'prefix/ssd_300_vgg/block10_cls_pred/softmax/Reshape_1:0'),
            graph.get_tensor_by_name(
                'prefix/ssd_300_vgg/block11_cls_pred/softmax/Reshape_1:0')
        ]
        self.localisations = [
            graph.get_tensor_by_name(
                'prefix/ssd_300_vgg/block4_box/loc_pred:0'),
            graph.get_tensor_by_name(
                'prefix/ssd_300_vgg/block7_box/loc_pred:0'),
            graph.get_tensor_by_name(
                'prefix/ssd_300_vgg/block8_box/loc_pred:0'),
            graph.get_tensor_by_name(
                'prefix/ssd_300_vgg/block9_box/loc_pred:0'),
            graph.get_tensor_by_name(
                'prefix/ssd_300_vgg/block10_box/loc_pred:0'),
            graph.get_tensor_by_name(
                'prefix/ssd_300_vgg/block11_box/loc_pred:0')
        ]
        self.bbox_img = graph.get_tensor_by_name(
            'prefix/ssd_preprocessing_train/my_bbox_img/strided_slice:0')

        # SSD default anchor boxes.
        net_shape = (300, 300)
        ssd_class = nets_factory.get_network('ssd_300_vgg')
        ssd_params = ssd_class.default_params._replace(num_classes=5)
        ssd_net = ssd_vgg_300.SSDNet(ssd_params)
        self.ssd_anchors = ssd_net.anchors(net_shape)

        ########################################
        # Labels
        ########################################
        self.VOC_LABELS = {
            0: 'none',
            1: 'stop',
            2: 'speed_10',
            3: 'speed_20',
            4: 'speed_30',
        }
        self.colors = [(random.randint(0, 255), random.randint(0, 255),
                        random.randint(0, 255))
                       for i in range(len(self.VOC_LABELS))]

        self.sess = tf.Session(graph=graph, config=config)
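
        # Hypothetical follow-up (not in the original): feed a preprocessed
        # frame through the frozen graph; `preprocessed` must match whatever
        # the 'prefix/input_x:0' placeholder expects in this model.
        #
        # rpreds, rlocs, rbbox_img = self.sess.run(
        #     [self.predictions, self.localisations, self.bbox_img],
        #     feed_dict={self.input_x: preprocessed})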