def __init__(self, class_names, model, input_shape):
        self.class_names = class_names
        self.num_classes = len(class_names)
        self.model = model
        self.input_shape = input_shape
        self.bbox_util = BBoxUtility(self.num_classes)
        self.timer = Timer(1, self.timer_callback)
        self.current_time = 0
        self.current_fps = 0
        self.exec_time = None
        self.prev_extra_time = None
        self.extra_time = None
        self.fps_time_slot = list()
        self.is_finish = False

        # Create unique and somewhat visually distinguishable bright
        # colors for the different classes.
        self.class_colors = []
        for i in range(0, self.num_classes):
            # This can probably be written in a more elegant manner
            hue = 255*i/self.num_classes
            col = np.zeros((1,1,3)).astype("uint8")
            col[0][0][0] = hue
            col[0][0][1] = 128 # Saturation
            col[0][0][2] = 255 # Value
            cvcol = cv2.cvtColor(col, cv2.COLOR_HSV2BGR)
            col = (int(cvcol[0][0][0]), int(cvcol[0][0][1]), int(cvcol[0][0][2]))
            self.class_colors.append(col)
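The loop above admits it "can probably be written in a more elegant manner"; a vectorized sketch (not from the original repo) builds all class colors with a single cvtColor call:

import numpy as np
import cv2

def make_class_colors(num_classes):
    # One HSV pixel per class: hue spread over [0, 255), S=128, V=255
    hsv = np.zeros((1, num_classes, 3), dtype="uint8")
    hsv[0, :, 0] = 255 * np.arange(num_classes) // num_classes
    hsv[0, :, 1] = 128  # saturation
    hsv[0, :, 2] = 255  # value
    bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
    # Same (B, G, R) tuples as the per-class loop produces
    return [tuple(int(c) for c in bgr[0, i]) for i in range(num_classes)]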
Example #2
    def __init__(self):
        NUM_CLASSES = 3 + 1
        input_shape = (300, 300, 3)

        #config_string = rospy.get_param("/traffic_light_config")
        #self.config = yaml.load(config_string)
        #self.stop_line_positions = self.config['stop_line_positions']

        # get path to resources
        #path_to_resources = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', '..', '..', '..', 'tlc')
        # "prior boxes" in the paper
        #priors = pickle.load(open(os.path.join(path_to_resources, 'prior_boxes_ssd300.pkl'), 'rb'))
        priors = pickle.load(open('prior_boxes_ssd300.pkl', 'rb'))
        self.bbox_util = BBoxUtility(NUM_CLASSES, priors)

        # Traffic Light Classifier model and its weights
        self.model = SSD300(input_shape, num_classes=NUM_CLASSES)
        #self.model.load_weights(os.path.join(path_to_resources, self.config['classifier_weights_file']), by_name=True)
        #self.model.load_weights('weights.180314.hdf5', by_name=True)
        self.model.load_weights('checkpoints/weights.07-0.70.hdf5',
                                by_name=True)

        # Warm-up prediction: avoids a TensorFlow backend ValueError
        # when predict() is first called for real
        dummy = np.zeros((1, 300, 300, 3))
        _ = self.model.predict(dummy, batch_size=1, verbose=0)

        self.is_in_progress = False
        self.last_result = TrafficLight.UNKNOWN
Example #3
def predict_img(numpy_array, orig_numpy_array):
    # Save the original image for attachment
    scipy.misc.imsave('temp_cat_motion.jpg', np.uint8(orig_numpy_array))
    
    # Number of voc_classes + 1
    NUM_CLASSES = 3
    input_shape=(300, 300, 3)
    # SSD model
    model = SSD300(input_shape, num_classes=NUM_CLASSES)
    model.load_weights('./model/weights.18-0.09.hdf5', by_name=True)
    bbox_util = BBoxUtility(NUM_CLASSES)
    
    # Inception v3 transfer learning model
    model_cnn = load_model(filepath='./model/model_v2.03-0.40.hdf5')
    ssd_img_size=300
    img_size=299

    inputs = []
    images = []

    images.append(orig_numpy_array)
    
    inputs.append(numpy_array.copy())
    inputs = preprocess_input(np.array(inputs))
    preds = model.predict(inputs, batch_size=1, verbose=0)
    results = bbox_util.detection_out(preds)

    cat_inside_image = False
    # If the SSD model does not find a cat, return False
    for i, img in enumerate(images):
        cat_inside_image = ssd_image(img, results, i)
    
    return cat_inside_image
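A hypothetical call of the function above (the file name and the 300x300 resize are assumptions, not part of the original):

from scipy.misc import imread
from keras.preprocessing import image

orig = imread('cat.jpg')
resized = image.img_to_array(image.load_img('cat.jpg', target_size=(300, 300)))
print('cat detected:', predict_img(resized, orig))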
Example #4
  def __init__(self, argv):
 
    app_name  = os.path.basename(argv[0])
    name, _ = os.path.splitext(app_name)
    inifile = name + ".ini"
    
    print("inifile {}".format(inifile))
    
    parser = configparser.ConfigParser()
    parser.read(inifile)
    self.weightfile  = parser.get("WEIGHT_FILE",     "filename") 
    self.showimage   = int(parser.get("SHOW_IMAGE",   "show"))
    self.confidence  = float(parser.get("DETECTION", "confidence") )
    
    # For example, this may take a string C:/ssd_keras/weights_SSD300.hdf5

    self.classes = ['Aeroplane', 'Bicycle', 'Bird', 'Boat', 'Bottle',
               'Bus', 'Car', 'Cat', 'Chair', 'Cow', 'Diningtable',
               'Dog', 'Horse','Motorbike', 'Person', 'Pottedplant',
               'Sheep', 'Sofa', 'Train', 'Tvmonitor']
 
    self.n_classes = len(self.classes) + 1

    self.input_shape = (300, 300, 3)
    self.model = SSD300v2(self.input_shape, num_classes=self.n_classes)
        
    self.model.load_weights(self.weightfile, by_name=True)
    
    self.bbox_util = BBoxUtility(self.n_classes)
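The INI file read by this constructor would look roughly like the sketch below; the section and key names come from the parser.get() calls above, the values are only examples:

[WEIGHT_FILE]
filename = C:/ssd_keras/weights_SSD300.hdf5

[SHOW_IMAGE]
show = 1

[DETECTION]
confidence = 0.6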
Example #5
def feature_flow():
    bbox_util = BBoxUtility(NUM_CLASSES)
    raw_inputs, images = load_inputs(image_files)
    inputs = preprocess_input(np.array(raw_inputs))

    dump_activation_layer = 'conv4_2'
    compare_layer_name = 'conv6_2'
    print('dump_activation_layer', dump_activation_layer)
    print('target_layer_name', compare_layer_name)

    # normal SSD network
    model1 = SSD300v2(input_shape, num_classes=NUM_CLASSES)
    model1.load_weights('weights_SSD300.hdf5', by_name=True)
    predictions = run_network(model1, inputs)
    results = bbox_util.detection_out(predictions)
    plot_detections(images, results)

    # get dump layer's output (as input for flow network)
    input_img2 = inputs[1:2, :, :, :]
    layer_dump = get_layer_output(model=model1,
                                  inputs=input_img2,
                                  output_layer_name=dump_activation_layer)
    print('layer_dump.shape = ', layer_dump.shape)

    # flow (raw rgb)
    flow_rgb = compute_flow(image_files[1], image_files[0])

    print('flow.shape', flow_rgb.shape)
    imshow_fig(cv2.cvtColor(draw_hsv(flow_rgb), cv2.COLOR_BGR2RGB),
               title='flow_rgb')

    # flow (re-sized for feature map)
    flow_feature = get_flow_for_filter(flow_rgb)
    # imshow_fig(flow_feature[:, :, 0], title='flow_feature_y', cmap='gray')
    # imshow_fig(flow_feature[:, :, 1], title='flow_feature_x', cmap='gray')

    # warp image by flow_rgb
    iimg1 = cv2.imread(image_files[0])
    img_warp = warp_flow(iimg1, flow_rgb)
    imshow_fig(cv2.cvtColor(img_warp, cv2.COLOR_BGR2RGB), title='frame_2_warp')

    # shift feature
    shifted_feature = shift_filter(layer_dump, flow_feature)

    # flow net
    model2 = SSD300_conv4_3((128, 128, 512), num_classes=NUM_CLASSES)
    model2.load_weights('weights_SSD300.hdf5', by_name=True)
    predictions = run_network(model2, shifted_feature)
    results = bbox_util.detection_out(predictions)
    plot_detections(images[1:2], results)

    # get specific layer's output and compare them (for debugging)
    compare_model_layer(model1, input_img2, compare_layer_name, model2,
                        shifted_feature, compare_layer_name, True)

    sess.close()
    plt.show()
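feature_flow() relies on module-level names (NUM_CLASSES, input_shape, image_files, sess, plus the helper functions it calls); one plausible setup for the globals, sketched here as an assumption, is:

import tensorflow as tf
from keras.backend.tensorflow_backend import set_session

sess = tf.Session()
set_session(sess)

NUM_CLASSES = 21
input_shape = (300, 300, 3)
image_files = ['frame_0001.jpg', 'frame_0002.jpg']  # two consecutive frames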
Example #6
 def __init__(self):
     # Restore the face-detection model and the age/gender detection model
     self.age_detector = load_model("transfer_Xception_29.h5")
     NUM_CLASSES = 2
     input_shape = (300, 300, 3)
     priors = pickle.load(open('prior_boxes_ssd300.pkl', 'rb'))
     self.bbox_util = BBoxUtility(NUM_CLASSES, priors)
     self.face_detector = SSD300(input_shape, num_classes=NUM_CLASSES)
     self.face_detector.load_weights('weights.05-3.15.hdf5', by_name=True)
Example #7
    def __init__(self):
        self.node_name = "ssd_keras"
        rospy.init_node(self.node_name)
        self.class_names = [
            "background", "aeroplane", "bicycle", "bird", "boat", "bottle",
            "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse",
            "motorbike", "person", "pottedplant", "sheep", "sofa", "train",
            "tvmonitor"
        ]
        self.num_classes = len(self.class_names)
        self.input_shape = (300, 300, 3)
        self.model = SSD(self.input_shape, num_classes=self.num_classes)
        self.model.load_weights(pkg_path +
                                '/resources/ssd_keras/weights_SSD300.hdf5')

        self.bbox_util = BBoxUtility(self.num_classes)
        self.conf_thresh = 0.25

        self.model._make_predict_function()
        self.graph = tf.get_default_graph()

        self.detection_index = DL_msgs_boxes()

        # Create unique and somewhat visually distinguishable bright
        # colors for the different classes.
        self.class_colors = []
        for i in range(0, self.num_classes):
            # This can probably be written in a more elegant manner
            hue = 255 * i / self.num_classes
            col = np.zeros((1, 1, 3)).astype("uint8")
            col[0][0][0] = hue
            col[0][0][1] = 128  # Saturation
            col[0][0][2] = 255  # Value
            cvcol = cv2.cvtColor(col, cv2.COLOR_HSV2BGR)
            col = (int(cvcol[0][0][0]), int(cvcol[0][0][1]),
                   int(cvcol[0][0][2]))
            self.class_colors.append(col)

        self.bridge = CvBridge()  # Create the cv_bridge object

        self.Image_Status = "Not_Ready"
        self.StartImage = cv2.imread(pkg_path + '/resources/start.jpg')
        self.to_draw = cv2.resize(self.StartImage, (640, 480))

        self.image_sub = rospy.Subscriber(
            "/floating_sensor/camera/rgb/image_raw",
            Image,
            self.detect_image,
            queue_size=1)  # the appropriate callbacks

        self.box_coordinate_pub = rospy.Publisher(
            "/ssd_detction/box", DL_msgs_boxes,
            queue_size=5)  # the appropriate callbacks
        self.SSD_Serv = rospy.Service('SSD_Detection', DL_box,
                                      self.SSD_Detection_Server)
Example #9
    def __init__(self):
        #TODO load classifier
        NUM_CLASSES = 3 + 1
        input_shape = (300, 300, 3)

        # "prior boxes" in the paper
        priors = pickle.load(open('prior_boxes_ssd300.pkl', 'rb'))
        self.bbox_util = BBoxUtility(NUM_CLASSES, priors)

        self.model = SSD300(input_shape, num_classes=NUM_CLASSES)
        self.model.load_weights('weights.180314.hdf5', by_name=True)
Example #10
	def __init__(self, input_shape = (300, 300, 3)):
		self.num_class = config.NUM_CLASSES
		self.input_tensor = tf.placeholder(tf.float32, [None, input_shape[0], input_shape[1], input_shape[2]])
		self.label_tensor = tf.placeholder(tf.float32, [None, 7308, 4 + config.NUM_CLASSES + 8])
		self.predicts = self.build(input_shape, config.NUM_CLASSES)
		self.input_shape = input_shape
		self.global_step = tf.train.create_global_step()
		var_list = tf.global_variables()
		var_list = [var for var in var_list if "Adam" not in var.name]
		self.saver = tf.train.Saver(var_list, max_to_keep=1)
		self.bbox_util = BBoxUtility(self.num_class)
Example #11
    def __init__(self):
        self.image_width = 300
        self.image_height = 300

        self.voc_classes = [
            'Aeroplane', 'Bicycle', 'Bird', 'Boat', 'Bottle', 'Bus', 'Car',
            'Cat', 'Chair', 'Cow', 'Diningtable', 'Dog', 'Horse', 'Motorbike',
            'Person', 'Pottedplant', 'Sheep', 'Sofa', 'Train', 'Tvmonitor'
        ]
        self.NUM_CLASSES = len(self.voc_classes) + 1

        self.model = SSD300((self.image_height, self.image_width, 3),
                            num_classes=self.NUM_CLASSES)
        self.model.load_weights('weights_SSD300.hdf5', by_name=True)
        self.bbox_util = BBoxUtility(self.NUM_CLASSES)
Example #12
    def __init__(self):

        voc_classes = ['Aeroplane', 'Bicycle', 'Bird', 'Boat', 'Bottle',
               'Bus', 'Car', 'Cat', 'Chair', 'Cow', 'Diningtable',
               'Dog', 'Horse','Motorbike', 'Person', 'Pottedplant',
               'Sheep', 'Sofa', 'Train', 'Tvmonitor']
        NUM_CLASSES = len(voc_classes) + 1

        input_shape=(300, 300, 3)
        self.model = SSD300(input_shape, num_classes=NUM_CLASSES)
        weights_file = "./checkpoints/weights.10-2.85.hdf5"        
        #weights_file = "./checkpoints/weights.39-1.61_ubuntu.hdf5"

        self.model.load_weights(weights_file, by_name=True)
        self.bbox_util = BBoxUtility(NUM_CLASSES)
Example #13
    def __init__(self,
                 modelfile,
                 shape=(300, 300, 3),
                 num_classes=21,
                 conf_thresh=0.6):

        self.input_shape = shape
        self.num_classes = num_classes
        self.conf_thresh = conf_thresh

        # Build the model
        model = SSD(shape, num_classes=num_classes)
        model.load_weights(modelfile)
        self.model = model

        # Bounding-box utility
        self.bbox_util = BBoxUtility(self.num_classes)
Example #14
	def train(self):
		self.loss = MultiboxLoss(self.num_class, neg_pos_ratio=2.0).compute_loss(self.label_tensor, self.predicts)
		self.loss_avg = tf.reduce_mean(self.loss)
		
		learning_rate = tf.train.exponential_decay(config.lr, self.global_step, 10000 ,0.9, True, name='learning_rate')
		self.train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(self.loss, global_step = self.global_step)
		self.train_loss_summary = tf.summary.scalar("loss_train", self.loss_avg)
		self.val_loss_summary = tf.summary.scalar("loss_val", self.loss_avg)
		self.writer = tf.summary.FileWriter(FLAGS.checkpoint)

		priors = pickle.load(open('prior_boxes_ssd300.pkl', 'rb'))
		self.bbox_util = BBoxUtility(self.num_class, priors)

		gt = pickle.load(open(FLAGS.label_file, 'rb'))
		keys = sorted(gt.keys())
		num_train = int(round(0.8 * len(keys)))
		train_keys = keys[:num_train]
		val_keys = keys[num_train:]

		gen = Generator(gt, self.bbox_util, config.BATCH_SIZE, FLAGS.images_dir,
		                train_keys, val_keys,
		                (self.input_shape[0], self.input_shape[1]))#, do_crop=False, saturation_var = 0, brightness_var = 0, contrast_var = 0, lighting_std = 0, hflip_prob = 0, vflip_prob = 0)
		c = tf.ConfigProto()
		c.gpu_options.allow_growth = True
		with tf.Session(config=c) as sess:
			sess.run(tf.global_variables_initializer())
			self.writer.add_graph(sess.graph)
			self.restore(sess)
			for inputs, labels in gen.generate(True):
				_, lo, step, summary = sess.run([self.train_op, self.loss_avg, self.global_step, self.train_loss_summary], feed_dict = {self.input_tensor: inputs, self.label_tensor: labels})
				sys.stdout.write("train loss: %d %.3f \r"%(step, lo))
				sys.stdout.flush()
				self.writer.add_summary(summary, step)
				if step % config.save_step == config.save_step - 1:
					self.saver.save(sess, os.path.join(FLAGS.checkpoint, "ckpt"), global_step=self.global_step)
					print("saved")
				if step % config.snapshot_step == 0:
					val_in, val_la = next(gen.generate(False))
					lo, s, preds = sess.run([self.loss_avg, self.train_loss_summary, self.predicts], feed_dict = {self.input_tensor: val_in, self.label_tensor: val_la})
					self.writer.add_summary(s, step)
					print("val loss:", step, lo)
					images = [np.array(val_in[v]) for v in range(val_in.shape[0])]
					self.paint_imgs(preds, images)

		print("Train finished. Checkpoint saved in", FLAGS.checkpoint)
Example #15
    def __init__(self, conf_limit=0.6):
        self.conf_limit = conf_limit
        np.set_printoptions(suppress=True)
        config = tf.ConfigProto()
        #config.gpu_options.per_process_gpu_memory_fraction = 0.45
        set_session(tf.Session(config=config))

        self.voc_classes = [
            'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car',
            'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike',
            'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'
        ]
        NUM_CLASSES = len(self.voc_classes) + 1
        self.bbox_util = BBoxUtility(NUM_CLASSES)

        input_shape = (300, 300, 3)
        self.model = SSD300(input_shape, num_classes=NUM_CLASSES)
        self.model.load_weights('weights_SSD300.hdf5', by_name=True)
Example #16
class UseSSD:
    def __init__(self):
        self.image_width = 300
        self.image_height = 300

        self.voc_classes = [
            'Aeroplane', 'Bicycle', 'Bird', 'Boat', 'Bottle', 'Bus', 'Car',
            'Cat', 'Chair', 'Cow', 'Diningtable', 'Dog', 'Horse', 'Motorbike',
            'Person', 'Pottedplant', 'Sheep', 'Sofa', 'Train', 'Tvmonitor'
        ]
        self.NUM_CLASSES = len(self.voc_classes) + 1

        self.model = SSD300((self.image_height, self.image_width, 3),
                            num_classes=self.NUM_CLASSES)
        self.model.load_weights('weights_SSD300.hdf5', by_name=True)
        self.bbox_util = BBoxUtility(self.NUM_CLASSES)

    def normalize(self, img_array):
        return (img_array - np.mean(img_array)) / np.std(img_array) * 16 + 64

    def has_category(self, img_filepath, category_label_name, confidence):

        # Load the image for analysis
        with load_img(img_filepath,
                      target_size=(self.image_height,
                                   self.image_width)) as img:
            img_array = img_to_array(img)
            img_array = self.normalize(img_array)

        img_array = np.expand_dims(img_array, axis=0)
        img_array = preprocess_input(img_array)

        preds = self.model.predict(img_array, batch_size=1, verbose=1)
        results = self.bbox_util.detection_out(preds)

        if len(results) <= 0:
            return False

        det_label = results[0][:, 0]
        det_conf = results[0][:, 1]

        top_indices = [
            i for i, conf in enumerate(det_conf) if conf >= confidence
        ]

        top_conf = det_conf[top_indices]

        top_label_indices = det_label[top_indices].tolist()

        for i in range(top_conf.shape[0]):
            label = int(top_label_indices[i])
            label_name = self.voc_classes[label - 1]

            if category_label_name == label_name:
                return True

        return False
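Usage might look like this (a sketch; 'dog.jpg' is an assumed file name):

detector = UseSSD()
if detector.has_category('dog.jpg', 'Dog', confidence=0.6):
    print('Dog found')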
Example #17
 def __init__(
         self,
         class_number=21,
         input_shape=(300, 300, 3),
         priors_file='prior_boxes_ssd300.pkl',
         train_file='VOC2007.pkl',
         path_prefix='./VOCdevkit/VOC2007/JPEGImages/',
         model=None,
         weight_file='weights_SSD300.hdf5',
         freeze=('input_1', 'conv1_1', 'conv1_2', 'pool1', 'conv2_1',
                 'conv2_2', 'pool2', 'conv3_1', 'conv3_2', 'conv3_3',
                 'pool3'),
         save_weight_file='/src/resource/checkpoints/weights.{epoch:02d}-{val_loss:.2f}.hdf5',  # noqa
         optim=None,
         batch_size=20,
         nb_worker=1):
     """
     Setting below parameter 
     :param class_number(int): class number
     :param input_shape(set): set input shape  
     :param priors_file(str): set prior file name 
     :param train_file(str): train file name  
     :param path_prefix(str): path prefix 
     :param model(keras model): set the keras model such as the ssd 
     :param weight_file(str): weight file name 
     :param freeze(set): set untraining layer 
     """
     self.input_shape = input_shape
     priors = pickle.load(open(priors_file, 'rb'))
     self.bbox_utils = BBoxUtility(class_number, priors)
     self.train_data = pickle.load(open(train_file, 'rb'))
     keys = sorted(self.train_data.keys())
     num_train = int(round(0.8 * len(keys)))
     self.train_keys = keys[:num_train]
     self.val_keys = keys[num_train:]
     self.num_val = len(self.val_keys)
     self.batch_size = batch_size
     self.gen = Generator(self.train_data,
                          self.bbox_utils,
                          batch_size,
                          path_prefix,
                          self.train_keys,
                          self.val_keys,
                          (self.input_shape[0], self.input_shape[1]),
                          do_crop=True)
     self.model = model
     model.load_weights(weight_file, by_name=True)
     self.freeze = list(freeze)
     self.save_weight_file = save_weight_file
     self.optim = optim
     self.nb_worker = nb_worker
     self.model.compile(optimizer=optim,
                        metrics=['accuracy'],
                        loss=MultiboxLoss(class_number,
                                          neg_pos_ratio=2.0).compute_loss)
Example #18
    def __init__(
            self,
            path_weights="/home/francisco/git/ssd_keras/weights_SSD300.hdf5"):
        config = tf.ConfigProto()
        config.gpu_options.per_process_gpu_memory_fraction = 0.45
        set_session(tf.Session(config=config))

        self.labels = [
            'Aeroplane', 'Bicycle', 'Bird', 'Boat', 'Bottle', 'Bus', 'Car',
            'Cat', 'Chair', 'Cow', 'Diningtable', 'Dog', 'Horse', 'Motorbike',
            'Person', 'Pottedplant', 'Sheep', 'Sofa', 'Train', 'Tvmonitor'
        ]
        NUM_CLASSES = len(self.labels) + 1

        input_shape = (300, 300, 3)
        # Get detections with confidence higher than 0.6.
        self.detection_confidence = 0.6
        self.model = SSD300(input_shape, num_classes=NUM_CLASSES)
        self.model.load_weights(path_weights, by_name=True)
        self.bbox_util = BBoxUtility(NUM_CLASSES)
        self.detections = []
Example #19
def predict_img(numpy_array, orig_numpy_array):
    # Save the original image for attachment
    scipy.misc.imsave('temp_cat_water.jpg', np.uint8(orig_numpy_array))

    # Number of voc_classes + 1
    NUM_CLASSES = 3
    input_shape = (300, 300, 3)
    # SSD model
    model = SSD300(input_shape, num_classes=NUM_CLASSES)
    model.load_weights('./model/weights.18-0.09.hdf5', by_name=True)
    bbox_util = BBoxUtility(NUM_CLASSES)

    # Inception v3 transfer learning model
    model_cnn = load_model(filepath='./model/model_v2.03-0.40.hdf5')
    ssd_img_size = 300
    img_size = 299

    inputs = []
    images = []

    images.append(orig_numpy_array)

    inputs.append(numpy_array.copy())
    inputs = preprocess_input(np.array(inputs))
    preds = model.predict(inputs, batch_size=1, verbose=0)
    results = bbox_util.detection_out(preds)

    # If the SSD model does not find an appropriate object, automatically return 0.00
    for i, img in enumerate(images):
        if type(results[i]) is not list:
            ssd_img = ssd_image(img, results, i)
            resize_img = imresize(ssd_img, (img_size, img_size))

            x = np.expand_dims(resize_img, axis=0)
            y_pred = model_cnn.predict(x)
            prediction = round(y_pred[0][0], 3)
        else:
            prediction = 0.00

    return prediction
Example #20
 def __init__(self, class_names, model, input_shape, confidence):  # {{{
     self.class_names = class_names
     self.num_classes = len(class_names)
     self.model = model
     self.input_shape = input_shape
     self.confidence = confidence
     self.bbox_util = BBoxUtility(self.num_classes)
     self.next_ID = 0
     # Create unique and somewhat visually distinguishable bright
     # colors for the different classes.
     self.class_colors = []
     for i in range(0, self.num_classes):
         # This can probably be written in a more elegant manner
         hue = 255 * i / self.num_classes
         col = np.zeros((1, 1, 3)).astype("uint8")
         col[0][0][0] = hue
         col[0][0][1] = 128  # Saturation
         col[0][0][2] = 255  # Value
         cvcol = cv2.cvtColor(col, cv2.COLOR_HSV2BGR)
         col = (int(cvcol[0][0][0]), int(cvcol[0][0][1]),
                int(cvcol[0][0][2]))
         self.class_colors.append(col)  # }}}
Example #21
def main(img_paths):
    """
    Detect objects in images.

    Parameters
    ----------
    img_paths : list of strings
    """
    # Load the model
    voc_classes = [
        'Aeroplane', 'Bicycle', 'Bird', 'Boat', 'Bottle', 'Bus', 'Car', 'Cat',
        'Chair', 'Cow', 'Diningtable', 'Dog', 'Horse', 'Motorbike', 'Person',
        'Pottedplant', 'Sheep', 'Sofa', 'Train', 'Tvmonitor'
    ]
    NUM_CLASSES = len(voc_classes) + 1
    input_shape = (300, 300, 3)
    model = SSD300(input_shape, num_classes=NUM_CLASSES)
    model.load_weights('weights_SSD300.hdf5', by_name=True)
    bbox_util = BBoxUtility(NUM_CLASSES)

    # Load the inputs
    inputs = []
    images = []
    for img_path in img_paths:
        img = image.load_img(img_path, target_size=(300, 300))
        img = image.img_to_array(img)
        images.append(imread(img_path))
        inputs.append(img.copy())
    inputs = preprocess_input(np.array(inputs))

    # Predict
    preds = model.predict(inputs, batch_size=1, verbose=1)
    results = bbox_util.detection_out(preds)

    # Visualize
    for i, img in enumerate(images):
        create_overlay(img, results[i], voc_classes,
                       "{}-det.png".format(img_paths[i]))
Example #23
    def __init__(self):
        NUM_CLASSES = 3 + 1
        input_shape = (300, 300, 3)

        config_string = rospy.get_param("/traffic_light_config")
        self.config = yaml.load(config_string)
        self.stop_line_positions = self.config['stop_line_positions']

        # get path to resources
        path_to_resources = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), '..', '..', '..', '..',
            'tlc')
        # "prior boxes" in the paper
        priors = pickle.load(
            open(os.path.join(path_to_resources, 'prior_boxes_ssd300.pkl'),
                 'rb'))
        self.bbox_util = BBoxUtility(NUM_CLASSES, priors)

        # Traffic Light Classifier model and its weights
        self.model = SSD300(input_shape, num_classes=NUM_CLASSES)
        print(self.model.summary())
        self.model.load_weights(os.path.join(
            path_to_resources, self.config['classifier_weights_file']),
                                by_name=True)

        # Warm-up prediction: avoids a TensorFlow backend ValueError
        # when predict() is first called for real
        dummy = np.zeros((1, 300, 300, 3))
        _ = self.model.predict(dummy, batch_size=1, verbose=0)

        self.capture_images = False
        self.image_counts = {0: 0, 1: 0, 2: 0, 4: 0}

        self.last_classification = None
Example #24
def init_model(weight_file='ssd_keras/SSD/weights_SSD300.hdf5'):

    np.set_printoptions(suppress=True)

    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.45
    set_session(tf.Session(config=config))

    NUM_CLASSES = len(voc_classes) + 1

    input_shape = (300, 300, 3)
    model = SSD300(input_shape, num_classes=NUM_CLASSES)
    model.load_weights(weight_file, by_name=True)
    bbox_util = BBoxUtility(NUM_CLASSES)
    return model, bbox_util
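A sketch of how the returned pair might be used, following the prediction pattern of the other examples here (imports as in the surrounding snippets; 'test.jpg' is an assumption):

model, bbox_util = init_model()
img = image.img_to_array(image.load_img('test.jpg', target_size=(300, 300)))
inputs = preprocess_input(np.array([img]))
preds = model.predict(inputs, batch_size=1, verbose=0)
results = bbox_util.detection_out(preds)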
Example #25
 def __init__(self, class_names, model, input_shape):
     self.class_names = class_names
     self.num_classes = len(class_names)
     self.model = model
     self.input_shape = input_shape
     self.bbox_util = BBoxUtility(self.num_classes)
     
     # Create unique and somewhat visually distinguishable bright
     # colors for the different classes.
     self.class_colors = []
     for i in range(0, self.num_classes):
         # This can probably be written in a more elegant manner
         hue = 255*i/self.num_classes
         col = np.zeros((1,1,3)).astype("uint8")
         col[0][0][0] = hue
         col[0][0][1] = 128 # Saturation
         col[0][0][2] = 255 # Value
         cvcol = cv2.cvtColor(col, cv2.COLOR_HSV2BGR)
         col = (int(cvcol[0][0][0]), int(cvcol[0][0][1]), int(cvcol[0][0][2]))
         self.class_colors.append(col) 
Example #26
class ssdKeras():
    def __init__(self):
        #self.node_name = "ssd_keras"
        #rospy.init_node(self.node_name)
        self.class_names = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
        self.num_classes = len(self.class_names)
        self.input_shape = (300,300,3)
        self.model = SSD(self.input_shape,num_classes=self.num_classes)
        self.model.load_weights('/home/abdulrahman/catkin_ws/src/victim_localization/resources/ssd_keras/weights_SSD300.hdf5')

        self.bbox_util = BBoxUtility(self.num_classes)
        self.conf_thresh = 0.7

        self.model._make_predict_function()
        self.graph = tf.get_default_graph()

        self.detection_index=DL_msgs_boxes()

        # Create unique and somewhat visually distinguishable bright
        # colors for the different classes.
        self.class_colors = []
        for i in range(0, self.num_classes):
            # This can probably be written in a more elegant manner
            hue = 255*i/self.num_classes
            col = np.zeros((1,1,3)).astype("uint8")
            col[0][0][0] = hue
            col[0][0][1] = 128 # Saturation
            col[0][0][2] = 255 # Value
            cvcol = cv2.cvtColor(col, cv2.COLOR_HSV2BGR)
            col = (int(cvcol[0][0][0]), int(cvcol[0][0][1]), int(cvcol[0][0][2]))
            self.class_colors.append(col)

        self.bridge = CvBridge() # Create the cv_bridge object

        self.image_sub = rospy.Subscriber("/image_raw_converted2", Image, self.detect_image,queue_size=1)  # the appropriate callbacks

        self.box_coordinate_pub = rospy.Publisher("/ssd_detction/box", DL_msgs_boxes ,queue_size=5)  # the appropriate callbacks
    def detect_image(self, ros_image):
        """ Runs the test on a video (or webcam)

        # Arguments

        conf_thresh: Threshold of confidence. Any boxes with lower confidence
                     are not visualized.

        """


        #### Use cv_bridge() to convert the ROS image to OpenCV format  ####
        try:
            image_orig = self.bridge.imgmsg_to_cv2(ros_image, "bgr8")
        except CvBridgeError as e:
            print(e)
        ##########

        vidw = 1280.0  # fixed frame width (replaces cv2.cv.CV_CAP_PROP_FRAME_WIDTH)
        vidh = 720.0   # fixed frame height (replaces cv2.cv.CV_CAP_PROP_FRAME_HEIGHT)
        vidar = vidw/vidh

        #print(type(image_orig))
        im_size = (self.input_shape[0], self.input_shape[1])
        resized = cv2.resize(image_orig, im_size)
        rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)

        # Reshape to original aspect ratio for later visualization
        # The resized version is used, to visualize what kind of resolution
        # the network has to work with.
        to_draw = cv2.resize(resized, (1280, 720))

        # Use model to predict
        inputs = [image.img_to_array(rgb)]
        tmp_inp = np.array(inputs)
        x = preprocess_input(tmp_inp)

        start_time = time.time() #debuggin

        with self.graph.as_default():
            y = self.model.predict(x)
        #print("--- %s seconds_for_one_image ---" % (time.time() - start_time))

        # This line creates a new TensorFlow device every time. Is there a
        # way to avoid that?
        results = self.bbox_util.detection_out(y)

        if len(results) > 0 and len(results[0]) > 0:
            # Interpret output, only one frame is used
            det_label = results[0][:, 0]
            det_conf = results[0][:, 1]
            det_xmin = results[0][:, 2]
            det_ymin = results[0][:, 3]
            det_xmax = results[0][:, 4]
            det_ymax = results[0][:, 5]

            top_indices = [i for i, conf in enumerate(det_conf) if conf >= self.conf_thresh]

            top_conf = det_conf[top_indices]

            top_label_indices = det_label[top_indices].tolist()
            top_xmin = det_xmin[top_indices]
            top_ymin = det_ymin[top_indices]
            top_xmax = det_xmax[top_indices]
            top_ymax = det_ymax[top_indices]

            # Initialize the detection message with a placeholder box
            box_msg = DL_msgs_box()
            box_msg.xmin=0
            box_msg.ymin=0
            box_msg.xmax=0
            box_msg.ymax=0
            box_msg.Class="Non" # 100 reflect a non-class value
            self.detection_index.boxes.append(box_msg)


            print (top_xmin)
            for i in range(top_conf.shape[0]):
                    self.detection_index.boxes[:]=[]
                    xmin = int(round(top_xmin[i] * to_draw.shape[1]))
                    ymin = int(round(top_ymin[i] * to_draw.shape[0]))
                    xmax = int(round(top_xmax[i] * to_draw.shape[1]))
                    ymax = int(round(top_ymax[i] * to_draw.shape[0]))

                    #include the corner to be published
                    box_msg = DL_msgs_box()
                    box_msg.xmin=xmin
                    box_msg.ymin=ymin
                    box_msg.xmax=xmax
                    box_msg.ymax=ymax
                    box_msg.Class=self.class_names[int(top_label_indices[i])]
                    self.detection_index.boxes.append(box_msg)

                    # Draw the box on top of the to_draw image

                    class_num = int(top_label_indices[i])
                    if (self.class_names[class_num]=="person"):
                        cv2.rectangle(to_draw, (xmin, ymin), (xmax, ymax),
                                      self.class_colors[class_num], 2)
                        text = self.class_names[class_num] + " " + ('%.2f' % top_conf[i])

                        text_top = (xmin, ymin-10)
                        text_bot = (xmin + 80, ymin + 5)
                        text_pos = (xmin + 5, ymin)
                        cv2.rectangle(to_draw, text_top, text_bot, self.class_colors[class_num], -1)
                        cv2.putText(to_draw, text, text_pos, cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0,0,0), 1)
                        #cv2.circle(to_draw, (xmax, ymax),1,self.class_colors[class_num],30);

            self.detection_index.header = std_msgs.msg.Header()
            self.detection_index.header.stamp=rospy.Time.now()
            print (self.detection_index)
            self.box_coordinate_pub.publish(self.detection_index)
            self.detection_index.boxes[:]=[]
            #self.detection_index.boxes.clear()
        cv2.imshow("SSD result", to_draw)
        cv2.waitKey(1)

    def main(self):
        rospy.spin()
Example #27
            for i in range(nb_epoch):
                fp.write("%d\t%f\t%f\t%f\t%f\n" %
                         (epochs, loss[i], acc[i], val_loss[i], val_acc[i]))


plt.rcParams['figure.figsize'] = (8, 8)
plt.rcParams['image.interpolation'] = 'nearest'

np.set_printoptions(suppress=True)

# 21
NUM_CLASSES = 21  #4
input_shape = (300, 300, 3)

priors = pickle.load(open('prior_boxes_ssd300.pkl', 'rb'))
bbox_util = BBoxUtility(NUM_CLASSES, priors)

# gt = pickle.load(open('gt_pascal.pkl', 'rb'))
gt = pickle.load(open('VOC2007.pkl', 'rb'))
keys = sorted(gt.keys())
num_train = int(round(0.8 * len(keys)))
train_keys = keys[:num_train]
val_keys = keys[num_train:]
num_val = len(val_keys)


class Generator(object):
    def __init__(self,
                 gt,
                 bbox_util,
                 batch_size,
Example #28
class VideoTest(object):
    """ Class for testing a trained SSD model on a video file and show the
        result in a window. Class is designed so that one VideoTest object 
        can be created for a model, and the same object can then be used on 
        multiple videos and webcams.
        
        Arguments:
            class_names: A list of strings, each containing the name of a class.
                         The first name should be that of the background class
                         which is not used.
                         
            model:       An SSD model. It should already be trained for 
                         images similar to the video to test on.
                         
            input_shape: The shape that the model expects for its input, 
                         as a tuple, for example (300, 300, 3)    
                         
            bbox_util:   An instance of the BBoxUtility class in ssd_utils.py
                         The BBoxUtility needs to be instantiated with 
                         the same number of classes as the length of        
                         class_names.
    
    """
    def __init__(self, class_names, model, input_shape):
        self.class_names = class_names
        self.num_classes = len(class_names)
        self.model = model
        self.input_shape = input_shape
        self.bbox_util = BBoxUtility(self.num_classes)

        # Create unique and somewhat visually distinguishable bright
        # colors for the different classes.
        self.class_colors = []
        for i in range(0, self.num_classes):
            # This can probably be written in a more elegant manner
            hue = 255 * i / self.num_classes
            col = np.zeros((1, 1, 3)).astype("uint8")
            col[0][0][0] = hue
            col[0][0][1] = 128  # Saturation
            col[0][0][2] = 255  # Value
            cvcol = cv2.cvtColor(col, cv2.COLOR_HSV2BGR)
            col = (int(cvcol[0][0][0]), int(cvcol[0][0][1]),
                   int(cvcol[0][0][2]))
            self.class_colors.append(col)

    def run(self, video_path=0, start_frame=0, conf_thresh=0.6):
        """ Runs the test on a video (or webcam)
        
        # Arguments
        video_path: A file path to a video to be tested on. Can also be a number,
                    in which case the webcam with that number (e.g. 0) is
                    used instead
                    
        start_frame: The number of the first frame of the video to be processed
                     by the network. 
                     
        conf_thresh: Threshold of confidence. Any boxes with lower confidence 
                     are not visualized.
                    
        """

        vid = cv2.VideoCapture(video_path)
        if not vid.isOpened():
            raise IOError((
                "Couldn't open video file or webcam. If you're "
                "trying to open a webcam, make sure your video_path is an integer!"
            ))

        # Compute aspect ratio of video
        vidw = vid.get(cv2.cv.CV_CAP_PROP_FRAME_WIDTH)
        vidh = vid.get(cv2.cv.CV_CAP_PROP_FRAME_HEIGHT)
        vidar = vidw / vidh

        # Skip ahead to start_frame (a frame index, not milliseconds)
        if start_frame > 0:
            vid.set(cv2.cv.CV_CAP_PROP_POS_FRAMES, start_frame)

        accum_time = 0
        curr_fps = 0
        fps = "FPS: ??"
        prev_time = timer()

        while True:
            retval, orig_image = vid.read()
            if not retval:
                print("Done!")
                return

            im_size = (self.input_shape[0], self.input_shape[1])
            resized = cv2.resize(orig_image, im_size)
            rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)

            # Reshape to original aspect ratio for later visualization
            # The resized version is used, to visualize what kind of resolution
            # the network has to work with.
            to_draw = cv2.resize(
                resized,
                (int(self.input_shape[0] * vidar), self.input_shape[1]))

            # Use model to predict
            inputs = [image.img_to_array(rgb)]
            tmp_inp = np.array(inputs)
            x = preprocess_input(tmp_inp)

            y = self.model.predict(x)

            # This line creates a new TensorFlow device every time. Is there a
            # way to avoid that?
            results = self.bbox_util.detection_out(y)

            # Interpret output, only one frame is used
            det_label = results[0][:, 0]
            det_conf = results[0][:, 1]
            det_xmin = results[0][:, 2]
            det_ymin = results[0][:, 3]
            det_xmax = results[0][:, 4]
            det_ymax = results[0][:, 5]

            top_indices = [
                i for i, conf in enumerate(det_conf) if conf >= conf_thresh
            ]

            top_conf = det_conf[top_indices]
            top_label_indices = det_label[top_indices].tolist()
            top_xmin = det_xmin[top_indices]
            top_ymin = det_ymin[top_indices]
            top_xmax = det_xmax[top_indices]
            top_ymax = det_ymax[top_indices]

            for i in range(top_conf.shape[0]):
                xmin = int(round(top_xmin[i] * to_draw.shape[1]))
                ymin = int(round(top_ymin[i] * to_draw.shape[0]))
                xmax = int(round(top_xmax[i] * to_draw.shape[1]))
                ymax = int(round(top_ymax[i] * to_draw.shape[0]))

                # Draw the box on top of the to_draw image
                class_num = int(top_label_indices[i])
                cv2.rectangle(to_draw, (xmin, ymin), (xmax, ymax),
                              self.class_colors[class_num], 2)
                text = self.class_names[class_num] + " " + ('%.2f' %
                                                            top_conf[i])

                text_top = (xmin, ymin - 10)
                text_bot = (xmin + 80, ymin + 5)
                text_pos = (xmin + 5, ymin)
                cv2.rectangle(to_draw, text_top, text_bot,
                              self.class_colors[class_num], -1)
                cv2.putText(to_draw, text, text_pos, cv2.FONT_HERSHEY_SIMPLEX,
                            0.35, (0, 0, 0), 1)

            # Calculate FPS
            # This computes FPS for everything, not just the model's execution
            # which may or may not be what you want
            curr_time = timer()
            exec_time = curr_time - prev_time
            prev_time = curr_time
            accum_time = accum_time + exec_time
            curr_fps = curr_fps + 1
            if accum_time > 1:
                accum_time = accum_time - 1
                fps = "FPS: " + str(curr_fps)
                curr_fps = 0

            # Draw FPS in top left corner
            cv2.rectangle(to_draw, (0, 0), (50, 17), (255, 255, 255), -1)
            cv2.putText(to_draw, fps, (3, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.35,
                        (0, 0, 0), 1)

            cv2.imshow("SSD result", to_draw)
            cv2.waitKey(10)
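Driving the class might look like this (a sketch; the weights file and class list follow the other examples in this listing):

input_shape = (300, 300, 3)
class_names = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle",
               "bus", "car", "cat", "chair", "cow", "diningtable", "dog",
               "horse", "motorbike", "person", "pottedplant", "sheep", "sofa",
               "train", "tvmonitor"]
model = SSD300(input_shape, num_classes=len(class_names))
model.load_weights('weights_SSD300.hdf5', by_name=True)
vid_test = VideoTest(class_names, model, input_shape)
vid_test.run('video.mp4', start_frame=0, conf_thresh=0.6)  # or run(0) for a webcam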
Example #29
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.8
set_session(tf.Session(config=config))

voc_classes = [
    'Aeroplane', 'Bicycle', 'Bird', 'Boat', 'Bottle', 'Bus', 'Car', 'Cat',
    'Chair', 'Cow', 'Diningtable', 'Dog', 'Horse', 'Motorbike', 'Person',
    'Pottedplant', 'Sheep', 'Sofa', 'Train', 'Tvmonitor'
]
NUM_CLASSES = len(voc_classes) + 1

input_shape = (300, 300, 3)
model = SSD300(input_shape, num_classes=NUM_CLASSES)
model.load_weights('weights_SSD300.hdf5', by_name=True)
bbox_util = BBoxUtility(NUM_CLASSES)

from PIL import Image


def get_rectangle(img_file, img_name, target_file, target_label):

    inputs = []
    images = []
    img_path = '{}/{}.jpg'.format(img_file, img_name)
    im = Image.open(img_path)
    img = image.load_img(img_path, target_size=(300, 300))
    img = image.img_to_array(img)
    images.append(imread(img_path))
    inputs.append(img.copy())
    inputs = preprocess_input(np.array(inputs))
Example #30
class VideoTest(object):
    """ Class for testing a trained SSD model on a video file and show the
        result in a window. Class is designed so that one VideoTest object 
        can be created for a model, and the same object can then be used on 
        multiple videos and webcams.
        
        Arguments:
            class_names: A list of strings, each containing the name of a class.
                         The first name should be that of the background class
                         which is not used.
                         
            model:       An SSD model. It should already be trained for 
                         images similar to the video to test on.
                         
            input_shape: The shape that the model expects for its input, 
                         as a tuple, for example (300, 300, 3)    
                         
            bbox_util:   An instance of the BBoxUtility class in ssd_utils.py
                         The BBoxUtility needs to be instantiated with 
                         the same number of classes as the length of        
                         class_names.
    
    """
    
    def __init__(self, class_names, model, input_shape):
        self.class_names = class_names
        self.num_classes = len(class_names)
        self.model = model
        self.input_shape = input_shape
        self.bbox_util = BBoxUtility(self.num_classes)
        
        # Create unique and somewhat visually distinguishable bright
        # colors for the different classes.
        self.class_colors = []
        for i in range(0, self.num_classes):
            # This can probably be written in a more elegant manner
            hue = 255*i/self.num_classes
            col = np.zeros((1,1,3)).astype("uint8")
            col[0][0][0] = hue
            col[0][0][1] = 128 # Saturation
            col[0][0][2] = 255 # Value
            cvcol = cv2.cvtColor(col, cv2.COLOR_HSV2BGR)
            col = (int(cvcol[0][0][0]), int(cvcol[0][0][1]), int(cvcol[0][0][2]))
            self.class_colors.append(col) 
        
    def run(self, video_path = 0, start_frame = 0, conf_thresh = 0.6):
        """ Runs the test on a video (or webcam)
        
        # Arguments
        video_path: A file path to a video to be tested on. Can also be a number, 
                    in which case the webcam with that number (e.g. 0) is 
                    used instead
                    
        start_frame: The number of the first frame of the video to be processed
                     by the network. 
                     
        conf_thresh: Threshold of confidence. Any boxes with lower confidence 
                     are not visualized.
                    
        """
    
        vid = cv2.VideoCapture(video_path)
        if not vid.isOpened():
            raise IOError(("Couldn't open video file or webcam. If you're "
            "trying to open a webcam, make sure your video_path is an integer!"))
        
        # Compute aspect ratio of video     
        vidw = vid.get(cv2.cv.CV_CAP_PROP_FRAME_WIDTH)
        vidh = vid.get(cv2.cv.CV_CAP_PROP_FRAME_HEIGHT)
        vidar = vidw/vidh
        
        # Skip ahead to start_frame (a frame index, not milliseconds)
        if start_frame > 0:
            vid.set(cv2.cv.CV_CAP_PROP_POS_FRAMES, start_frame)
            
        accum_time = 0
        curr_fps = 0
        fps = "FPS: ??"
        prev_time = timer()
            
        while True:
            retval, orig_image = vid.read()
            if not retval:
                print("Done!")
                return
                
            im_size = (self.input_shape[0], self.input_shape[1])    
            resized = cv2.resize(orig_image, im_size)
            rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
            
            # Reshape to original aspect ratio for later visualization
            # The resized version is used, to visualize what kind of resolution
            # the network has to work with.
            to_draw = cv2.resize(resized, (int(self.input_shape[0]*vidar), self.input_shape[1]))
            
            # Use model to predict 
            inputs = [image.img_to_array(rgb)]
            tmp_inp = np.array(inputs)
            x = preprocess_input(tmp_inp)
            
            y = self.model.predict(x)
            
            
            # This line creates a new TensorFlow device every time. Is there a 
            # way to avoid that?
            results = self.bbox_util.detection_out(y)
            
            if len(results) > 0 and len(results[0]) > 0:
                # Interpret output, only one frame is used 
                det_label = results[0][:, 0]
                det_conf = results[0][:, 1]
                det_xmin = results[0][:, 2]
                det_ymin = results[0][:, 3]
                det_xmax = results[0][:, 4]
                det_ymax = results[0][:, 5]

                top_indices = [i for i, conf in enumerate(det_conf) if conf >= conf_thresh]

                top_conf = det_conf[top_indices]
                top_label_indices = det_label[top_indices].tolist()
                top_xmin = det_xmin[top_indices]
                top_ymin = det_ymin[top_indices]
                top_xmax = det_xmax[top_indices]
                top_ymax = det_ymax[top_indices]

                for i in range(top_conf.shape[0]):
                    xmin = int(round(top_xmin[i] * to_draw.shape[1]))
                    ymin = int(round(top_ymin[i] * to_draw.shape[0]))
                    xmax = int(round(top_xmax[i] * to_draw.shape[1]))
                    ymax = int(round(top_ymax[i] * to_draw.shape[0]))

                    # Draw the box on top of the to_draw image
                    class_num = int(top_label_indices[i])
                    cv2.rectangle(to_draw, (xmin, ymin), (xmax, ymax), 
                                  self.class_colors[class_num], 2)
                    text = self.class_names[class_num] + " " + ('%.2f' % top_conf[i])

                    text_top = (xmin, ymin-10)
                    text_bot = (xmin + 80, ymin + 5)
                    text_pos = (xmin + 5, ymin)
                    cv2.rectangle(to_draw, text_top, text_bot, self.class_colors[class_num], -1)
                    cv2.putText(to_draw, text, text_pos, cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0,0,0), 1)
            
            # Calculate FPS
            # This computes FPS for everything, not just the model's execution 
            # which may or may not be what you want
            curr_time = timer()
            exec_time = curr_time - prev_time
            prev_time = curr_time
            accum_time = accum_time + exec_time
            curr_fps = curr_fps + 1
            if accum_time > 1:
                accum_time = accum_time - 1
                fps = "FPS: " + str(curr_fps)
                curr_fps = 0
            
            # Draw FPS in top left corner
            cv2.rectangle(to_draw, (0,0), (50, 17), (255,255,255), -1)
            cv2.putText(to_draw, fps, (3,10), cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0,0,0), 1)
            
            cv2.imshow("SSD result", to_draw)
            cv2.waitKey(10)
Example #31
def main(dataset, run, input_shape, seq_start, seq_stop, videopath, conf_thresh, i_seq, outname, batch_size):
    
    print_flush("> Predicting...")
    classes = get_classnames(dataset)
    masker = Masker(dataset)
    
    input_shape = parse_resolution(input_shape)
    
    num_classes = len(classes)+1
    model = get_model(dataset, run, input_shape, num_classes, verbose=False)
    priors = get_priors(model, input_shape)
    bbox_util = BBoxUtility(num_classes, priors)
    
    
    width = input_shape[0]
    height = input_shape[1]
    
    inputs = []
    outputs = []
    old_frame = None
    
    with io.get_reader(videopath) as vid: 
        vlen = len(vid)
        for i_in_seq in range(seq_start, seq_stop):
            if i_in_seq < vlen:
                frame = vid.get_data(i_in_seq)
                frame = masker.mask(frame)
                old_frame = frame
            else:
                frame = old_frame
                
            resized = cv2.resize(frame, (width, height))
            inputs.append(resized)
            
            if len(inputs) == batch_size:
                inputs2 = np.array(inputs)
                inputs2 = inputs2.astype(np.float32)
                inputs2 = preprocess_input(inputs2)
                
                y = model.predict_on_batch(inputs2)
                outputs.append(y)
                
                inputs = []     
        
    preds = np.vstack(outputs)
    
    print_flush("> Processing...")
    all_detections = []   
    seq_len = seq_stop - seq_start
         
    for i in range(seq_len):
        frame_num = i + seq_start
        
        if frame_num < vlen:           
            pred = preds[i, :]
            pred = pred.reshape(1, pred.shape[0], pred.shape[1])
            results = bbox_util.detection_out(pred, soft=False)

            detections = process_results(results, width, height, classes, conf_thresh, frame_num)
            all_detections.append(detections)
    
    dets = pd.concat(all_detections)
    
    # For the first sequence we open the file in write mode, then append afterwards.
    # This way the files are still overwritten if this script is run multiple times
    open_mode = 'a'
    include_header = False
    if i_seq == 0:
        open_mode = 'w'
        include_header = True

    print_flush("> Writing to {} ...".format(outname))    
    with open(outname, open_mode) as f:
        dets.to_csv(f, header=include_header) 
Example #32
    NUM_CLASSES = 21
    args = parser.parse_args()

    with open(args.path_to_settings, 'r') as fp:
        sets = yaml.safe_load(fp)

    input_shape = (sets['img_height'], sets['img_width'], 3)
    batch_size = sets['batch_size']

    priors = pickle.load(
        open(
            os.path.join(dir_path,
                         'priorFiles/prior_boxes_ssd300MobileNetV2.pkl'),
            'rb'))
    bbox_util = BBoxUtility(NUM_CLASSES, priors)

    gt = pickle.load(open(os.path.join(dir_path, 'voc_2007.pkl'), 'rb'))
    keys = sorted(gt.keys())
    num_train = int(round(0.8 * len(keys)))
    train_keys = keys[:num_train]
    val_keys = keys[num_train:]
    num_val = len(val_keys)

    path_prefix = os.path.join(sets['dataset_dir'], 'VOC2007/JPEGImages/')
    gen = Generator(gt,
                    bbox_util,
                    batch_size,
                    path_prefix,
                    train_keys,
                    val_keys, (input_shape[0], input_shape[1]))
Example #33
# In[2]:

voc_classes = ['Aeroplane', 'Bicycle', 'Bird', 'Boat', 'Bottle',
               'Bus', 'Car', 'Cat', 'Chair', 'Cow', 'Diningtable',
               'Dog', 'Horse','Motorbike', 'Person', 'Pottedplant',
               'Sheep', 'Sofa', 'Train', 'Tvmonitor']
NUM_CLASSES = len(voc_classes) + 1


# In[3]:

input_shape = (300, 300, 3)
model = SSD300v2(input_shape, num_classes=NUM_CLASSES)
model.load_weights('weights_SSD300.hdf5', by_name=True)
bbox_util = BBoxUtility(NUM_CLASSES)


# In[4]:

inputs = []
images = []
img_path = './pics/fish-bike.jpg'
img = image.load_img(img_path, target_size=(300, 300))
img = image.img_to_array(img)
images.append(imread(img_path))
inputs.append(img.copy())
img_path = './pics/cat.jpg'
img = image.load_img(img_path, target_size=(300, 300))
img = image.img_to_array(img)
images.append(imread(img_path))
inputs.append(img.copy())
Example #34
class DetectorSSD(Detector):
    def __init__(
            self,
            path_weights="/home/francisco/git/ssd_keras/weights_SSD300.hdf5"):
        config = tf.ConfigProto()
        config.gpu_options.per_process_gpu_memory_fraction = 0.45
        set_session(tf.Session(config=config))

        self.labels = [
            'Aeroplane', 'Bicycle', 'Bird', 'Boat', 'Bottle', 'Bus', 'Car',
            'Cat', 'Chair', 'Cow', 'Diningtable', 'Dog', 'Horse', 'Motorbike',
            'Person', 'Pottedplant', 'Sheep', 'Sofa', 'Train', 'Tvmonitor'
        ]
        NUM_CLASSES = len(self.labels) + 1

        input_shape = (300, 300, 3)
        # Get detections with confidence higher than 0.6.
        self.detection_confidence = 0.6
        self.model = SSD300(input_shape, num_classes=NUM_CLASSES)
        self.model.load_weights(path_weights, by_name=True)
        self.bbox_util = BBoxUtility(NUM_CLASSES)
        self.detections = []

    def detect(self, cvImage):
        inputs = []
        images = []
        cvImage = cv2.cvtColor(cvImage, cv2.COLOR_BGR2RGB)
        img = cv2.resize(cvImage, (300, 300))
        images.append(cvImage)
        inputs.append(img.copy().astype(np.float32))  # np.float is deprecated
        inputs = preprocess_input(np.array(inputs))
        preds = self.model.predict(inputs, batch_size=1, verbose=0)
        results = self.bbox_util.detection_out(preds)

        for i, img in enumerate(images):
            # Parse the outputs.
            det_label = results[i][:, 0]
            det_conf = results[i][:, 1]
            det_xmin = results[i][:, 2]
            det_ymin = results[i][:, 3]
            det_xmax = results[i][:, 4]
            det_ymax = results[i][:, 5]

            # Keep only detections above the confidence threshold (0.6).
            top_indices = [
                idx for idx, conf in enumerate(det_conf)
                if conf >= self.detection_confidence
            ]

            top_conf = det_conf[top_indices]
            top_label_indices = det_label[top_indices].tolist()
            top_xmin = det_xmin[top_indices]
            top_ymin = det_ymin[top_indices]
            top_xmax = det_xmax[top_indices]
            top_ymax = det_ymax[top_indices]

            self.detections = []
            for j in range(top_conf.shape[0]):
                xmin = int(round(top_xmin[j] * img.shape[1]))
                ymin = int(round(top_ymin[j] * img.shape[0]))
                xmax = int(round(top_xmax[j] * img.shape[1]))
                ymax = int(round(top_ymax[j] * img.shape[0]))
                score = float(top_conf[j])
                label = int(top_label_indices[j])
                self.detections.append([label, score, xmin, ymin, xmax, ymax])
        return self.detections
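
# A minimal usage sketch for DetectorSSD, assuming the default weights path
# above exists; 'example.jpg' is a placeholder image (OpenCV reads it as BGR,
# which detect() converts to RGB internally).
detector = DetectorSSD()
frame = cv2.imread('example.jpg')
for label, score, xmin, ymin, xmax, ymax in detector.detect(frame):
    # Class labels start at 1, since index 0 is the SSD background class.
    print(detector.labels[label - 1], score, (xmin, ymin, xmax, ymax))
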
def train_SSD300_NAG(
        master_file,
        train_dir,
        test_dir,
        model_path,
        load_weights_path=r'C:\Users\shingo\jupyter_notebook\tfgpu_py36_work\AI_Edge_Contest\object_detection\SSD_classes_py\all_SSD_module\SSD\weights_SSD300.hdf5',
        epochs=20,
        batch_size=32,
        base_lr=1e-3,
        num_classes=6 + 1,
        callback=[]):
    """
    dtc_train.py のパラメータなどを引数にした関数
    ラベル情報のcsvファイルから訓練画像の領域情報ロードし、SSDのモデル作成する
    ※csvファイルからラベル情報読めるのが良いところ(一般的な物体検出モデルのラベル情報は1画像1xmlファイル)
    画像のサイズは300x300に変換される(ssd_vgg.pyより)
    分類器はVGG16のfine-tuning
    オプティマイザは ネステロフ+モメンタム+SGD(decayあり). 学習率はLearningRateScheduler でも下げる
    Args:
        master_file : 正解の座標(ファイル名, x, y, width, height, ラベルid)一覧のcsvファイルパス.
                      SSDの「背景」ラベルとして使われるため、ラベルidは0を使わないこと!!!
        train_dir : 訓練用画像が入っているフォルダパス
        test_dir : 評価用画像が入っているフォルダパス
        model_path : モデルファイルの保存先パス
        load_weights_path : 重みファイルのパス
        epochs : エポック数
        batch_size : バッチサイズ
        base_lr : 学習率初期値
        num_classes : クラス数。クラス数は「背景(class_id=0固定)」と「分類したいクラス」の数(要するにクラス数+1)にしないと正しくできない!!!!
        callback: 追加するcallbackのリスト。空なら ModelCheckpoint と LearningRateScheduler だけの callback にになる
    Return:
        なし(モデルファイルweight_ssd_best.hdf5 出力)
    """

    #epochs = 20        # number of epochs
    #batch_size = 32     # batch size
    #base_lr =  1e-3     # initial learning rate
    #num_classes = 11

    # Optimizer (SGD with Nesterov momentum; alternatives kept commented out)
    # optimizer = keras.optimizers.Adam(lr=base_lr)
    # optimizer = keras.optimizers.RMSprop(lr=base_lr)
    optimizer = keras.optimizers.SGD(lr=base_lr,
                                     momentum=0.9,
                                     decay=1e-6,
                                     nesterov=True)

    # Learning-rate schedule function: exponential decay per epoch
    def schedule(epoch, decay=0.90):
        return base_lr * decay**(epoch)
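    # For example, with base_lr=1e-3 and decay=0.90, the learning rate at
    # epoch 10 would be 1e-3 * 0.9**10 ≈ 3.5e-4.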

    # CSV file listing the ground-truth boxes (filename, x, y, width, height)
    #master_file = "xywh_train.csv"
    # Folder containing the training images
    #train_dir = "ssd_train"
    # Folder containing the validation images
    #test_dir = "ssd_test"

    # Build a dict that maps an image filename to its ground-truth boxes
    correct_boxes = get_correct_boxes(master_file,
                                      train_dir,
                                      test_dir,
                                      num_classes=num_classes)

    # Collect the image file paths
    train_path_list = glob.glob(os.path.join(train_dir, "*.*"))
    test_path_list = glob.glob(os.path.join(test_dir, "*.*"))
    ## Collect the image file paths (per-class-folder variant, for reference)
    #train_path_list = []
    #test_path_list = []
    #for folder in glob.glob(os.path.join(train_dir, "*")):
    #    for file in glob.glob(os.path.join(folder, "*.jpg")):
    #        train_path_list.append(file)
    #for folder in glob.glob(os.path.join(test_dir, "*")):
    #    for file in glob.glob(os.path.join(folder, "*.jpg")):
    #        test_path_list.append(file)

    # Build the model
    model = create_model(num_classes=num_classes)
    print('create_model ok')
    model.load_weights(load_weights_path, by_name=True)
    print('load_weights ok')

    # Freeze the layers near the input
    freeze_layers(model, depth_level=1)
    print('freeze_layers ok')

    model.compile(optimizer=optimizer,
                  loss=MultiboxLoss(num_classes).compute_loss)
    #model.summary()
    plot_model(model, os.path.join(os.path.dirname(model_path),
                                   "model_ssd.png"))

    # Create the default (prior) boxes
    priors = create_prior_box()

    # Create the image data generator
    input_shape = (300, 300, 3)  # SSD300 input size; images are resized to 300x300
    bbox_util = BBoxUtility(num_classes, priors)
    gen = Generator(correct_boxes, bbox_util, train_path_list, test_path_list,
                    (input_shape[0], input_shape[1]), batch_size)

    print("Train Items : {}".format(gen.train_batches))
    print("Test  Items : {}".format(gen.val_batches))

    # Callback setup
    callbacks = [
        ModelCheckpoint(model_path,
                        verbose=1,
                        save_weights_only=True,
                        save_best_only=True)
    ]  #, LearningRateScheduler(schedule)]
    if len(callback) != 0:
        callbacks.extend(callback)

    #print(model.summary())

    # Start training
    start_time = time.time()
    history = model.fit_generator(gen.generate(True),
                                  gen.train_batches // batch_size,
                                  epochs=epochs,
                                  verbose=2,
                                  callbacks=callbacks,
                                  validation_data=gen.generate(False),
                                  validation_steps=gen.val_batches //
                                  batch_size)
    end_time = time.time()

    # Print the elapsed time
    elapsed_time = end_time - start_time
    print("Elapsed Time : {0:d} hr {1:d} min {2:d} sec".format(
        int(elapsed_time // 3600), int((elapsed_time % 3600) // 60),
        int(elapsed_time % 60)))
    return history
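
# A minimal, hypothetical call using the commented-out defaults above; the
# model path is a placeholder, and the CSV/folder layout must match the
# docstring's expectations.
history = train_SSD300_NAG(
    master_file='xywh_train.csv',
    train_dir='ssd_train',
    test_dir='ssd_test',
    model_path='weight_ssd_best.hdf5',
    epochs=20,
    batch_size=32,
    num_classes=6 + 1)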