def __init__(self, class_names, weight_path, input_shape):
        self.class_names = class_names
        self.num_classes = len(class_names)
        self.input_shape = input_shape
        self.model = SSD(self.input_shape, num_classes=self.num_classes)
        self.model.load_weights(weight_path)
        self.model._make_predict_function()
        self.bbox_util = BBoxUtility(self.num_classes)
        #self.timer = Timer(1, self.timer_callback)
        #self.current_time = 0
        # self.current_fps = 0
        #self.exec_time = None
        #self.prev_extra_time = None
        #self.extra_time = None
        # self.fps_time_slot = list()
        #self.is_finish = False
        #self.fps_start = False
        #self.fps_start_time = 0

        # Create unique and somewhat visually distinguishable bright
        # colors for the different classes.
        self.class_colors = []
        for i in range(0, self.num_classes):
            # This can probably be written in a more elegant manner
            hue = 255 * i / self.num_classes
            col = np.zeros((1, 1, 3)).astype("uint8")
            col[0][0][0] = hue
            col[0][0][1] = 128  # Saturation
            col[0][0][2] = 255  # Value
            cvcol = cv2.cvtColor(col, cv2.COLOR_HSV2BGR)
            col = (int(cvcol[0][0][0]), int(cvcol[0][0][1]),
                   int(cvcol[0][0][2]))
            self.class_colors.append(col)
Esempio n. 2
0
    def __init__(self, class_names, model, input_shape):
        self.class_names = class_names
        self.num_classes = len(class_names)
        self.model = model
        self.input_shape = input_shape
        self.bbox_util = BBoxUtility(self.num_classes)

        # Create unique and somewhat visually distinguishable bright
        # colors for the different classes.
        self.class_colors = []
        for i in range(0, self.num_classes):
            # This can probably be written in a more elegant manner
            hue = 255 * i / self.num_classes
            col = np.zeros((1, 1, 3)).astype("uint8")
            col[0][0][0] = hue
            col[0][0][1] = 128  # Saturation
            col[0][0][2] = 255  # Value
            cvcol = cv2.cvtColor(col, cv2.COLOR_HSV2BGR)
            col = (int(cvcol[0][0][0]), int(cvcol[0][0][1]),
                   int(cvcol[0][0][2]))
            self.class_colors.append(col)
class MobileNetTest(object):
    """ Class for testing a trained SSD model on a video file and show the
        result in a window. Class is designed so that one VideoTest object
        can be created for a model, and the same object can then be used on
        multiple videos and webcams.

        Arguments:
            class_names: A list of strings, each containing the name of a class.
                         The first name should be that of the background class
                         which is not used.

            model:       An SSD model. It should already be trained for
                         images similar to the video to test on.

            input_shape: The shape that the model expects for its input,
                         as a tuple, for example (300, 300, 3)

            bbox_util:   An instance of the BBoxUtility class in ssd_utils.py
                         The BBoxUtility needs to be instantiated with
                         the same number of classes as the length of
                         class_names.
    """
    def __init__(self, class_names, weight_path, input_shape):
        self.class_names = class_names
        self.num_classes = len(class_names)
        self.input_shape = input_shape
        self.model = SSD(self.input_shape, num_classes=self.num_classes)
        self.model.load_weights(weight_path)
        self.model._make_predict_function()
        self.bbox_util = BBoxUtility(self.num_classes)
        #self.timer = Timer(1, self.timer_callback)
        #self.current_time = 0
        # self.current_fps = 0
        #self.exec_time = None
        #self.prev_extra_time = None
        #self.extra_time = None
        # self.fps_time_slot = list()
        #self.is_finish = False
        #self.fps_start = False
        #self.fps_start_time = 0

        # Create unique and somewhat visually distinguishable bright
        # colors for the different classes.
        self.class_colors = []
        for i in range(0, self.num_classes):
            # This can probably be written in a more elegant manner
            hue = 255 * i / self.num_classes
            col = np.zeros((1, 1, 3)).astype("uint8")
            col[0][0][0] = hue
            col[0][0][1] = 128  # Saturation
            col[0][0][2] = 255  # Value
            cvcol = cv2.cvtColor(col, cv2.COLOR_HSV2BGR)
            col = (int(cvcol[0][0][0]), int(cvcol[0][0][1]),
                   int(cvcol[0][0][2]))
            self.class_colors.append(col)

    def run(self, frame, frame_num, conf_thresh=0.6):
        """ Runs the test on a video (or webcam)

        # Arguments
        conf_thresh: Threshold of confidence. Any boxes with lower confidence
                     are not visualized.

        """
        output_list = list()
        im_size = (self.input_shape[0], self.input_shape[1])
        resized = cv2.resize(frame, im_size)
        orig_image = cv2.cvtColor(resized, cv2.COLOR_RGB2BGR)
        to_draw = resized

        # Use model to predict
        inputs = [image.img_to_array(orig_image)]
        tmp_inp = np.array(inputs)
        x = preprocess_input(tmp_inp)
        y = self.model.predict(x)
        results = self.bbox_util.detection_out(y)

        if len(results) > 0 and len(results[0]) > 0:
            # Interpret output, only one frame is used
            det_label = results[0][:, 0]
            det_conf = results[0][:, 1]
            det_xmin = results[0][:, 2]
            det_ymin = results[0][:, 3]
            det_xmax = results[0][:, 4]
            det_ymax = results[0][:, 5]

            top_indices = [
                i for i, conf in enumerate(det_conf) if conf >= conf_thresh
            ]

            top_conf = det_conf[top_indices]
            top_label_indices = det_label[top_indices].tolist()
            top_xmin = det_xmin[top_indices]
            top_ymin = det_ymin[top_indices]
            top_xmax = det_xmax[top_indices]
            top_ymax = det_ymax[top_indices]

            output_list.append(frame_num)
            for i in range(top_conf.shape[0]):
                if (top_conf[i] < 0.9):
                    continue
                xmin = int(round(top_xmin[i] * to_draw.shape[1]))
                ymin = int(round(top_ymin[i] * to_draw.shape[0]))
                xmax = int(round(top_xmax[i] * to_draw.shape[1]))
                ymax = int(round(top_ymax[i] * to_draw.shape[0]))

                # Draw the box on top of the to_draw image
                class_num = int(top_label_indices[i])
                cv2.rectangle(to_draw, (xmin, ymin), (xmax, ymax),
                              self.class_colors[class_num], 2)
                text = self.class_names[class_num] + " " + ('%.2f' %
                                                            top_conf[i])
                output_list.append(self.class_names[class_num])

                text_top = (xmin, ymin - 10)
                text_bot = (xmin + 80, ymin + 5)
                text_pos = (xmin + 5, ymin)
                cv2.rectangle(to_draw, text_top, text_bot,
                              self.class_colors[class_num], -1)
                cv2.putText(to_draw, text, text_pos, cv2.FONT_HERSHEY_SIMPLEX,
                            0.35, (0, 0, 0), 1)

        cv2.imshow("SSD result", to_draw)
        cv2.waitKey(10)

        #hit_detection = 0
        #for i in DETECTION_LIST:
        #    hit_detection += output_list.count(i)
        #print(output_list)
        #accuracy = (hit_detection / len(output_list))*100
        #return accuracy

    def draw_fps(self, fps_time_slot):
        x_range = [index for index, value in enumerate(fps_time_slot)]
        y_fps = [value[1] for index, value in enumerate(fps_time_slot)]
        plt.ylim((0, 5))
        plt.plot(x_range, y_fps, c='r')
        plt.xlabel('time')
        plt.ylabel('fps')
        plt.show()
Esempio n. 4
0
class VideoTest(object):
    """ Class for testing a trained SSD model on a video file and show the
        result in a window. Class is designed so that one VideoTest object
        can be created for a model, and the same object can then be used on
        multiple videos and webcams.

        Arguments:
            class_names: A list of strings, each containing the name of a class.
                         The first name should be that of the background class
                         which is not used.

            model:       An SSD model. It should already be trained for
                         images similar to the video to test on.

            input_shape: The shape that the model expects for its input,
                         as a tuple, for example (300, 300, 3)

            bbox_util:   An instance of the BBoxUtility class in ssd_utils.py
                         The BBoxUtility needs to be instantiated with
                         the same number of classes as the length of
                         class_names.

    """
    def __init__(self, class_names, model, input_shape):
        self.class_names = class_names
        self.num_classes = len(class_names)
        self.model = model
        self.input_shape = input_shape
        self.bbox_util = BBoxUtility(self.num_classes)

        # Create unique and somewhat visually distinguishable bright
        # colors for the different classes.
        self.class_colors = []
        for i in range(0, self.num_classes):
            # This can probably be written in a more elegant manner
            hue = 255 * i / self.num_classes
            col = np.zeros((1, 1, 3)).astype("uint8")
            col[0][0][0] = hue
            col[0][0][1] = 128  # Saturation
            col[0][0][2] = 255  # Value
            cvcol = cv2.cvtColor(col, cv2.COLOR_HSV2BGR)
            col = (int(cvcol[0][0][0]), int(cvcol[0][0][1]),
                   int(cvcol[0][0][2]))
            self.class_colors.append(col)

    def run(self, video_path, start_frame=0, conf_thresh=0.6):
        """ Runs the test on a video (or webcam)

        # Arguments
        video_path: A file path to a video to be tested on. Can also be a number,
                    in which case the webcam with the same number (i.e. 0) is
                    used instead

        start_frame: The number of the first frame of the video to be processed
                     by the network.

        conf_thresh: Threshold of confidence. Any boxes with lower confidence
                     are not visualized.

        """

        vid = cv2.VideoCapture(video_path)
        if not vid.isOpened():
            raise IOError((
                "Couldn't open video file or webcam. If you're "
                "trying to open a webcam, make sure you video_path is an integer!"
            ))

        # Compute aspect ratio of video
        vid.set(cv2.CAP_PROP_FPS, 30)
        vidw = vid.get(cv2.CAP_PROP_FRAME_WIDTH)
        vidh = vid.get(cv2.CAP_PROP_FRAME_HEIGHT)

        vidar = vidw / vidh

        # Skip frames until reaching start_frame
        if start_frame > 0:
            vid.set(cv2.CAP_PROP_POS_MSEC, start_frame)

        accum_time = 0
        curr_fps = 0
        fps = "FPS: ??"
        prev_time = timer()

        while True:
            retval, orig_image = vid.read()
            if not retval:
                print("Done!")
                return

            im_size = (self.input_shape[0], self.input_shape[1])
            resized = cv2.resize(orig_image, im_size)
            rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)

            # Reshape to original aspect ratio for later visualization
            # The resized version is used, to visualize what kind of resolution
            # the network has to work with.
            to_draw = cv2.resize(
                resized,
                (int(self.input_shape[0] * vidar), self.input_shape[1]))

            # Use model to predict
            inputs = [image.img_to_array(rgb)]
            tmp_inp = np.array(inputs)
            x = preprocess_input(tmp_inp)

            y = self.model.predict(x)

            # This line creates a new TensorFlow device every time. Is there a
            # way to avoid that?
            results = self.bbox_util.detection_out(y)

            if len(results) > 0 and len(results[0]) > 0:
                # Interpret output, only one frame is used
                det_label = results[0][:, 0]
                det_conf = results[0][:, 1]
                det_xmin = results[0][:, 2]
                det_ymin = results[0][:, 3]
                det_xmax = results[0][:, 4]
                det_ymax = results[0][:, 5]

                top_indices = [
                    i for i, conf in enumerate(det_conf) if conf >= conf_thresh
                ]

                top_conf = det_conf[top_indices]
                top_label_indices = det_label[top_indices].tolist()
                top_xmin = det_xmin[top_indices]
                top_ymin = det_ymin[top_indices]
                top_xmax = det_xmax[top_indices]
                top_ymax = det_ymax[top_indices]

                for i in range(top_conf.shape[0]):
                    xmin = int(round(top_xmin[i] * to_draw.shape[1]))
                    ymin = int(round(top_ymin[i] * to_draw.shape[0]))
                    xmax = int(round(top_xmax[i] * to_draw.shape[1]))
                    ymax = int(round(top_ymax[i] * to_draw.shape[0]))

                    # Draw the box on top of the to_draw image
                    class_num = int(top_label_indices[i])
                    cv2.rectangle(to_draw, (xmin, ymin), (xmax, ymax),
                                  self.class_colors[class_num], 2)
                    text = self.class_names[class_num] + " " + ('%.2f' %
                                                                top_conf[i])

                    text_top = (xmin, ymin - 10)
                    text_bot = (xmin + 80, ymin + 5)
                    text_pos = (xmin + 5, ymin)
                    cv2.rectangle(to_draw, text_top, text_bot,
                                  self.class_colors[class_num], -1)
                    cv2.putText(to_draw, text, text_pos,
                                cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0, 0, 0), 1)

            # Calculate FPS
            # This computes FPS for everything, not just the model's execution
            # which may or may not be what you want
            curr_time = timer()
            exec_time = curr_time - prev_time
            prev_time = curr_time
            accum_time = accum_time + exec_time
            curr_fps = curr_fps + 1
            if accum_time > 1:
                accum_time = accum_time - 1
                fps = "FPS: " + str(curr_fps)
                curr_fps = 0

            # Draw FPS in top left corner
            cv2.rectangle(to_draw, (0, 0), (50, 17), (255, 255, 255), -1)
            cv2.putText(to_draw, fps, (3, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.35,
                        (0, 0, 0), 1)

            cv2.imshow("SSD result", to_draw)
            cv2.waitKey(10)