Example #1
    def inference(self, resized_rgb_image):
        """
        inference function sets input tensor to input image and gets the output.
        The interpreter instance provides corresponding detection output which is used for creating result
        Args:
            resized_rgb_image: uint8 numpy array with shape (img_height, img_width, channels)

        Returns:
            result: a dictionary contains of [{"id": 0, "bbox": [x1, y1, x2, y2], "score":s%}, {...}, {...}, ...]
        """
        input_image = np.expand_dims(resized_rgb_image, axis=0)
        input_tensor = tf.convert_to_tensor(input_image)
        t_begin = time.perf_counter()
        output_dict = self.detection_model(input_tensor)
        inference_time = time.perf_counter() - t_begin  # Seconds

        # Calculate frame rate (fps)
        self.fps = convert_infr_time_to_fps(inference_time)

        boxes = output_dict['detection_boxes']
        labels = output_dict['detection_classes']
        scores = output_dict['detection_scores']

        class_id = int(self.model_variables['ClassID'])
        score_threshold = float(self.model_variables['MinScore'])
        result = []
        for i in range(boxes.shape[1]):  # number of boxes
            if labels[0, i] == class_id and scores[0, i] > score_threshold:
                result.append({"id": str(class_id) + '-' + str(i), "bbox": boxes[0, i, :].numpy(), "score": scores[0, i]})

        return result
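
`self.detection_model` above behaves like a TensorFlow 2 SavedModel callable. How it is loaded is not shown in this example; a minimal sketch, assuming a model exported with the TF2 Object Detection API and a hypothetical export directory:

import tensorflow as tf

# Load the exported detection model once; calling it on a (1, H, W, 3) uint8 tensor
# returns a dict with 'detection_boxes', 'detection_classes' and 'detection_scores'.
detection_model = tf.saved_model.load("exported_model/saved_model")  # hypothetical path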
Example #2
    def inference(self, resized_rgb_images) -> list:
        """
        Inference function sets input tensor to input image and gets the output.
        The interpreter instance provides corresponding class id output which is used for creating result
        Args:
            resized_rgb_images: Array of images with shape (no_images, img_height, img_width, channels)
        Returns:
            result: List of class id for each input image. ex: [0, 0, 1, 1, 0]
            scores: The classification confidence for each class. ex: [.99, .75, .80, 1.0]
        """
        if np.shape(resized_rgb_images)[0] == 0:
            return [], []
        t_begin = time.perf_counter()
        output_dict = self.classifier_model.predict(resized_rgb_images)
        inference_time = time.perf_counter() - t_begin  # Seconds
        # Calculate frame rate (fps)
        self.fps = convert_infr_time_to_fps(inference_time)
        result = list(np.argmax(output_dict, axis=1))  # returns class id

        # TODO: optimize without an explicit for loop (see the vectorized sketch after this example)
        scores = []
        for i, itm in enumerate(output_dict):
            scores.append(itm[result[i]])

        return result, scores
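
The TODO above can be addressed with a vectorized lookup. A minimal sketch, assuming `output_dict` is the (no_images, no_classes) array returned by `predict`:

import numpy as np

# Pick each row's winning class and its probability in one pass, with no Python loop.
result = np.argmax(output_dict, axis=1)                # class id per image
scores = output_dict[np.arange(len(result)), result]   # confidence of the winning class
result, scores = list(result), list(scores)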
Example #3
    def inference(self, resized_rgb_images) -> list:
        """
        Inference function sets input tensor to input image and gets the output.
        The interpreter instance provides corresponding class id output which is used for creating result
        Args:
            resized_rgb_images: Array of images with shape (no_images, img_height, img_width, channels)
        Returns:
            result: List of class id for each input image. ex: [0, 0, 1, 1, 0]
            scores: The classification confidence for each class. ex: [.99, .75, .80, 1.0]
        """
        if np.shape(resized_rgb_images)[0] == 0:
            return [], []
        resized_rgb_images = (resized_rgb_images * 255).astype("uint8")
        result = []
        net_results = []
        for img in resized_rgb_images:
            img = np.expand_dims(img, axis=0)
            self.interpreter.set_tensor(self.input_details[0]["index"], img)
            t_begin = time.perf_counter()
            self.interpreter.invoke()
            inference_time = time.perf_counter() - t_begin  # Seconds
            self.fps = convert_infr_time_to_fps(inference_time)
            net_output = self.interpreter.get_tensor(
                self.output_details[0]['index'])[0]
            net_results.append(net_output)
            result.append(np.argmax(net_output))  # returns class id

        # TODO: optimize without an explicit for loop
        scores = []
        for i, itm in enumerate(net_results):
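            # Map the raw uint8 output back to a confidence; scale 1/255 and zero point 1 are assumed quantization parameters.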
            scores.append((itm[result[i]] - 1) / 255.0)

        return result, scores
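
For context, `self.interpreter`, `self.input_details`, and `self.output_details` above would typically be created once at load time. A minimal sketch with a hypothetical model path (the actual loading code is not part of this example):

import tensorflow as tf

# Load the quantized TFLite model once and cache the tensor metadata.
interpreter = tf.lite.Interpreter(model_path="classifier.tflite")  # hypothetical path
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()    # index/shape/dtype of the input tensor
output_details = interpreter.get_output_details()  # index/shape/dtype of the output tensor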
Example #4
    def inference(self, resized_rgb_image):
        """
        Run inference on a single image and return the detected person bounding boxes.
        Args:
            resized_rgb_image: uint8 numpy array with shape (img_height, img_width, channels)

        Returns:
            result: a list of dictionaries, one per detected person, each of the form
                {"id": "1-<index>", "bbox": [y_min, x_min, y_max, x_max], "score": <confidence>, "face": None}
        """
        img, orig_im, dim = self.prep_image(resized_rgb_image, self._inp_dim)
        im_dim = torch.FloatTensor(dim).repeat(1, 2)

        if self._CUDA:
            im_dim = im_dim.cuda()
            img = img.cuda()

        # start the inference timer
        t_begin = time.perf_counter()
        with torch.no_grad():
            output = self._model(Variable(img), self._CUDA)
        output = write_results(output,
                               self.confidence,
                               self._num_classes,
                               nms=True,
                               nms_conf=self.nms_threshold)
        inference_time = time.perf_counter() - t_begin
        self.fps = convert_infr_time_to_fps(inference_time)

        im_dim = im_dim.repeat(output.size(0), 1)
        scaling_factor = torch.min(self._inp_dim / im_dim, 1)[0].view(-1, 1)
        output[:, [1, 3]] -= (self._inp_dim -
                              scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
        output[:, [2, 4]] -= (self._inp_dim -
                              scaling_factor * im_dim[:, 1].view(-1, 1)) / 2
        output[:, 1:5] /= scaling_factor
        for i in range(output.shape[0]):
            output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, im_dim[i,
                                                                           0])
            output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, im_dim[i,
                                                                           1])

        result = []
        for i, pred in enumerate(output):
            c1 = pred[1:3].cpu().int().numpy()  # unnormalized [x_min, y_min]
            c2 = pred[3:5].cpu().int().numpy()  # unnormalized [x_max, y_max]
            cls = int(pred[-1].cpu())
            score = float(pred[5].cpu())
            if cls == 0:  # person class index is 0 in the COCO dataset
                bbox_dict = {
                    "id": "1-" + str(i),
                    "bbox": [c1[1] / self.h, c1[0] / self.w,
                             c2[1] / self.h, c2[0] / self.w],
                    "score": score,
                    "face": None,
                }
                result.append(bbox_dict)
        return result
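
The bounding boxes returned by these detectors are normalized by the frame height and width. A minimal sketch of converting one back to pixel coordinates for drawing (OpenCV usage is an assumption; `frame` is the original image):

import cv2 as cv

def draw_bbox(frame, bbox):
    # bbox is [y_min, x_min, y_max, x_max], normalized to [0, 1] as in the examples above.
    h, w = frame.shape[:2]
    y1, x1, y2, x2 = bbox
    cv.rectangle(frame, (int(x1 * w), int(y1 * h)), (int(x2 * w), int(y2 * h)), (0, 255, 0), 2)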
Example #5
    def inference(self, resized_rgb_images):
        """
        Inference function sets input tensor to input image and gets the output.
        The interpreter instance provides corresponding class id output which is used for creating result
        Args:
            resized_rgb_images: Array of images with shape (no_images, img_height, img_width, channels)
        Returns:
            result: List of class id for each input image. ex: [0, 0, 1, 1, 0]
            scores: The classification confidence for each class. ex: [.99, .75, .80, 1.0]
        """
        bindings = self.bindings
        host_inputs = self.host_inputs
        host_outputs = self.host_outputs
        cuda_inputs = self.cuda_inputs
        cuda_outputs = self.cuda_outputs
        stream = self.stream
        t_begin = time.perf_counter()
        result = []
        scores = []
        for img in resized_rgb_images:
            img = np.expand_dims(img, axis=0)
            img = img.astype(np.float32)

            host_inputs[0] = np.ravel(np.zeros_like(img))

            self.cuda_context.push()

            np.copyto(host_inputs[0], img.ravel())
            cuda.memcpy_htod_async(cuda_inputs[0], host_inputs[0], stream)

            self.engine_context.execute_async(batch_size=1,
                                              bindings=bindings,
                                              stream_handle=stream.handle)

            cuda.memcpy_dtoh_async(host_outputs[0], cuda_outputs[0], stream)
            stream.synchronize()
            pred = list(np.argmax(host_outputs, axis=1))

            # TODO: optimize without an explicit for loop
            for i, itm in enumerate(host_outputs):
                scores.append(itm[pred[i]])

            result.append(pred[0])
            self.cuda_context.pop()
        inference_time = float(time.perf_counter() - t_begin)
        if len(resized_rgb_images) != 0:
            inference_time = inference_time / len(resized_rgb_images)
        self.fps = convert_infr_time_to_fps(inference_time)
        return result, scores
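
Every example here ends by converting the measured latency to a frame rate via `convert_infr_time_to_fps`. The helper itself is not shown; a plausible one-liner, offered as an assumption rather than the project's actual implementation:

def convert_infr_time_to_fps(inference_time: float) -> int:
    # Invert the per-frame latency (in seconds) to frames per second.
    return int(round(1.0 / inference_time)) if inference_time > 0 else 0

Example #6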
    def inference(self, resized_rgb_image):
        """
        inference function sets input tensor to input image and gets the output.
        The interpreter instance provides corresponding detection output which is used for creating result
        Args:
            resized_rgb_image: uint8 numpy array with shape (img_height, img_width, channels)

        Returns:
            result: a dictionary contains of [{"id": 0, "bbox": [x1, y1, x2, y2], "score":s%}, {...}, {...}, ...]
        """

        required_image_size = (544, 320)

        input_image = cv.resize(resized_rgb_image, required_image_size)
        input_image = input_image.transpose(2, 0, 1)
        input_image = np.expand_dims(input_image, axis=0)

        t_begin = time.perf_counter()
        output = self.detection_model.infer(
            inputs={self.input_layer: input_image}
        )['detection_out']
        inference_time = time.perf_counter() - t_begin  # Seconds

        # Calculate frame rate (fps)
        self.fps = convert_infr_time_to_fps(inference_time)

        class_id = int(self.config.get_section_dict('Detector')['ClassID'])
        score_threshold = float(self.config.get_section_dict('Detector')['MinScore'])
        result = []

        for i, (_, label, score, x_min, y_min, x_max, y_max) in enumerate(output[0][0]):
            box = [y_min, x_min, y_max, x_max]
            if label == class_id and score > score_threshold:
                result.append({"id": str(class_id) + '-' + str(i), "bbox": box, "score": score})

        return result
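
`self.detection_model` and `self.input_layer` above follow the legacy OpenVINO Inference Engine API (removed in recent OpenVINO releases). A minimal loading sketch with hypothetical model paths:

from openvino.inference_engine import IECore

ie = IECore()
net = ie.read_network(model="detector.xml", weights="detector.bin")  # hypothetical IR paths
input_layer = next(iter(net.input_info))                             # name of the single input blob
detection_model = ie.load_network(network=net, device_name="CPU")    # ExecutableNetwork with .infer()

Example #7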
    def inference(self, resized_rgb_image):
        """
        This method will perform inference and return the detected bounding boxes
        Args:
            resized_rgb_image: uint8 numpy array with shape (img_height, img_width, channels)

        Returns:
            result: a dictionary contains of [{"id": 0, "bbox": [x1, y1, x2, y2], "score":s%}, {...}, {...}, ...]

        """
        image = resized_rgb_image
        image = cv2.resize(image, self.model_input_size)
        pil_im = PIL.Image.fromarray(image)
        preprocess = None

        data = openpifpaf.datasets.PilImageList([pil_im],
                                                preprocess=preprocess)
        loader = torch.utils.data.DataLoader(
            data,
            batch_size=1,
            shuffle=False,
            pin_memory=True,
            collate_fn=openpifpaf.datasets.collate_images_anns_meta)

        for images_batch, _, __ in loader:
            np_img = images_batch.numpy()

        bindings = self.bindings
        host_inputs = self.host_inputs
        host_outputs = self.host_outputs
        cuda_inputs = self.cuda_inputs
        cuda_outputs = self.cuda_outputs
        stream = self.stream

        host_inputs[0] = np.ravel(np.zeros_like(np_img))

        self.cuda_context.push()
        t_begin = time.perf_counter()

        np.copyto(host_inputs[0], np.ravel(np_img))
        cuda.memcpy_htod_async(cuda_inputs[0], host_inputs[0], stream)

        self.engine_context.execute_async(batch_size=1,
                                          bindings=bindings,
                                          stream_handle=stream.handle)

        cif = None
        caf = None
        cif_names = ['cif']
        caf_names = ['caf']
        for i in range(1, self.engine.num_bindings):
            cuda.memcpy_dtoh_async(host_outputs[i - 1], cuda_outputs[i - 1],
                                   stream)

        stream.synchronize()

        for i in range(1, self.engine.num_bindings):
            shape = self.engine.get_binding_shape(i)
            name = self.engine.get_binding_name(i)
            total_shape = np.prod(shape)
            output = host_outputs[i - 1][0:total_shape]
            output = np.reshape(output, tuple(shape))
            if name in cif_names:
                cif = output[0]
            elif name in caf_names:
                caf = output[0]

        heads = [cif, caf]
        self.cuda_context.pop()

        inference_time = time.perf_counter() - t_begin

        fields = heads

        t_decoder_begin = time.perf_counter()
        decoder = CifCafDecoder()
        predictions = decoder.decode(fields)
        decoder_time = time.perf_counter() - t_decoder_begin
        self.fps = convert_infr_time_to_fps(inference_time + decoder_time)

        result = []

        for i, pred_object in enumerate(predictions):
            pred = pred_object.data
            pred_visible = pred[pred[:, 2] > .2]
            xs = pred_visible[:, 0]
            ys = pred_visible[:, 1]

            if len(xs) == 0 or len(ys) == 0:
                continue

            x, y, w, h = pred_object.bbox()

            x_min = int(x)
            x_max = int(x + w)
            y_min = int(y)
            y_max = int(y + h)
            xmin = int(max(x_min - .15 * w, 0))
            xmax = int(min(x_max + .15 * w, self.w))
            ymin = int(max(y_min - .2 * h, 0))
            ymax = int(min(y_max + .05 * h, self.h))
            bbox_dict = {
                "id": "1-" + str(i),
                "bbox": [ymin / self.h, xmin / self.w, ymax / self.h, xmax / self.w],
                "score": 0.9,
                "face": None,
            }

            # extract face bounding box
            if np.all(pred[[0, 1, 2, 5, 6], -1] > 0.15):
                x_min_face = int(pred[6, 0])
                x_max_face = int(pred[5, 0])
                y_max_face = int((pred[5, 1] + pred[6, 1]) / 2)
                y_eyes = int((pred[1, 1] + pred[2, 1]) / 2)
                y_min_face = 2 * y_eyes - y_max_face
                if (y_max_face - y_min_face > 0) and (x_max_face - x_min_face > 0):
                    h_crop = y_max_face - y_min_face
                    x_min_face = int(max(0, x_min_face - 0.1 * h_crop))
                    y_min_face = int(max(0, y_min_face - 0.1 * h_crop))
                    x_max_face = int(min(self.w, x_min_face + 1.1 * h_crop))
                    y_max_face = int(min(self.h, y_min_face + 1.1 * h_crop))
                    bbox_dict["face"] = [
                        y_min_face / self.h, x_min_face / self.w,
                        y_max_face / self.h, x_max_face / self.w
                    ]

            result.append(bbox_dict)

        return result
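
The face crop above relies on COCO keypoint ordering: indices 0, 1, 2, 5, 6 are nose, left eye, right eye, left shoulder and right shoulder, so `y_min_face = 2 * y_eyes - y_max_face` mirrors the shoulder midline across the eye line to estimate the top of the face. A small numeric check:

# Suppose the eyes sit at y = 100 and the shoulder midline at y = 160.
y_eyes = (98 + 102) // 2               # = 100
y_max_face = (158 + 162) // 2          # = 160
y_min_face = 2 * y_eyes - y_max_face   # = 40: as far above the eyes as the shoulders are below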
Example #8
    def inference(self, resized_rgb_image):
        """
        This method will perform inference and return the detected bounding boxes
        Args:
            resized_rgb_image: uint8 numpy array with shape (img_height, img_width, channels)

        Returns:
            result: a dictionary contains of [{"id": 0, "bbox": [x1, y1, x2, y2], "score":s%}, {...}, {...}, ...]

        """
        pil_im = PIL.Image.fromarray(resized_rgb_image)
        preprocess = openpifpaf.transforms.Compose([
            openpifpaf.transforms.NormalizeAnnotations(),
            openpifpaf.transforms.CenterPadTight(16),
            openpifpaf.transforms.EVAL_TRANSFORM,
        ])
        data = openpifpaf.datasets.PilImageList([pil_im], preprocess=preprocess)
        loader = torch.utils.data.DataLoader(
            data, batch_size=1, pin_memory=True,
            collate_fn=openpifpaf.datasets.collate_images_anns_meta)
        t_begin = time.perf_counter()
        for images_batch, _, __ in loader:
            predictions = self.processor.batch(self.net, images_batch, device=self.device)[0]
        inference_time = time.perf_counter() - t_begin
        self.fps = convert_infr_time_to_fps(inference_time)
        result = []
        for i, pred in enumerate(predictions):
            pred = pred.data
            pred_visible = pred[pred[:, 2] > .2]
            xs = pred_visible[:, 0]
            ys = pred_visible[:, 1]
            if len(xs) == 0 or len(ys) == 0:  # guard against predictions with no visible keypoints
                continue
            x_min = int(xs.min())
            x_max = int(xs.max())
            y_min = int(ys.min())
            y_max = int(ys.max())
            w = x_max - x_min
            h = y_max - y_min
            xmin = int(max(x_min - .15 * w, 0))
            xmax = int(min(x_max + .15 * w, self.w))
            ymin = int(max(y_min - .2 * h, 0))
            ymax = int(min(y_max + .05 * h, self.h))
            bbox_dict = {"id": "1-" + str(i), "bbox": [ymin / self.h, xmin / self.w, ymax / self.h, xmax / self.w],
                    "score": 0.9, "face": None}
            # extracting face bounding box
            if np.all(pred[[0, 1, 2, 5, 6], -1] > 0.15):
                x_min_face = int(pred[6, 0])
                x_max_face = int(pred[5, 0])
                y_max_face = int((pred[5, 1] + pred[6, 1]) / 2)
                y_eyes = int((pred[1, 1] + pred[2, 1]) / 2)
                y_min_face = 2 * y_eyes - y_max_face
                if (y_max_face - y_min_face > 0) and (x_max_face - x_min_face > 0):
                    h_crop = y_max_face - y_min_face
                    x_min_face = int(max(0, x_min_face - 0.1 * h_crop))
                    y_min_face = int(max(0, y_min_face - 0.1 * h_crop))
                    x_max_face = int(min(self.w, x_min_face + 1.1 * h_crop))
                    y_max_face = int(min(self.h, y_min_face + 1.1 * h_crop))
                    bbox_dict["face"] = [y_min_face / self.h, x_min_face / self.w, y_max_face / self.h, x_max_face / self.w]

            result.append(bbox_dict)

        return result
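
A minimal end-to-end sketch of how any of these backends would be driven; the `Detector` wrapper, its constructor, the input resolution, and the video path are all hypothetical, and each example above is one backend's implementation of the same `inference` contract:

import cv2 as cv

detector = Detector(config)              # hypothetical wrapper exposing inference() and .fps
cap = cv.VideoCapture("input.mp4")       # hypothetical input video
while cap.isOpened():
    ok, frame = cap.read()
    if not ok:
        break
    # The detectors expect an RGB frame resized to the model's input resolution.
    rgb = cv.cvtColor(cv.resize(frame, (300, 300)), cv.COLOR_BGR2RGB)
    objects = detector.inference(rgb)    # list of {"id", "bbox", "score", ...} dicts
    print(len(objects), "objects at", detector.fps, "fps")
cap.release()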