Example #1
0
 def texts2tensor(self, texts):
     """
     Pack the word ids of several texts into a single PaddleTensor.
     Args:
          texts(iterable): each element is indexed with the key 'processed',
                           whose value is the sequence of word ids of one text
     Returns:
          tensor(PaddleTensor): int64 tensor named "words"; its lod holds the
                                cumulative lengths of the texts
     """
     word_ids = []
     offsets = [0]
     for sample in texts:
         ids = sample['processed']
         word_ids += ids
         # Every offset is the running total of ids collected so far.
         offsets.append(offsets[-1] + len(ids))
     packed = np.array(word_ids).astype('int64')
     tensor = PaddleTensor(packed)
     tensor.name = "words"
     tensor.lod = [offsets]
     tensor.shape = [offsets[-1], 1]
     return tensor
Example #2
0
    def object_detection(self,
                         paths=None,
                         images=None,
                         use_gpu=False,
                         batch_size=1,
                         output_dir='detection_result',
                         score_thresh=0.5,
                         visualization=True):
        """API of Object Detection.

        :param paths: the path of images.
        :type paths: list, each element is correspond to the path of an image.
        :param images: data of images, [N, H, W, C]
        :type images: numpy.ndarray
        :param use_gpu: whether to use gpu or not.
        :type use_gpu: bool
        :param batch_size: batch size.
        :type batch_size: int
        :param output_dir: the directory to store the detection result.
        :type output_dir: str
        :param score_thresh: the threshold of detection confidence.
        :type score_thresh: float
        :param visualization: whether to draw bounding box and save images.
        :type visualization: bool
        :return: the outputs of ``self.ssd.postprocess`` for every batch,
            concatenated in batch order.
        :rtype: list
        """
        resize_image = self.ssd.ResizeImage(target_size=300,
                                            interp=1,
                                            max_size=0,
                                            use_cv2=False)
        # The reader receives `paths` exactly as passed in (possibly None);
        # only postprocess gets the normalised list below.
        data_reader = partial(self.ssd.reader,
                              paths,
                              images,
                              resize_image=resize_image)
        batch_reader = fluid.io.batch(data_reader, batch_size=batch_size)
        paths = paths if paths else []
        res = []
        for iter_id, feed_data in enumerate(batch_reader()):
            np_data = np.array(feed_data).astype('float32')
            # This used to be guarded by `np_data.shape == 1`, which compares
            # a tuple with an int and is never true, so squeezing axis 1 has
            # always been the path actually taken.
            np_data = np.squeeze(np_data, axis=1)
            data_tensor = PaddleTensor(np_data.copy())
            if use_gpu:
                data_out = self.gpu_predictor.run([data_tensor])
            else:
                data_out = self.cpu_predictor.run([data_tensor])
            output = self.ssd.postprocess(paths=paths,
                                          images=images,
                                          data_out=data_out,
                                          score_thresh=score_thresh,
                                          label_names=self.label_names,
                                          output_dir=output_dir,
                                          # index of the first image of this batch
                                          handle_id=iter_id * batch_size,
                                          visualization=visualization)
            res += output
        return res
Example #3
0
    def reconstruct(self,
                    images=None,
                    paths=None,
                    use_gpu=False,
                    visualization=False,
                    output_dir="dcscn_output"):
        """
        API for super resolution.

        Args:
            images (list(numpy.ndarray)): images data, shape of each is [H, W, C], the color space is BGR.
            paths (list[str]): The paths of images.
            use_gpu (bool): Whether to use gpu.
            visualization (bool): Whether to save image or not.
            output_dir (str): The path to store output images.

        Returns:
            res (list[dict]): each element in the list is a dict, the keys and values are:
                save_path (str, optional): the path to save images. (Exists only if visualization is True)
                data (numpy.ndarray): data of post processed image.

        Raises:
            RuntimeError: if use_gpu is True and CUDA_VISIBLE_DEVICES is
                unset, empty, or does not start with a digit.
        """
        if use_gpu:
            try:
                _places = os.environ["CUDA_VISIBLE_DEVICES"]
                int(_places[0])
            # KeyError: variable unset; IndexError: empty value;
            # ValueError: first character is not a digit.
            except (KeyError, IndexError, ValueError):
                raise RuntimeError(
                    "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id."
                )

        all_data = list(reader(images, paths))
        res = list()

        # Images are fed to the predictor one at a time (batch of one).
        for sample in all_data:
            image_x = PaddleTensor(np.array([sample['img_x']]).copy())
            image_x2 = PaddleTensor(np.array([sample['img_x2']]).copy())
            output = self.gpu_predictor.run([
                image_x, image_x2
            ]) if use_gpu else self.cpu_predictor.run([image_x, image_x2])

            output = np.expand_dims(output[0].as_ndarray(), axis=1)

            out = postprocess(data_out=output,
                              org_im=sample['org_im'],
                              org_im_shape=sample['org_im_shape'],
                              org_im_path=sample['org_im_path'],
                              output_dir=output_dir,
                              visualization=visualization)
            res.append(out)
        return res
Example #4
0
 def texts2tensor(self, texts):
     """
     Flatten the word ids of all texts into one PaddleTensor.
     Args:
          texts(list): each element is a dict with a 'processed' key whose value is word_ids, such as
                       texts = [{'processed': [23, 89, 43, 906]}]
     Returns:
          tensor(PaddleTensor): int64 tensor named "words"; its lod records where
                                each text starts and ends in the flattened data
     """
     flat_ids = []
     boundaries = [0]
     for entry in texts:
         processed = entry['processed']
         flat_ids += processed
         # Next boundary = previous boundary + length of this text.
         boundaries.append(len(processed) + boundaries[-1])
     tensor = PaddleTensor(np.array(flat_ids).astype('int64'))
     tensor.name = "words"
     tensor.lod = [boundaries]
     total_ids = boundaries[-1]
     tensor.shape = [total_ids, 1]
     return tensor
Example #5
0
    def video_stream_segment(self,
                             frame_org,
                             frame_id,
                             prev_gray,
                             prev_cfd,
                             use_gpu=False):
        """
        API for human video segmentation.

        Args:
           frame_org (numpy.ndarray): frame data, shape of each is [H, W, C], the color space is BGR.
           frame_id (int): index of the frame to be decoded.
           prev_gray (numpy.ndarray): gray scale image of last frame, shape of each is [H, W]
           prev_cfd (numpy.ndarray): fusion image from optical flow image and segment result, shape of each is [H, W]
           use_gpu (bool): Whether to use gpu.

        Returns:
            A list ``[img_matting, cur_gray, optflow_map]``:
            img_matting (numpy.ndarray): data of segmentation mask, resized to the size of frame_org.
            cur_gray (numpy.ndarray): gray scale image of current frame, shape of each is [H, W]
            optflow_map (numpy.ndarray): optical flow image of current frame, shape of each is [H, W]

        """
        resize_h = 192
        resize_w = 192
        is_init = True
        # ndarray shape is (rows, cols, ...): rows are the frame height.
        org_h = int(frame_org.shape[0])
        org_w = int(frame_org.shape[1])
        disflow = cv2.DISOpticalFlow_create(
            cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST)
        frame = preprocess_v(frame_org, resize_w, resize_h)
        image = PaddleTensor(np.array([frame.copy()]))
        output = self.gpu_predictor.run(
            [image]) if use_gpu else self.cpu_predictor.run([image])
        score_map = output[1].as_ndarray()
        # Channel-first -> channel-last for the OpenCV calls below.
        frame = np.transpose(frame, axes=[1, 2, 0])
        score_map = np.transpose(np.squeeze(score_map, 0), axes=[1, 2, 0])
        cur_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        cur_gray = cv2.resize(cur_gray, (resize_w, resize_h))
        score_map = 255 * score_map[:, :, 1]
        if frame_id == 1:
            # First frame: there is no history yet, start from blank state.
            prev_gray = np.zeros((resize_h, resize_w), np.uint8)
            prev_cfd = np.zeros((resize_h, resize_w), np.float32)
        # NOTE(review): is_init is True for every frame, not only the first;
        # confirm against postprocess_v whether later frames should pass False.
        optflow_map = postprocess_v(cur_gray, score_map, prev_gray, prev_cfd,
                                    disflow, is_init)

        optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0)
        optflow_map = threshold_mask(optflow_map, thresh_bg=0.2, thresh_fg=0.8)
        # cv2.resize takes dsize as (width, height); the interpolation flag
        # must be given by keyword because the third positional slot is `dst`.
        img_matting = cv2.resize(optflow_map, (org_w, org_h),
                                 interpolation=cv2.INTER_LINEAR)

        return [img_matting, cur_gray, optflow_map]
Example #6
0
    def object_detection(self,
                         paths=None,
                         images=None,
                         batch_size=1,
                         use_gpu=False,
                         output_dir='detection_result',
                         score_thresh=0.5,
                         visualization=True):
        """Run object detection batch by batch and collect the results.

        Args:
            paths (list[str]): The paths of images.
            images (list(numpy.ndarray)): images data, shape of each is [H, W, C]
            batch_size (int): batch size.
            use_gpu (bool): Whether to use gpu.
            output_dir (str): The path to store output images.
            visualization (bool): Whether to save image or not.
            score_thresh (float): threshold for object detection.

        Returns:
            res (list[dict]): The result of coco2017 detection. keys include 'data', 'save_path', the corresponding value is:
                data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is:
                    left (float): The X coordinate of the upper left corner of the bounding box;
                    top (float): The Y coordinate of the upper left corner of the bounding box;
                    right (float): The X coordinate of the lower right corner of the bounding box;
                    bottom (float): The Y coordinate of the lower right corner of the bounding box;
                    label (str): The label of detection result;
                    confidence (float): The confidence of detection result.
                save_path (str, optional): The path to save output images.
        """
        paths = paths or list()
        batch_reader = fluid.io.batch(partial(reader, paths, images),
                                      batch_size=batch_size)
        results = []
        for batch_index, batch in enumerate(batch_reader()):
            batch = np.array(batch)
            # Column 0 of every sample is what gets fed to the predictor.
            first_column = list(batch[:, 0])
            image_tensor = PaddleTensor(np.array(first_column).copy())
            predictor = self.gpu_predictor if use_gpu else self.cpu_predictor
            data_out = predictor.run([image_tensor])

            batch_results = postprocess(
                paths=paths,
                images=images,
                data_out=data_out,
                score_thresh=score_thresh,
                label_names=self.label_names,
                output_dir=output_dir,
                # index of the first image of this batch
                handle_id=batch_index * batch_size,
                visualization=visualization)
            results.extend(batch_results)
        return results
Example #7
0
def fake_input(img):
    """Wrap ``img`` in a one-element list holding a FLOAT32 PaddleTensor named "image".

    The tensor takes its shape from ``img.shape`` and its data from the
    flattened values of ``img``.
    """
    flat_values = img.flatten().tolist()
    tensor = PaddleTensor()
    tensor.name = "image"
    tensor.shape = img.shape
    tensor.dtype = PaddleDType.FLOAT32
    tensor.data = PaddleBuf(flat_values)
    return [tensor]
 def copyToTensor(self, batch_size):
     """
     Build a PaddleTensor from this object's name, dtype and data.
     Args:
          batch_size: first dimension of the tensor shape; the second
                      dimension is self.shape_size
     Returns:
          PaddleTensor: the populated tensor
     """
     result = PaddleTensor()
     result.name = self.name
     dims = [batch_size, self.shape_size]
     result.shape = dims
     # self.dtype selects the entry of self.list used as the tensor dtype.
     result.dtype = self.list[self.dtype]
     buffer = PaddleBuf(self.data)
     result.data = buffer
     return result
Example #9
0
 def texts2tensor(self, texts):
     """
     Convert the texts to word ids and pack them into one PaddleTensor.
     Args:
          texts(list): texts; each one is converted with word_to_ids
     Returns:
          tensor(PaddleTensor): int64 tensor named "words"; its lod holds the
                                cumulative id counts of the texts
     """
     flat_ids = []
     offsets = [0]
     for text in texts:
         ids = word_to_ids(text,
                           self.word2id_dict,
                           self.word_replace_dict,
                           oov_id=self.oov_id)
         flat_ids += ids
         # Next offset = previous offset + number of ids of this text.
         offsets.append(offsets[-1] + len(ids))
     packed = np.array(flat_ids).astype('int64')
     tensor = PaddleTensor(packed)
     tensor.name = "words"
     tensor.lod = [offsets]
     tensor.shape = [offsets[-1], 1]
     return tensor
Example #10
0
def array2tensor(ndarray):
    """ convert numpy array to PaddleTensor

    Args:
        ndarray (np.ndarray): array to convert. Any dtype whose name contains
            "float" is tagged FLOAT32; otherwise any dtype whose name contains
            "int" is tagged INT64.

    Returns:
        PaddleTensor: tensor named "data" with the shape of ``ndarray`` and a
        buffer built from its flattened values.

    Raises:
        AssertionError: if ``ndarray`` is not an np.ndarray.
        ValueError: if the dtype name contains neither "float" nor "int".
    """
    assert isinstance(ndarray, np.ndarray), "input type must be np.ndarray"
    # Validate the dtype before building anything so the error reports the
    # offending array dtype rather than the tensor's still-unset dtype.
    dtype_name = str(ndarray.dtype)
    if "float" in dtype_name:
        tensor_dtype = PaddleDType.FLOAT32
    elif "int" in dtype_name:
        tensor_dtype = PaddleDType.INT64
    else:
        raise ValueError("{} type ndarray is unsupported".format(ndarray.dtype))

    tensor = PaddleTensor()
    tensor.name = "data"
    tensor.shape = ndarray.shape
    tensor.dtype = tensor_dtype
    tensor.data = PaddleBuf(ndarray.flatten().tolist())
    return tensor
Example #11
0
def preprocess(img):
    """Resize and normalise ``img`` and wrap it in a one-element tensor list.

    The image is resized to ``input_size`` x ``input_size`` and transposed
    with axes (2, 0, 1). For the "mobilenet-ssd" model it is scaled as
    ``(img - 127.5) * 0.007843``; for any other model a per-channel mean is
    subtracted. The result is returned as ``[PaddleTensor]`` named "data".
    """
    chw = cv2.resize(img, (input_size, input_size)).transpose((2, 0, 1))
    if modelname != "mobilenet-ssd":
        channel_mean = np.array([103.94, 116.669, 123.68], np.float32)
        # Reshape to (3, 1, 1) so the mean broadcasts over the last two axes.
        chw = chw - channel_mean.reshape([3, 1, 1])
    else:
        chw = (chw - 127.5) * 0.007843
    flat_values = chw.flatten().astype("float32").tolist()
    tensor = PaddleTensor()
    tensor.name = "data"
    tensor.shape = [1, 3, input_size, input_size]
    tensor.dtype = PaddleDType.FLOAT32
    tensor.data = PaddleBuf(flat_values)
    return [tensor]
Example #12
0
def warp_input(image_data, input_size):
    """
    Wrap the input image in a paddle tensor.
    :param image_data:          the input image
    :param input_size:          size of the input image; assigned as the tensor shape
    :return:                    FLOAT32 PaddleTensor named 'image'
    """
    # Flatten to a plain float32 list, which is what the buffer is built from.
    flat_values = image_data.flatten().astype(np.float32).tolist()

    tensor = PaddleTensor()
    tensor.name = 'image'
    tensor.shape = input_size
    tensor.dtype = PaddleDType.FLOAT32
    tensor.data = PaddleBuf(flat_values)
    return tensor
Example #13
0
    def predict_proba(self, text_list, batch_size=32, max_seq_len=300):
        """Predict on ``text_list`` in batches and return the probabilities.

        Batches come from ``self.batch``; each batch is run either through the
        zero-copy tensors or through ``self.predictor.run``, depending on
        ``self.zero_copy``. The per-batch outputs are concatenated along
        axis 0. Total predict and tokenize times are logged at the end.
        """
        outputs = list()
        total_predict = 0
        total_tokenize = 0
        batches = self.batch(text_list, batch_size, max_seq_len, max_ensure=False)
        for batch_ids, batch_tokenize_time in batches:
            total_tokenize += batch_tokenize_time
            tick = time.time()
            if not self.zero_copy:
                feed = [PaddleTensor(np.array(batch_ids))]
                logits = self.predictor.run(feed)[0].as_ndarray()
            else:
                self.input_tensor.copy_from_cpu(np.array(batch_ids))
                self.predictor.zero_copy_run()
                logits = self.output_tensor.copy_to_cpu()
            total_predict += time.time() - tick

            outputs.append(logits)
        message = "predict time: %.4fs, tokenize_time: %.4fs" % (
            total_predict, total_tokenize)
        logging.info(message)
        return np.concatenate(outputs, axis=0)
Example #14
0
    def segmentation(self,
                     images=None,
                     paths=None,
                     data=None,
                     batch_size=1,
                     use_gpu=False,
                     output_dir='ace2p_output',
                     visualization=False):
        """
        API for human parsing.

        Args:
            images (list[numpy.ndarray]): images data, shape of each is [H, W, C], color space is BGR.
            paths (list[str]): The paths of images.
            data (dict): legacy input; when it contains the key 'image', its
                value is appended to paths.
            batch_size (int): batch size.
            use_gpu (bool): Whether to use gpu.
            output_dir (str): The path to store output images.
            visualization (bool): Whether to save output images or not.

        Returns:
            res (list[dict]): The result of human parsing and original path of images.

        Raises:
            RuntimeError: if use_gpu is True and CUDA_VISIBLE_DEVICES is
                unset, empty, or does not start with a digit.
        """
        if use_gpu:
            try:
                _places = os.environ["CUDA_VISIBLE_DEVICES"]
                int(_places[0])
            # KeyError: variable unset; IndexError: empty value;
            # ValueError: first character is not a digit.
            except (KeyError, IndexError, ValueError):
                raise RuntimeError(
                    "Attempt to use GPU for prediction, but environment variable CUDA_VISIBLE_DEVICES was not set correctly."
                )

        # compatibility with older versions
        if data and 'image' in data:
            # Build a new list so the caller's `paths` is not extended in place.
            paths = list(paths or []) + list(data['image'])

        # get all data
        scale = (473, 473)  # size of preprocessed image.
        rotation = 0  # rotation angle, used for obtaining affine matrix in preprocess.
        all_data = list(reader(images, paths, scale, rotation))

        total_num = len(all_data)
        loop_num = int(np.ceil(total_num / batch_size))

        res = []
        for iter_id in range(loop_num):
            handle_id = iter_id * batch_size
            # Slicing clips at the end of the list, so the last batch may be
            # shorter than batch_size.
            batch_data = all_data[handle_id:handle_id + batch_size]
            # feed batch image
            batch_image = np.array([item['image'] for item in batch_data])
            batch_image = PaddleTensor(batch_image.astype('float32'))
            data_out = self.gpu_predictor.run([
                batch_image
            ]) if use_gpu else self.cpu_predictor.run([batch_image])
            # Convert the output once per batch instead of once per image.
            batch_out = data_out[0].as_ndarray()
            # postprocess one by one
            for i, item in enumerate(batch_data):
                out = postprocess(
                    data_out=batch_out[i],
                    org_im=item['org_im'],
                    org_im_path=item['org_im_path'],
                    image_info=item['image_info'],
                    output_dir=output_dir,
                    visualization=visualization,
                    palette=self.palette)
                res.append(out)
        return res
def array2tensor(ndarray):
    """Wrap a numpy array in a PaddleTensor.

    The array is handed to the PaddleTensor constructor as ``data``.
    An AssertionError is raised when the input is not an np.ndarray.
    """
    is_ndarray = isinstance(ndarray, np.ndarray)
    assert is_ndarray, "input type must be np.ndarray"
    return PaddleTensor(data=ndarray)
Example #16
0
    def test_inference_api(self):
        """Round-trip int32, int64 and float32 arrays through PaddleTensor.

        For each dtype the test checks the raw buffer contents, the reported
        PaddleDType, the ``tolist`` output, the ``as_ndarray`` type, and the
        contents after ``data.reset``.
        """
        # (source array, dtype name for tolist, raw accessor on the buffer,
        #  expected PaddleDType)
        cases = [
            (np.random.randint(10, 20, size=[20, 2]).astype('int32'), 'int32',
             'int32_data', PaddleDType.INT32),
            (np.random.randint(10, 20, size=[20, 2]).astype('int64'), 'int64',
             'int64_data', PaddleDType.INT64),
            (np.random.randn(20, 2).astype('float32'), 'float32',
             'float_data', PaddleDType.FLOAT32),
        ]
        for source, dtype_name, raw_accessor, expected_dtype in cases:
            paddle_tensor = PaddleTensor(source)
            raw_values = np.array(
                getattr(paddle_tensor.data, raw_accessor)()).reshape(*[20, 2])
            # Compare element-wise: comparing `a.all()` with `b.all()` only
            # compares two booleans and passes for any all-nonzero arrays.
            self.assertTrue(np.array_equal(raw_values, source))
            self.assertEqual(paddle_tensor.dtype, expected_dtype)
            self.assertEqual(
                type(paddle_tensor.data.tolist(dtype_name)),
                type(source.tolist()))
            self.assertEqual(
                paddle_tensor.data.tolist(dtype_name),
                source.ravel().tolist())
            self.assertEqual(type(paddle_tensor.as_ndarray()), type(source))
            paddle_tensor.data.reset(source)
            self.assertTrue(
                np.array_equal(paddle_tensor.as_ndarray(), source))
Example #17
0
    def segment(self,
                images=None,
                paths=None,
                batch_size=1,
                use_gpu=False,
                visualization=False,
                output_dir='humanseg_server_output'):
        """
        API for human segmentation.

        Args:
            images (list(numpy.ndarray)): images data, shape of each is [H, W, C], the color space is BGR.
            paths (list[str]): The paths of images.
            batch_size (int): batch size.
            use_gpu (bool): Whether to use gpu.
            visualization (bool): Whether to save image or not.
            output_dir (str): The path to store output images.

        Returns:
            res (list[dict]): each element in the list is a dict, the keys and values are:
                save_path (str, optional): the path to save images. (Exists only if visualization is True)
                data (numpy.ndarray): data of post processed image.

        Raises:
            RuntimeError: if use_gpu is True and CUDA_VISIBLE_DEVICES is
                unset, empty, or does not start with a digit.
        """
        if use_gpu:
            try:
                _places = os.environ["CUDA_VISIBLE_DEVICES"]
                int(_places[0])
            # KeyError: variable unset; IndexError: empty value;
            # ValueError: first character is not a digit.
            except (KeyError, IndexError, ValueError):
                raise RuntimeError(
                    "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id."
                )

        all_data = list(reader(images, paths))
        total_num = len(all_data)
        loop_num = int(np.ceil(total_num / batch_size))
        res = list()
        for iter_id in range(loop_num):
            handle_id = iter_id * batch_size
            # Slicing clips at the end of the list, so the last batch may be
            # shorter than batch_size.
            batch_data = all_data[handle_id:handle_id + batch_size]
            # feed batch image
            batch_image = np.array([item['image'] for item in batch_data])
            batch_image = PaddleTensor(batch_image.copy())
            output = self.gpu_predictor.run([
                batch_image
            ]) if use_gpu else self.cpu_predictor.run([batch_image])
            # The second predictor output is used; index 1 along its axis 1
            # is kept, with that axis restored as a singleton.
            output = output[1].as_ndarray()
            output = np.expand_dims(output[:, 1, :, :], axis=1)
            # postprocess one by one
            for i, item in enumerate(batch_data):
                out = postprocess(data_out=output[i],
                                  org_im=item['org_im'],
                                  org_im_shape=item['org_im_shape'],
                                  org_im_path=item['org_im_path'],
                                  output_dir=output_dir,
                                  visualization=visualization)
                res.append(out)
        return res
Example #18
0
 def video_segment(self,
                   video_path=None,
                   use_gpu=False,
                   save_dir='humanseg_server_video'):
     """
     Segment humans in a video file or in the stream of camera 0.

     Args:
         video_path (str): path of the video to process. When it is None the
             camera is used and every result frame is shown in a window
             (press 'q' to stop); otherwise the result is written to
             <save_dir>/result.avi.
         use_gpu (bool): Whether to use gpu.
         save_dir (str): directory of the output video; created if missing.

     Raises:
         IOError: if the video file or the camera cannot be opened.
     """
     resize_h = 512
     resize_w = 512
     if not video_path:
         cap_video = cv2.VideoCapture(0)
     else:
         cap_video = cv2.VideoCapture(video_path)
     if not cap_video.isOpened():
         raise IOError("Error opening video stream or file, "
                       "--video_path whether existing: {}"
                       " or camera whether working".format(video_path))
     width = int(cap_video.get(cv2.CAP_PROP_FRAME_WIDTH))
     height = int(cap_video.get(cv2.CAP_PROP_FRAME_HEIGHT))
     disflow = cv2.DISOpticalFlow_create(
         cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST)
     prev_gray = np.zeros((resize_h, resize_w), np.uint8)
     prev_cfd = np.zeros((resize_h, resize_w), np.float32)
     # NOTE(review): is_init is never set to False in this method, so every
     # frame is passed to postprocess_v as the initial one - confirm intent.
     is_init = True
     fps = cap_video.get(cv2.CAP_PROP_FPS)
     # Same condition as before: any non-None path selects file output.
     save_to_file = video_path is not None
     cap_out = None
     try:
         if save_to_file:
             print('Please wait. It is computing......')
             if not osp.exists(save_dir):
                 os.makedirs(save_dir)
             save_path = osp.join(save_dir, 'result' + '.avi')
             cap_out = cv2.VideoWriter(
                 save_path, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps,
                 (width, height))
         while cap_video.isOpened():
             ret, frame_org = cap_video.read()
             if not ret:
                 break
             frame = preprocess_v(frame_org, resize_w, resize_h)
             image = PaddleTensor(np.array([frame.copy()]))
             output = self.gpu_predictor.run([
                 image
             ]) if use_gpu else self.cpu_predictor.run([image])
             score_map = output[1].as_ndarray()
             # Channel-first -> channel-last for the OpenCV calls below.
             frame = np.transpose(frame, axes=[1, 2, 0])
             score_map = np.transpose(np.squeeze(score_map, 0),
                                      axes=[1, 2, 0])
             cur_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
             cur_gray = cv2.resize(cur_gray, (resize_w, resize_h))
             score_map = 255 * score_map[:, :, 1]
             optflow_map = postprocess_v(cur_gray, score_map, prev_gray,
                                         prev_cfd, disflow, is_init)
             # Keep this frame's state for the next iteration.
             prev_gray = cur_gray.copy()
             prev_cfd = optflow_map.copy()
             optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0)
             optflow_map = threshold_mask(optflow_map,
                                          thresh_bg=0.2,
                                          thresh_fg=0.8)
             # The interpolation flag must be given by keyword: the third
             # positional slot of cv2.resize is `dst`.
             img_matting = cv2.resize(optflow_map, (width, height),
                                      interpolation=cv2.INTER_LINEAR)
             img_matting = np.repeat(img_matting[:, :, np.newaxis],
                                     3,
                                     axis=2)
             # Blend the frame over a white background using the matte.
             bg_im = np.ones_like(img_matting) * 255
             comb = (img_matting * frame_org +
                     (1 - img_matting) * bg_im).astype(np.uint8)
             if save_to_file:
                 cap_out.write(comb)
             else:
                 cv2.imshow('HumanSegmentation', comb)
                 if cv2.waitKey(1) & 0xFF == ord('q'):
                     break
     finally:
         # Release the capture (and writer) even if a frame fails.
         cap_video.release()
         if cap_out is not None:
             cap_out.release()
Example #19
0
    def face_detection(self,
                       images=None,
                       paths=None,
                       data=None,
                       batch_size=1,
                       use_gpu=False,
                       output_dir='face_detector_320_predict_output',
                       visualization=False,
                       confs_threshold=0.5,
                       iou_threshold=0.5):
        """
        API for face detection.

        Args:
            images (list(numpy.ndarray)): images data, shape of each is [H, W, C], color space is BGR.
            paths (list[str]): The paths of images.
            data (dict): legacy input; when it contains the key 'image', its
                value is appended to paths.
            batch_size (int): batch size.
            use_gpu (bool): Whether to use gpu.
            output_dir (str): The path to store output images.
            visualization (bool): Whether to save image or not.
            confs_threshold (float): threshold for confidence coefficient.
            iou_threshold (float): threshold for iou.

        Returns:
            res (list[dict()]): The result of face detection and save path of images.

        Raises:
            RuntimeError: if use_gpu is True and CUDA_VISIBLE_DEVICES is
                unset, empty, or does not start with a digit.
        """
        if use_gpu:
            try:
                _places = os.environ["CUDA_VISIBLE_DEVICES"]
                int(_places[0])
            # KeyError: variable unset; IndexError: empty value;
            # ValueError: first character is not a digit.
            except (KeyError, IndexError, ValueError):
                raise RuntimeError(
                    "Attempt to use GPU for prediction, but environment variable CUDA_VISIBLE_DEVICES was not set correctly."
                )

        # compatibility with older versions
        if data and 'image' in data:
            # Build a new list so the caller's `paths` is not extended in place.
            paths = list(paths or []) + list(data['image'])

        # get all data
        all_data = list(reader(images, paths))

        total_num = len(all_data)
        loop_num = int(np.ceil(total_num / batch_size))

        res = []
        for iter_id in range(loop_num):
            handle_id = iter_id * batch_size
            # Slicing clips at the end of the list, so the last batch may be
            # shorter than batch_size.
            batch_data = all_data[handle_id:handle_id + batch_size]
            # feed batch image
            batch_image = np.array([item['image'] for item in batch_data])
            batch_image = PaddleTensor(batch_image.astype('float32'))
            data_out = self.gpu_predictor.run([
                batch_image
            ]) if use_gpu else self.cpu_predictor.run([batch_image])
            confidences = data_out[0].as_ndarray()
            boxes = data_out[1].as_ndarray()

            # postprocess one by one
            for i, item in enumerate(batch_data):
                out = postprocess(confidences=confidences[i],
                                  boxes=boxes[i],
                                  orig_im=item['orig_im'],
                                  orig_im_shape=item['orig_im_shape'],
                                  orig_im_path=item['orig_im_path'],
                                  output_dir=output_dir,
                                  visualization=visualization,
                                  confs_threshold=confs_threshold,
                                  iou_threshold=iou_threshold)
                res.append(out)
        return res
Example #20
0
    def face_detection(self,
                       images=None,
                       paths=None,
                       data=None,
                       use_gpu=False,
                       output_dir='detection_result',
                       visualization=False,
                       shrink=0.5,
                       confs_threshold=0.6):
        """
        API for face detection.

        Every element yielded by ``reader(images, paths, shrink)`` is fed to
        the predictor on its own and the first output is post-processed.

        Args:
            images (list(numpy.ndarray)): images data, shape of each is [H, W, C]
            paths (list[str]): The paths of images.
            data (dict): legacy input kept for compatibility with older
                versions. If it has the key 'image', that value is appended to
                ``paths``; otherwise, if it has the key 'data', that value is
                appended to ``images``.
            use_gpu (bool): Whether to use gpu.
            output_dir (str): The path to store output images.
            visualization (bool): Whether to save image or not.
            shrink (float): parameter to control the resize scale in preprocess.
            confs_threshold (float): confidence threshold.

        Returns:
            res (list[dict]): The result of face detection and save path of images.

        Raises:
            RuntimeError: if ``use_gpu`` is True and CUDA_VISIBLE_DEVICES is
                unset, empty, or does not start with a digit.
        """
        if use_gpu:
            try:
                _places = os.environ["CUDA_VISIBLE_DEVICES"]
                int(_places[0])
            # KeyError: variable unset; IndexError: empty value;
            # ValueError: first character is not a digit.
            except (KeyError, IndexError, ValueError):
                raise RuntimeError(
                    "Attempt to use GPU for prediction, but environment variable CUDA_VISIBLE_DEVICES was not set correctly."
                )

        # compatibility with older versions
        if data:
            # Build new lists instead of using ``+=`` so that the lists passed
            # in by the caller are never extended in place.
            if 'image' in data:
                paths = ([] if paths is None else list(paths)) + list(
                    data['image'])
            elif 'data' in data:
                images = ([] if images is None else list(images)) + list(
                    data['data'])

        res = list()
        # process one by one
        for element in reader(images, paths, shrink):
            # add the leading batch axis expected by the predictor input
            image = np.expand_dims(element['image'], axis=0).astype('float32')
            image_tensor = PaddleTensor(image.copy())
            predictor = self.gpu_predictor if use_gpu else self.cpu_predictor
            data_out = predictor.run([image_tensor])
            out = postprocess(
                data_out=data_out[0].as_ndarray(),
                org_im=element['org_im'],
                org_im_path=element['org_im_path'],
                image_width=element['image_width'],
                image_height=element['image_height'],
                output_dir=output_dir,
                visualization=visualization,
                shrink=shrink,
                confs_threshold=confs_threshold)
            res.append(out)
        return res
Example #21
0
    def bald(self,
             images=None,
             paths=None,
             data=None,
             use_gpu=False,
             org_labels=[[0., 0., 1., 0., 0., 1., 1., 1., 0., 0., 0., 0., 1.]],
             target_labels=None,
             visualization=True,
             output_dir="bald_output"):
        """
        API for attribute editing with the first attribute of the target label flipped.

        For every item yielded by ``reader`` the predictor is run three times
        (label strength 0.30, 0.34 and 0.38) and the three outputs of that
        item are handed to ``postprocess`` together.

        Args:
            images (list(numpy.ndarray)): images data, shape of each is [H, W, C], the color space is BGR.
            paths (list[str]): The paths of images.
            data (dict): key is 'image', the corresponding value is the path to image.
            use_gpu (bool): Whether to use gpu.
            org_labels (list): original attribute labels, forwarded to ``reader``.
                NOTE: the default is a mutable list shared between calls; this
                method only passes it on and never modifies it.
            target_labels (list): target attribute labels, forwarded to ``reader``.
            visualization (bool): Whether to save image or not.
            output_dir (str): The path to store output images.

        Returns:
            res (list[dict]): each element in the list is a dict, the keys and values are:
                save_path (str, optional): the path to save images. (Exists only if visualization is True)
                data (numpy.ndarray): data of post processed image.

        Raises:
            RuntimeError: if ``use_gpu`` is True and CUDA_VISIBLE_DEVICES is
                unset, empty, or does not start with a digit.
        """
        if use_gpu:
            try:
                _places = os.environ["CUDA_VISIBLE_DEVICES"]
                int(_places[0])
            # KeyError: variable unset; IndexError: empty value;
            # ValueError: first character is not a digit.
            except (KeyError, IndexError, ValueError):
                raise RuntimeError(
                    "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id."
                )

        if data and 'image' in data:
            # Build a new list so the caller's ``paths`` is not extended in place.
            paths = ([] if paths is None else list(paths)) + list(
                data['image'])

        all_data = list(reader(images, paths, org_labels, target_labels))

        res = list()
        for item in all_data:
            image_np = item['img']
            org_label_np = [item['org_label']]
            target_label_np = [item['target_label']]
            # Start a fresh list for every image; a single shared list would
            # hand the predictions of earlier images to this image's
            # postprocess call as well.
            outputs = []
            # j = 0, 2, 4 -> label strength 0.30, 0.34, 0.38
            for j in range(0, 5, 2):
                label_trg_tmp = copy.deepcopy(target_label_np)
                # flip the first attribute of the target label
                label_trg_tmp[0][0] = 1.0 - label_trg_tmp[0][0]
                label_trg_tmp = check_attribute_conflict(label_trg_tmp)
                change_num = j * 0.02 + 0.3
                # map labels from {0, 1} to {-change_num, +change_num}
                label_org_tmp = [((x * 2) - 1) * change_num
                                 for x in org_label_np]
                label_trg_tmp = [((x * 2) - 1) * change_num
                                 for x in label_trg_tmp]

                image = PaddleTensor(image_np.copy())
                org_label = PaddleTensor(
                    np.array(label_org_tmp).astype('float32'))
                target_label = PaddleTensor(
                    np.array(label_trg_tmp).astype('float32'))

                # NOTE(review): the GPU and CPU branches feed the two label
                # tensors in a different order. Kept as in the original;
                # confirm against the model's input order which one is right.
                if use_gpu:
                    output = self.gpu_predictor.run(
                        [image, target_label, org_label])
                else:
                    output = self.cpu_predictor.run(
                        [image, org_label, target_label])
                outputs.append(output)

            out = postprocess(data_out=outputs,
                              org_im=item['org_im'],
                              org_im_path=item['org_im_path'],
                              output_dir=output_dir,
                              visualization=visualization)
            res.append(out)
        return res
 def predict_det(self, inputs):
     """
     Feed a copy of ``inputs`` to ``self.predictor`` and return its first
     output converted to a numpy array.
     """
     input_tensor = PaddleTensor(inputs.copy())
     first_output = self.predictor.run([input_tensor])[0]
     return first_output.as_ndarray()
Example #23
0
    def face_detection(self,
                       images=None,
                       paths=None,
                       data=None,
                       use_gpu=False,
                       output_dir='detection_result',
                       visualization=False,
                       score_thresh=0.15):
        """
        API for face detection.

        Every element yielded by ``reader(images, paths)`` is fed to the
        predictor on its own and the first output is post-processed.

        Args:
            images (list(numpy.ndarray)): images data, shape of each is [H, W, C]
            paths (list[str]): The paths of images.
            data (dict): legacy input kept for compatibility with older
                versions. If it has the key 'image', that value is appended
                to ``paths``.
            use_gpu (bool): Whether to use gpu.
            output_dir (str): The path to store output images.
            visualization (bool): Whether to save image or not.
            score_thresh (float): score threshold to limit the detection result.

        Returns:
            res (list[dict]): The result of face detection, keys are 'data' and 'path', the corresponding values are:
            data (list[dict]): 5 keys, where
                'left', 'top', 'right', 'bottom' are the coordinate of detection bounding box,
                'confidence' is the confidence this bbox.
            path (str): The path of original image.

        Raises:
            RuntimeError: if ``use_gpu`` is True and CUDA_VISIBLE_DEVICES is
                unset, empty, or does not start with a digit.
        """
        if use_gpu:
            try:
                _places = os.environ["CUDA_VISIBLE_DEVICES"]
                int(_places[0])
            # KeyError: variable unset; IndexError: empty value;
            # ValueError: first character is not a digit.
            except (KeyError, IndexError, ValueError):
                raise RuntimeError(
                    "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id."
                )

        # compatibility with older versions
        if data and 'image' in data:
            # Build a new list so the caller's ``paths`` is not extended in place.
            paths = ([] if paths is None else list(paths)) + list(
                data['image'])

        res = list()
        # process one by one
        for element in reader(images, paths):
            # add the leading batch axis expected by the predictor input
            image = np.expand_dims(element['image'], axis=0).astype('float32')
            image_tensor = PaddleTensor(image.copy())
            predictor = self.gpu_predictor if use_gpu else self.cpu_predictor
            data_out = predictor.run([image_tensor])
            out = postprocess(
                data_out=data_out[0].as_ndarray(),
                org_im=element['org_im'],
                org_im_path=element['org_im_path'],
                org_im_width=element['org_im_width'],
                org_im_height=element['org_im_height'],
                output_dir=output_dir,
                visualization=visualization,
                score_thresh=score_thresh)
            res.append(out)
        return res
Example #24
0
    def object_detection(self,
                         paths=None,
                         images=None,
                         batch_size=1,
                         use_gpu=False,
                         output_dir='yolov3_pedestrian_detect_output',
                         score_thresh=0.2,
                         visualization=True):
        """API of Object Detection.

        The samples produced by ``reader(paths, images)`` are grouped into
        batches of ``batch_size`` with ``fluid.io.batch``; each batch is fed to
        the predictor as an image tensor plus an image-size tensor.

        Args:
            paths (list[str]): The paths of images.
            images (list(numpy.ndarray)): images data, shape of each is [H, W, C]
            batch_size (int): batch size.
            use_gpu (bool): Whether to use gpu.
            output_dir (str): The path to store output images.
            visualization (bool): Whether to save image or not.
            score_thresh (float): threshold for object detection.

        Returns:
            res (list[dict]): The result of pedestrian detection. keys include 'data', 'save_path', the corresponding value is:
                data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is:
                    left (float): The X coordinate of the upper left corner of the bounding box;
                    top (float): The Y coordinate of the upper left corner of the bounding box;
                    right (float): The X coordinate of the lower right corner of the bounding box;
                    bottom (float): The Y coordinate of the lower right corner of the bounding box;
                    label (str): The label of detection result;
                    confidence (float): The confidence of detection result.
                save_path (str, optional): The path to save output images.

        Raises:
            RuntimeError: if ``use_gpu`` is True and CUDA_VISIBLE_DEVICES is
                unset, empty, or does not start with a digit.
        """
        if use_gpu:
            try:
                _places = os.environ["CUDA_VISIBLE_DEVICES"]
                int(_places[0])
            # KeyError: variable unset; IndexError: empty value;
            # ValueError: first character is not a digit.
            except (KeyError, IndexError, ValueError):
                raise RuntimeError(
                    "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id."
                )

        paths = paths if paths else list()
        data_reader = partial(reader, paths, images)
        batch_reader = fluid.io.batch(data_reader, batch_size=batch_size)
        predictor = self.gpu_predictor if use_gpu else self.cpu_predictor
        res = []
        for iter_id, feed_data in enumerate(batch_reader()):
            feed_data = np.array(feed_data)
            # column 0 of every sample is the image, column 1 its size
            image_tensor = PaddleTensor(np.array(list(feed_data[:, 0])))
            im_size_tensor = PaddleTensor(np.array(list(feed_data[:, 1])))
            data_out = predictor.run([image_tensor, im_size_tensor])

            output = postprocess(
                paths=paths,
                images=images,
                data_out=data_out,
                score_thresh=score_thresh,
                label_names=self.label_names,
                output_dir=output_dir,
                # index of the first sample of this batch
                handle_id=iter_id * batch_size,
                visualization=visualization)
            res.extend(output)
        return res
Example #25
0
 def array2tensor(self, arr_data):
     """
     Wrap ``arr_data`` in a PaddleTensor and return it.
     """
     return PaddleTensor(arr_data)
Example #26
0
    def face_detection(self,
                       images=None,
                       paths=None,
                       data=None,
                       batch_size=1,
                       use_gpu=False,
                       visualization=False,
                       output_dir='detection_result',
                       use_multi_scale=False,
                       shrink=0.5,
                       confs_threshold=0.6):
        """
        API for face detection.

        ``reader`` yields one element per input image, each carrying a list of
        preprocessed entries under 'preprocessed'. The 'image' of every entry
        is fed to the predictor in batches of ``batch_size`` and the resulting
        rows are handed back to ``postprocess`` image by image.

        Args:
            images (list(numpy.ndarray)): images data, shape of each is [H, W, C], color space must be BGR.
            paths (list[str]): The paths of images.
            data (dict): legacy input kept for compatibility with older
                versions. If it has the key 'image', that value is appended to
                ``paths``; otherwise, if it has the key 'data', that value is
                appended to ``images``.
            batch_size (int): batch size of image tensor to be fed into the later classification network.
            use_gpu (bool): Whether to use gpu.
            visualization (bool): Whether to save image or not.
            output_dir (str): The path to store output images.
            use_multi_scale (bool): whether to enable multi-scale face detection. Enabling multi-scale face detection
                can increase the accuracy to detect faces, however,
                it reduces the prediction speed because of the increased model calculation.
            shrink (float): parameter to control the resize scale in preprocess.
            confs_threshold (float): confidence threshold.

        Returns:
            res (list[dict]): The result of face detection and save path of images.

        Raises:
            RuntimeError: if ``use_gpu`` is True and CUDA_VISIBLE_DEVICES is
                unset, empty, or does not start with a digit.
        """
        if use_gpu:
            try:
                _places = os.environ["CUDA_VISIBLE_DEVICES"]
                int(_places[0])
            # KeyError: variable unset; IndexError: empty value;
            # ValueError: first character is not a digit.
            except (KeyError, IndexError, ValueError):
                raise RuntimeError(
                    "Attempt to use GPU for prediction, but environment variable CUDA_VISIBLE_DEVICES was not set correctly."
                )

        # compatibility with older versions
        if data:
            # Build new lists instead of using ``+=`` so that the lists passed
            # in by the caller are never extended in place.
            if 'image' in data:
                paths = ([] if paths is None else list(paths)) + list(
                    data['image'])
            elif 'data' in data:
                images = ([] if images is None else list(images)) + list(
                    data['data'])

        # get all data
        all_element = list(
            reader(self.face_detector, shrink, confs_threshold, images, paths,
                   use_gpu, use_multi_scale))

        # flatten the per-image entries into one list and remember how many
        # entries belong to each image
        image_list = list()
        element_image_num = list()
        for element in all_element:
            element_image = [
                handled['image'] for handled in element['preprocessed']
            ]
            element_image_num.append(len(element_image))
            image_list.extend(element_image)

        total_num = len(image_list)
        loop_num = int(np.ceil(total_num / batch_size))

        predictor = self.gpu_predictor if use_gpu else self.cpu_predictor
        # The empty float64 (0, 2) seed keeps the dtype and shape of the result
        # stable even when there is nothing to predict.
        predict_chunks = [np.zeros((0, 2))]
        for iter_id in range(loop_num):
            handle_id = iter_id * batch_size
            # slicing clamps at the end of the list, so the last batch may be
            # shorter than batch_size
            batch_data = image_list[handle_id:handle_id + batch_size]

            image_arr = np.squeeze(np.array(batch_data), axis=1)
            image_tensor = PaddleTensor(image_arr.copy())
            data_out = predictor.run([image_tensor])
            # len(data_out) == 1
            # data_out[0].as_ndarray().shape == (-1, 2)
            predict_chunks.append(data_out[0].as_ndarray())

        # concatenate once instead of growing the array on every batch
        predict_out = np.concatenate(predict_chunks)

        # postprocess one by one
        res = list()
        interval_left = 0
        for element, entry_num in zip(all_element, element_image_num):
            interval_right = interval_left + entry_num
            detect_faces_list = [
                handled['face'] for handled in element['preprocessed']
            ]
            out = postprocess(
                confidence_out=predict_out[interval_left:interval_right],
                org_im=element['org_im'],
                org_im_path=element['org_im_path'],
                detected_faces=detect_faces_list,
                output_dir=output_dir,
                visualization=visualization)
            res.append(out)
            interval_left = interval_right
        return res
Example #27
0
    def object_detection(self,
                         paths=None,
                         images=None,
                         data=None,
                         use_gpu=False,
                         batch_size=1,
                         output_dir='detection_result',
                         score_thresh=0.5,
                         visualization=True):
        """API of Object Detection.

        All samples from ``self.faster_rcnn.test_reader`` are collected first,
        then padded and predicted in batches of ``batch_size``.

        :param paths: the path of images.
        :type paths: list, each element is correspond to the path of an image.
        :param images: data of images, [N, H, W, C]
        :type images: numpy.ndarray
        :param data: legacy input; if it has the key 'image', that value is
            appended to ``paths``.
        :type data: dict
        :param use_gpu: whether to use gpu or not.
        :type use_gpu: bool
        :param batch_size: batch size.
        :type batch_size: int
        :param output_dir: the directory to store the detection result.
        :type output_dir: str
        :param score_thresh: the threshold of detection confidence.
        :type score_thresh: float
        :param visualization: whether to draw box and save images.
        :type visualization: bool
        :return: the concatenated outputs of ``self.faster_rcnn.postprocess``
            for every batch.
        :rtype: list
        """
        if data and 'image' in data:
            paths = data['image'] if not paths else paths + data['image']
        paths = paths if paths else []
        all_images = list(self.faster_rcnn.test_reader(paths, images))

        images_num = len(all_images)
        loop_num = ceil(images_num / batch_size)
        predictor = self.gpu_predictor if use_gpu else self.cpu_predictor
        res = []
        for iter_id in range(loop_num):
            handle_id = iter_id * batch_size
            # slicing clamps at the end of the list, so the last batch may be
            # shorter than batch_size
            batch_data = all_images[handle_id:handle_id + batch_size]
            padding_image, padding_info, padding_shape = self.faster_rcnn.padding_minibatch(
                batch_data)
            feed_list = [
                PaddleTensor(padding_image.copy()),
                PaddleTensor(padding_info.copy()),
                PaddleTensor(padding_shape.copy())
            ]
            data_out = predictor.run(feed_list)
            output = self.faster_rcnn.postprocess(paths=paths,
                                                  images=images,
                                                  data_out=data_out,
                                                  score_thresh=score_thresh,
                                                  label_names=self.label_names,
                                                  output_dir=output_dir,
                                                  handle_id=handle_id,
                                                  visualization=visualization)
            res += output
        return res
Example #28
0
    def object_detection(self,
                         paths=None,
                         images=None,
                         use_gpu=False,
                         batch_size=1,
                         output_dir='detection_result',
                         score_thresh=0.5,
                         visualization=True):
        """API of Object Detection.

        All samples from ``test_reader(paths, images)`` are collected first,
        then padded and predicted in batches of ``batch_size``.

        Args:
            paths (list[str]): The paths of images.
            images (list(numpy.ndarray)): images data, shape of each is [H, W, C]
            batch_size (int): batch size.
            use_gpu (bool): Whether to use gpu.
            output_dir (str): The path to store output images.
            score_thresh (float): threshold for object detection.
            visualization (bool): whether to save result as images.

        Returns:
            res (list[dict]): The result of coco2017 detection. keys include 'data', 'save_path', the corresponding value is:
                data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is:
                    left (float): The X coordinate of the upper left corner of the bounding box;
                    top (float): The Y coordinate of the upper left corner of the bounding box;
                    right (float): The X coordinate of the lower right corner of the bounding box;
                    bottom (float): The Y coordinate of the lower right corner of the bounding box;
                    label (str): The label of detection result;
                    confidence (float): The confidence of detection result.
                save_path (str, optional): The path to save output images.

        Raises:
            RuntimeError: if ``use_gpu`` is True and CUDA_VISIBLE_DEVICES is
                unset, empty, or does not start with a digit.
        """
        if use_gpu:
            try:
                _places = os.environ["CUDA_VISIBLE_DEVICES"]
                int(_places[0])
            # KeyError: variable unset; IndexError: empty value;
            # ValueError: first character is not a digit.
            except (KeyError, IndexError, ValueError):
                raise RuntimeError(
                    "Attempt to use GPU for prediction, but environment variable CUDA_VISIBLE_DEVICES was not set correctly."
                )

        paths = paths if paths else list()
        all_images = list(test_reader(paths, images))

        images_num = len(all_images)
        loop_num = int(np.ceil(images_num / batch_size))
        predictor = self.gpu_predictor if use_gpu else self.cpu_predictor
        res = list()
        for iter_id in range(loop_num):
            handle_id = iter_id * batch_size
            # slicing clamps at the end of the list, so the last batch may be
            # shorter than batch_size
            batch_data = all_images[handle_id:handle_id + batch_size]
            padding_image, padding_info = padding_minibatch(
                batch_data, coarsest_stride=32, use_padded_im_info=True)
            feed_list = [
                PaddleTensor(padding_image.copy()),
                PaddleTensor(padding_info.copy())
            ]
            data_out = predictor.run(feed_list)
            output = postprocess(
                paths=paths,
                images=images,
                data_out=data_out,
                score_thresh=score_thresh,
                label_names=self.label_names,
                output_dir=output_dir,
                handle_id=handle_id,
                visualization=visualization)
            res += output
        return res
Example #29
0
    def style_transfer(self,
                       images=None,
                       paths=None,
                       alpha=1,
                       use_gpu=False,
                       output_dir='transfer_result',
                       visualization=False):
        """
        API for image style transfer.

        For every component yielded by ``reader`` the content and each style
        are encoded, combined with ``fr`` and decoded; the decoded outputs are
        summed with the component's interpolation weights and post-processed.

        Args:
            images (list): list of dict objects, each dict contains key:
                content(str): value is a numpy.ndarray with shape [H, W, C], content data.
                styles(str): value is a list of numpy.ndarray with shape [H, W, C], styles data.
                weights(str, optional): value is the interpolation weights correspond to styles.
            paths (list): list of dict objects, each dict contains key:
                content(str): value is the path to content.
                styles(str): value is the paths to styles.
                weights(str, optional): value is the interpolation weights correspond to styles.
            alpha (float): The weight that controls the degree of stylization. Should be between 0 and 1.
            use_gpu (bool): whether to use gpu.
            output_dir (str): the path to store output images.
            visualization (bool): whether to save image or not.

        Returns:
            im_output (list[dict()]): list of output images and save path of images.

        Raises:
            RuntimeError: if ``use_gpu`` is True and CUDA_VISIBLE_DEVICES is
                unset, empty, or does not start with a digit.
        """
        if use_gpu:
            try:
                _places = os.environ["CUDA_VISIBLE_DEVICES"]
                int(_places[0])
            # KeyError: variable unset; IndexError: empty value;
            # ValueError: first character is not a digit.
            except (KeyError, IndexError, ValueError):
                raise RuntimeError(
                    "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id."
                )

        encoder = self.gpu_predictor_enc if use_gpu else self.cpu_predictor_enc
        decoder = self.gpu_predictor_dec if use_gpu else self.cpu_predictor_dec

        im_output = []
        for component, w, h in reader(images, paths):
            content = PaddleTensor(component['content_arr'].copy())
            content_feats = encoder.run([content])
            # NOTE: the accumulator shape is hard-coded, so the decoder output
            # has to be broadcastable to (3, 512, 512).
            accumulate = np.zeros((3, 512, 512))
            weights = component['style_interpolation_weights']
            for idx, style_arr in enumerate(component['styles_arr_list']):
                style = PaddleTensor(style_arr.copy())
                # encode
                style_feats = encoder.run([style])
                fr_feats = fr(content_feats[0].as_ndarray(),
                              style_feats[0].as_ndarray(), alpha)
                fr_feats = PaddleTensor(fr_feats.copy())
                # decode
                predict_outputs = decoder.run([fr_feats])
                # interpolation
                accumulate += predict_outputs[0].as_ndarray()[0] * weights[idx]
            # postprocess
            # the timestamp keeps the file names of successive results apart
            save_im_name = 'ndarray_{}.jpg'.format(time.time())
            result = postprocess(accumulate,
                                 output_dir,
                                 save_im_name,
                                 visualization,
                                 size=(w, h))
            im_output.append(result)
        return im_output