Example #1
def run(args):
    # assume the image is 4K; crop if it's larger
    img = ColorImage.imread(args.file_name, resize=(3840, 2160))

    # step 1: find a bounding box region for the document
    document_region = detect_document_region(img)
    # visualize the document region
    region = img.draw_quadrilateral(document_region)
    region.imsave('results/region.png')

    # step 2: find text regions
    # step 2.1: perform homography to flatten out document
    # 1700 x 2200 is size of a letter size paper at 200 DPI
    warped_region = warp_document(img, document_region, 1700, 2200)
    # visualize the flattened document
    warped_region.imsave('results/document.png')
    # extract word regions from document
    words = extract_word_bounds(warped_region.copy())

    # step 3: CRNN word extraction
    texts = []
    for i, word in enumerate(words):
        # crop each word from the document
        word_img = word.crop(warped_region.img)
        # visualize each cropped word
        word_img.imsave('words/word{}.png'.format(i))
        # CRNN text extraction from cropped word
        text = extract_text(word_img.img)
        texts.append(text)
    print(' '.join(texts))
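
A minimal driver sketch for run(); the argparse wiring below is an assumption for illustration, not part of the original example.

import argparse

if __name__ == '__main__':
    # hypothetical CLI entry point; 'file_name' matches args.file_name used in run()
    parser = argparse.ArgumentParser(
        description='Extract text from a photographed document')
    parser.add_argument('file_name', help='path to the input image')
    run(parser.parse_args())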
Example #2
    def frames(self, flush=True):
        """ Return the latest color image from the stream.

        Raises:
            Exception if the OpenCV sensor gives a ret_val of 0
        """
        if flush:
            self.flush()
        ret_val, frame = self._sensor.read()
        if not ret_val:
            raise Exception(
                "Unable to retrieve frame from OpenCVCameraSensor for id {0}".
                format(self._device_id))
        return ColorImage(frame)
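
A short usage sketch, assuming a sensor class that exposes this frames() method; the OpenCVCameraSensor constructor arguments are assumptions.

# hypothetical usage; OpenCVCameraSensor(device_id=0) is an assumed constructor
sensor = OpenCVCameraSensor(device_id=0)
im = sensor.frames()                 # flush buffered frames, then read one
stale = sensor.frames(flush=False)   # skip the flush for back-to-back reads
im.imsave('results/frame.png')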
Example #3
    def next_frame(self):
        """ Return the next frame from the video as a ColorImage instance,
            or None if the stream is over.
        """
        if self.frame is not None and self.me < self.start_time:
            self.me += 1
            return self.frame.clone()
        hdr = self.player.readline()
        if hdr != "FRAME\n":
            if self.frame is not None and self.me < self.end_time:
                self.me += 1
                return self.frame.clone()
            self.p.terminate()
            return None
        self.frame = ColorImage(self.width, self.height, fromfile=self.player)
        self.me += 1
        return self.frame.clone()
Example #4
    def frames(self):
        """Retrieve the next frame from the image directory and convert it to
        a ColorImage, a DepthImage, and an IrImage.

        Returns
        -------
        :obj:`tuple` of :obj:`ColorImage`, :obj:`DepthImage`, :obj:`IrImage`
            The ColorImage, DepthImage, and IrImage of the current frame.

        Raises
        ------
        RuntimeError
            If the Kinect stream is not running or if all images in the
            directory have been used.
        """
        if not self._running:
            raise RuntimeError(
                'VirtualKinect2 device pointing to %s not running. Cannot read frames'
                % (self._path_to_images))

        # image indices are assumed 0-based, so num_images - 1 is the last valid index
        if self._im_index >= self._num_images:
            raise RuntimeError('VirtualKinect2 device is out of images')

        # read images
        color_filename = os.path.join(self._path_to_images,
                                      'color_%d.png' % (self._im_index))
        color_im = ColorImage.open(color_filename, frame=self._frame)
        depth_filename = os.path.join(self._path_to_images,
                                      'depth_%d.npy' % (self._im_index))
        depth_im = DepthImage.open(depth_filename, frame=self._frame)
        ir_filename = os.path.join(self._path_to_images,
                                   'ir_%d.npy' % (self._im_index))
        ir_im = IrImage.open(ir_filename, frame=self._frame)
        self._im_index += 1
        return color_im, depth_im, ir_im
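
A minimal read loop, assuming a VirtualKinect2 sensor that wraps this frames() method; the constructor signature and start()/stop() methods are assumptions.

# hypothetical usage of the virtual sensor
sensor = VirtualKinect2('data/test_images')  # assumed to take the image directory
sensor.start()                               # assumed to set _running and reset _im_index
try:
    while True:
        color_im, depth_im, ir_im = sensor.frames()
except RuntimeError:
    pass  # stream stopped or directory exhausted
finally:
    sensor.stop()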
Example #5
    def _forward_pass(self, images):
        """ Forward pass a list of images through the CNN """
        # form image array
        num_images = len(images)
        if num_images == 0:
            return None
        # wrap any raw numpy arrays in the list as Image subclasses
        if not all(isinstance(image, Image) for image in images):
            new_images = []
            for image in images:
                if len(image.shape) > 2:
                    new_images.append(
                        ColorImage(image, frame='unspecified'))
                elif image.dtype == np.float32 or image.dtype == np.float64:
                    new_images.append(
                        DepthImage(image, frame='unspecified'))
                else:
                    raise ValueError('Image type not understood')
            images = new_images

        im_height = images[0].height
        im_width = images[0].width
        channels = images[0].channels
        tensor_channels = 3
        image_arr = np.zeros(
            [num_images, im_height, im_width, tensor_channels])
        for j, image in enumerate(images):
            if channels == 3:
                image_arr[j, :, :, :] = image.raw_data
            else:
                # replicate single-channel (H, W, 1) data across the 3 tensor channels
                image_arr[j, :, :, :] = np.tile(image.raw_data, [1, 1, 3])

        # predict
        fp_start = time.time()
        final_blobs = self.cnn_.featurize(image_arr)
        fp_stop = time.time()
        logging.debug('Featurization took %f sec per image' %
                      ((fp_stop - fp_start) / len(images)))
        return final_blobs.reshape(final_blobs.shape[0], -1)
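
A usage sketch for _forward_pass; the 'extractor' instance and its cnn_ featurizer are assumptions, not part of the original example.

import numpy as np

# two hypothetical single-channel float depth maps; _forward_pass wraps raw
# float arrays as DepthImage and tiles them to 3 tensor channels
depth_maps = [np.random.rand(224, 224).astype(np.float32) for _ in range(2)]
features = extractor._forward_pass(depth_maps)  # 'extractor' is an assumed instance
print(features.shape)  # (2, flattened_feature_dim)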
Example #6
import re
from subprocess import PIPE, Popen


class MPlayer(object):
    """ Load a video as YUV4MPEG2 frames using mplayer """

    def __init__(self, video):
        # command, tail, and force are assumed module-level fragments of the
        # mplayer command line
        self.p = Popen(command + tail + video.path + force,
                       shell=True, bufsize=0, stdout=PIPE)
        self.player = self.p.stdout
        # parse the stream size from the YUV4MPEG2 header, e.g. "... W640 H480 ..."
        hdr = self.player.readline()
        try:
            self.width = int(re.search(r"W(\d+)", hdr).group(1))
            self.height = int(re.search(r"H(\d+)", hdr).group(1))
        except (AttributeError, ValueError):
            # fall back to a default size if the header cannot be parsed
            self.width = 640
            self.height = 480
        self.video = video
        self.me = 0
        self.start_time = self.video.freeze_in_time
        self.end_time = self.video.freeze_out_time
        self.frame = None
        self.fps = 25

    def next_frame(self):
        """ Return the next frame from the video as a ColorImage instance,
            or None if the stream is over.
        """
        if self.frame is not None and self.me < self.start_time:
            self.me += 1
            return self.frame.clone()
        hdr = self.player.readline()
        if hdr != "FRAME\n":
            if self.frame is not None and self.me < self.end_time:
                self.me += 1
                return self.frame.clone()
            self.p.terminate()
            return None
        self.frame = ColorImage(self.width, self.height, fromfile=self.player)
        self.me += 1
        return self.frame.clone()
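
A sketch of draining the player, assuming a video object with path, freeze_in_time, and freeze_out_time attributes as used in __init__.

# hypothetical iteration over all decoded frames
player = MPlayer(video)  # 'video' is an assumed object with .path and freeze times
count = 0
frame = player.next_frame()
while frame is not None:
    count += 1
    frame = player.next_frame()
print('decoded %d frames' % count)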
Example #7
    def _frames_and_index_map(self, skip_registration=False):
        """Retrieve a new frame from the Kinect and return a ColorImage,
        DepthImage, IrImage, and a map from depth pixels to color pixel indices.

        Parameters
        ----------
        skip_registration : bool
            If True, the registration step is skipped.

        Returns
        -------
        :obj:`tuple` of :obj:`ColorImage`, :obj:`DepthImage`, :obj:`IrImage`, :obj:`numpy.ndarray`
            The ColorImage, DepthImage, and IrImage of the current frame, and an
            ndarray that maps pixels of the depth image to the index of the
            corresponding pixel in the color image.

        Raises
        ------
        RuntimeError
            If the Kinect stream is not running.
        """
        if not self._running:
            raise RuntimeError(
                'Kinect2 device %s not running. Cannot read frames' %
                (self._device_num))

        # read frames
        frames = self._listener.waitForNewFrame()
        unregistered_color = frames['color']
        distorted_depth = frames['depth']
        ir = frames['ir']

        # apply color to depth registration
        color_frame = self._color_frame
        color = unregistered_color
        depth = distorted_depth
        color_depth_map = np.zeros([depth.height,
                                    depth.width]).astype(np.int32).ravel()
        if not skip_registration and self._registration_mode == Kinect2RegistrationMode.COLOR_TO_DEPTH:
            color_frame = self._ir_frame
            depth = lf2.Frame(depth.width, depth.height, 4,
                              lf2.FrameType.Depth)
            color = lf2.Frame(depth.width, depth.height, 4,
                              lf2.FrameType.Color)
            self._registration.apply(unregistered_color,
                                     distorted_depth,
                                     depth,
                                     color,
                                     color_depth_map=color_depth_map)

        # convert to arrays (copies are needed to avoid referencing deleted frame data)
        color_arr = copy.copy(color.asarray())
        color_arr[:, :, [0, 2]] = color_arr[:, :, [2, 0]]  # convert BGR to RGB
        color_arr = np.fliplr(color_arr)  # mirror all four channels horizontally
        depth_arr = np.fliplr(copy.copy(depth.asarray()))
        ir_arr = np.fliplr(copy.copy(ir.asarray()))

        # convert meters
        if self._depth_mode == Kinect2DepthMode.METERS:
            depth_arr = depth_arr * MM_TO_METERS

        # Release and return
        self._listener.release(frames)
        return (ColorImage(color_arr[:, :, :3],
                           color_frame), DepthImage(depth_arr, self._ir_frame),
                IrImage(ir_arr.astype(np.uint16),
                        self._ir_frame), color_depth_map)
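
A sketch of using the returned color_depth_map; the sensor instance and the row-major decoding of the flat color index are assumptions.

# hypothetical lookup of the color pixel registered to depth pixel (y, x)
color_im, depth_im, ir_im, color_depth_map = sensor._frames_and_index_map()
y, x = 100, 200
flat_idx = color_depth_map[y * depth_im.width + x]
color_y, color_x = flat_idx // color_im.width, flat_idx % color_im.width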