def test_pillow(self):
    """Test conversion of an :py:class:`Image` into a PIL image. """
    module = importlib.import_module('dltb.thirdparty.pil')
    self.assertIn('pil', Image.supported_formats())
    image = Image(self.example_image_filename)
    pil = Image.as_pil(image)
    self.assertIsInstance(pil, module.PIL.Image.Image)
def test_qt(self):
    """Test conversion of an :py:class:`Image` into a Qt QImage. """
    module = importlib.import_module('qtgui.widgets.image')
    self.assertIn('qimage', Image.supported_formats())
    image = Image(self.example_image_filename)
    qimage = Image.as_qimage(image)
    self.assertIsInstance(qimage, module.QImage)
def _image_to_internal(self, imagelike: Imagelike) -> np.ndarray:
    # FIXME[todo]: check out the correct preprocessing for AlexNet -
    # with the current approach the accuracy is only around 30%.

    # get a numpy.ndarray
    image = Image.as_array(imagelike, dtype=np.float32,
                           colorspace=Colorspace.RGB)

    # FIXME: probably we should do a center crop here ...
    image = imresize(image, (227, 227))

    # dividing by 256 brings accuracy down to almost 0%:
    # image = image / 256.

    # centering slightly improves accuracy
    # FIXME[todo]: we need the real (per-channel) means ...
    image = image - image.mean()

    # standardization reduces accuracy to below 3%:
    # image = image / image.std()

    # Caffe uses BGR channel order.
    # RGB to BGR: this really boosts performance, from 33% to 55%.
    image = image[:, :, ::-1]

    return image
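# A possible fix for the FIXME above (a sketch, not verified on this
# network): Caffe-style AlexNet preprocessing subtracts a fixed
# per-channel ImageNet mean pixel in BGR order rather than the
# per-image mean, keeping values in the 0-255 range.  The constants
# below are the commonly quoted Caffe ImageNet means and are an
# assumption here; imresize is the same helper used above.
import numpy as np

IMAGENET_MEAN_BGR = np.array([104.0, 116.7, 122.7], dtype=np.float32)

def caffe_alexnet_preprocess(image_rgb: np.ndarray) -> np.ndarray:
    """Resize to 227x227, convert RGB to BGR, and subtract the
    per-channel mean pixel (values stay in the 0-255 range)."""
    image = imresize(image_rgb.astype(np.float32), (227, 227))
    image = image[:, :, ::-1]          # RGB -> BGR
    return image - IMAGENET_MEAN_BGR   # broadcast over H x W x C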
def write(self, image: Imagelike, filename: str, **kwargs) -> None:
    # Relevant optional arguments of matplotlib.pyplot.imsave:
    #
    # vmin, vmax: scalar
    #     vmin and vmax set the color scaling for the image by fixing
    #     the values that map to the colormap color limits. If either
    #     vmin or vmax is None, that limit is determined from the
    #     arr min/max value.
    #
    # cmap: str or Colormap, optional
    #     A Colormap instance or registered colormap name. The
    #     colormap maps scalar data to colors. It is ignored for
    #     RGB(A) data. Defaults to rcParams["image.cmap"] ('viridis').
    #
    # format: str
    #     The file format, e.g. 'png', 'pdf', 'svg', ...
    #
    # origin: {'upper', 'lower'}
    #     Indicates whether the (0, 0) index of the array is in the
    #     upper left or lower left corner of the axes. Defaults to
    #     rcParams["image.origin"] ('upper').
    #
    # dpi: int
    #     The DPI to store in the metadata of the file. This does
    #     not affect the resolution of the output image.
    plt.imsave(filename, Image.as_array(image, dtype=np.uint8))
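# Usage sketch (the writer class name is hypothetical; the method
# above would belong to some matplotlib-based ImageWriter):
#
#     writer = MatplotlibImageWriter()
#     writer.write(Image('examples/reservoir-dogs.jpg'), 'out.png')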
def onImageChanged(self, index: int) -> None:
    self.storeMetadata()
    if 0 <= index < len(self._faces):
        self._index = index
        data = self._faces[index]
        self.dataView.setData(data)
        if hasattr(data, 'source'):
            image = Image(image=data.source)
            self.imageView.setData(image)
            if data.has_attribute('boundingbox'):
                bbox = data.boundingbox
                bbox = BoundingBox(x1=bbox[0], y1=bbox[1],
                                   x2=bbox[2], y2=bbox[3])
                self.imageView.addRegion(Region(bbox))
        else:
            self.imageView.setData(None)
        position = self.multiImageView.imagePosition(index)
        if position is not None:
            imageSize = self.multiImageView.imageSize()
            spacing = self.multiImageView.spacing()
            xmargin = (imageSize.width() + spacing) // 2
            ymargin = (imageSize.height() + spacing) // 2
            self.multiImageScroller.ensureVisible(position.x(),
                                                  position.y(),
                                                  xmargin, ymargin)
    else:
        self._index = None
        self.dataView.setData(None)
        self.imageView.setData(None)
def image_to_internal(self, image: Imagelike) -> torch.Tensor:
    """Transform an image into a torch Tensor. """
    if isinstance(image, torch.Tensor):
        image_tensor = image
    elif isinstance(image, np.ndarray):
        # At this point we would need to know the value range (in case
        # further preprocessing, e.g. normalization, is required):
        # values in [0.0, 1.0] and values in [0, 255] would have to be
        # treated differently.  FIXME[todo]: this check is currently
        # disabled.
        #
        # preprocess_numpy expects a numpy.ndarray of correct size,
        # dtype float, and values in range [0.0, 1.0].
        # It performs the following operations:
        #  1. [no resizing]
        #  2. numpy.ndarray -> torch.Tensor
        #     (channel last to channel first: H x W x C ==> C x H x W)
        #  3. normalization: [0.0, 1.0] -> torch imagenet range
        image_tensor = self.preprocess_numpy(image)
        # old: explicit transformation:
        # image = np.transpose(image, (2, 0, 1))  # H x W x C -> C x H x W
        # image = torch.from_numpy(image)
        # image = image.add(-self.imagenet_mean_.view(3, 1, 1)).\
        #     div(self.imagenet_std_.view(3, 1, 1))
    else:
        # the _image_to_internal function expects a PIL image as input
        # ("image should be PIL Image. Got <class 'numpy.ndarray'>")
        image = Image.as_pil(image)
        image_tensor = self._image_to_internal(image)

    if image_tensor.dim() == 4:
        # image is already a batch
        image_batch = image_tensor
    elif image_tensor.dim() == 3:
        # create a mini-batch as expected by the model
        # by adding a batch dimension: C x H x W ==> B x C x H x W
        image_batch = image_tensor.unsqueeze(0)
    else:
        raise ValueError(f"Data of invalid shape {image_tensor.shape} "
                         "cannot be transformed into an internal "
                         "torch image.")

    # move the input to the GPU for speed if available
    # (the model is expected to reside on the same device)
    image_batch = image_batch.to(self._device)
    return image_batch
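# For reference, an explicit version of the numpy -> torch conversion
# that preprocess_numpy is described to perform (a sketch; the standard
# torchvision ImageNet statistics below are an assumption about this
# particular model):
import numpy as np
import torch

IMAGENET_MEAN = torch.tensor([0.485, 0.456, 0.406])
IMAGENET_STD = torch.tensor([0.229, 0.224, 0.225])

def numpy_to_torch(image: np.ndarray) -> torch.Tensor:
    """Convert an H x W x C float array with values in [0.0, 1.0]
    into a normalized C x H x W torch tensor."""
    tensor = torch.from_numpy(np.transpose(image, (2, 0, 1))).float()
    tensor = (tensor - IMAGENET_MEAN.view(3, 1, 1)) / \
        IMAGENET_STD.view(3, 1, 1)
    return tensor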
def resize(self, image: Imagelike, size=(640, 360)) -> np.ndarray:
    """Resize the frame to a smaller resolution to save
    computation cost.
    """
    # note: skimage.transform.resize takes an output_shape, not a size!
    # In the output_shape the number of channels is optional.
    output_shape = size[::-1]
    image = Image.as_array(image)
    resized = resize(image, output_shape, preserve_range=True)
    resized = resized.astype(image.dtype)
    return resized
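# Note on the size convention (illustration only; object name is
# hypothetical): size is given as (width, height), while skimage's
# output_shape is (rows, cols) == (height, width), hence size[::-1].
#
#     small = processor.resize(frame, size=(320, 180))
#     # -> array of shape (180, 320) or (180, 320, C)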
def write_metadata(self, data: Image) -> None:
    """Write the metadata of an :py:class:`Image` to its JSON
    meta file.
    """
    if hasattr(data, 'metafile') and hasattr(data, 'valid'):
        # write to the '.json2' variant of the meta file
        suffix = '' if data.metafile.endswith('json2') else '2'
        meta = {
            'image': data.filename,
            'dataset': data.dataset,
            'valid': data.valid,
            'age': data.age
        }
        if data.has_attribute('source'):
            meta['source'] = data.source
        if data.has_attribute('boundingbox'):
            meta['boundingbox'] = data.boundingbox
        if data.has_attribute('id'):
            meta['id'] = data.id
        filename = data.metafile + suffix
        LOG.debug("Writing new meta file '%s'", filename)
        with open(filename, 'w') as outfile:
            json.dump(meta, outfile)
    else:
        LOG.debug("Not writing new meta file (metafile: %s, valid: %s).",
                  hasattr(data, 'metafile'), hasattr(data, 'valid'))
def crop_faces(image: Imagelike) -> np.ndarray:
    """Crop faces in the style of the original LFW dataset.

    The procedure for obtaining the 250x250 pixel images is as
    follows: detect faces with the OpenCV Haar cascade detector,
    then scale the (square-shaped) bounding box by a factor of 2.2
    in each direction, and finally scale the resulting crop to
    250x250 pixels.
    """
    opencv_face_module = \
        importlib.import_module('.face', 'dltb.thirdparty.opencv')
    detector = opencv_face_module.DetectorHaar()
    image = Image(image)
    bounding_boxes = list(detector.detect_boxes(image))

    faces = np.ndarray((len(bounding_boxes), 250, 250, 3), dtype=np.uint8)
    for index, box in enumerate(bounding_boxes):
        box = box.scale(2.2, reference='center')
        patch = box.extract_from_image(image)
        faces[index] = imresize(patch, (250, 250))
    return faces
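# Usage sketch for crop_faces (the example path is hypothetical):
#
#     faces = crop_faces('examples/reservoir-dogs.jpg')
#     print(f"Found {len(faces)} faces of shape {faces.shape[1:]}")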
def warp(image: Imagelike, transformation: np.ndarray,
         size: Sizelike) -> np.ndarray:
    """Warp an image by applying a transformation.
    """
    image = Image.as_array(image)
    size = Size(size)
    output_shape = (size[1], size[0])
    # Further argument of skimage.transform.warp:
    #   order (int, optional): the order of interpolation,
    #   which has to be in the range 0-5:
    #     0: Nearest-neighbor
    #     1: Bi-linear (default)
    #     2: Bi-quadratic
    #     3: Bi-cubic
    #     4: Bi-quartic
    #     5: Bi-quintic
    # (warp here refers to skimage.transform.warp, imported at
    # module level, not to this function)
    warped = warp(image, transformation, output_shape=output_shape,
                  preserve_range=True)
    warped = warped.astype(image.dtype)
    return warped
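# Usage sketch: build a similarity transformation with skimage and
# pass its matrix to the function above (scale/rotation/translation
# values are arbitrary examples).  Note that skimage.transform.warp
# interprets the given matrix as the *inverse* map (output to input
# coordinates), so invert the forward transform when needed.
import numpy as np
from skimage.transform import SimilarityTransform

tform = SimilarityTransform(scale=0.5, rotation=np.pi / 8,
                            translation=(10, 20))
inverse_matrix = np.linalg.inv(tform.params)
# warped = warp('examples/reservoir-dogs.jpg', inverse_matrix,
#               size=(112, 112))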
def load_metadata(self, data: Image) -> None:
    """Load metadata for an image from its JSON meta file. """
    filename_meta = data.filename.rsplit('.', maxsplit=1)[0] + '.json'
    # prefer the '.json2' variant of the meta file if it exists
    filename_meta += '2' if os.path.isfile(filename_meta + '2') else ''
    if os.path.isfile(filename_meta):
        LOG.debug("Loading meta file '%s'", filename_meta)
        with open(filename_meta) as infile:
            meta = json.load(infile)
        # image: path to the image file
        # source: path to the source file
        # dataset: the dataset from which this image was taken
        # boundingbox: bounding box of the face in the original image
        # id: the class label
        data.add_attribute('image', meta['image'])
        data.add_attribute('dataset', meta['dataset'])
        if 'source' in meta:
            source_filename = meta['source']
            source_filename = source_filename.replace('\\', '/')
            source_filename = \
                source_filename.replace('E:', self._basedir)
            data.add_attribute('source', source_filename)
        if 'boundingbox' in meta:
            data.add_attribute('boundingbox', meta['boundingbox'])
        if 'id' in meta:
            data.add_attribute('id', meta['id'])
        data.add_attribute('age', meta.get('age', None))
        data.add_attribute('valid', meta.get('valid', True))
        data.add_attribute('metafile', filename_meta)
    else:
        LOG.debug("No meta file for data (tried '%s')", filename_meta)
        if data.filename.startswith(self.directory):
            filename = data.filename[len(self.directory) + 1:]
            parts = filename.split('/')
            label, imagename = parts[0], parts[-1]
            if imagename.startswith('imdb_wiki'):
                data.add_attribute('dataset', 'imdb_wiki')
                source_filename = os.path.join(self._clean4, 'Unified',
                                               filename)
                if os.path.isfile(source_filename):
                    data.add_attribute('source', source_filename)
            elif len(parts) > 2 and parts[1] == 'New':
                LOG.warning("New image without meta data: '%s'", filename)
                data.add_attribute('dataset')
            elif os.path.isfile(
                    os.path.join(self._clean2, 'Patricia', filename)):
                data.add_attribute('dataset', 'Patricia')
                data.add_attribute(
                    'source', os.path.join(self._clean2, 'Patricia',
                                           filename))
            else:
                LOG.warning("Unknown source dataset for '%s'", filename)
                data.add_attribute('dataset')
        else:
            LOG.warning("Bad filename: '%s' (not in directory '%s')",
                        data.filename, self.directory)
            data.add_attribute('dataset')
        if not filename_meta.endswith('json2'):
            filename_meta += '2'
        data.add_attribute('metafile', filename_meta)
        if not data.has_attribute('age'):
            data.add_attribute('age')
        if not data.has_attribute('valid'):
            data.add_attribute('valid', True)
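# Shape of a meta file as read above (a sketch: the keys follow the
# attribute handling in load_metadata/write_metadata, the values are
# invented placeholders):
#
#     {
#         "image": "Patricia/New/example.jpg",
#         "source": "E:/some/original/path.jpg",
#         "dataset": "Patricia",
#         "boundingbox": [10, 20, 210, 220],
#         "id": "person-0042",
#         "age": 35,
#         "valid": true
#     }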
def preprocess(self, image: Imagelike, size: Tuple[int, int] = None,
               bbox=None, landmark=None, margin: int = 0,
               **kwargs):  # margin=44
    """Preprocess the image.

    Preprocessing consists of multiple steps:
    1. read the image
    2. obtain the target image size
    3. align the image

    Arguments
    ---------
    image:
        The image to be preprocessed.
    size:
        The target size of the image after preprocessing.
    bbox:
        The bounding box for the image.
    landmark:
        Facial landmarks for face alignment.
    margin:
        Extra margin to put around the face.
    """
    #
    # 1. read the image
    #
    img = Image.as_array(image)

    #
    # 2. obtain the target image size
    #
    # old: the size used to be passed as a string "width,height":
    # str_image_size = image_size
    # image_size = []  # image_size as two-element list [width, height]
    # if str_image_size:
    #     image_size = [int(x) for x in str_image_size.split(',')]
    #     if len(image_size) == 1:
    #         image_size = [image_size[0], image_size[0]]
    if size is None:
        image_size = (112, 112)
    else:
        image_size = size
    assert len(image_size) == 2
    assert image_size[0] == 112
    assert image_size[0] == 112 or image_size[1] == 96

    #
    # 3. align the image
    #

    # obtain a transformation matrix
    transformation = landmark and self._transformation_matrix(landmark)

    # if no transformation was obtained, just resize
    if transformation is None:
        return self._resize_image(img, image_size, margin=margin)

    # otherwise apply the transformation
    return self._transform_image(img, transformation, image_size)
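# A sketch of what _transformation_matrix might compute (assumption:
# insightface-style alignment).  The 5-point reference template below
# is the one commonly used for 112x112 crops in insightface-derived
# code; whether this class uses exactly these coordinates is an
# assumption.  The estimate is a least-squares similarity transform.
import numpy as np
from skimage.transform import SimilarityTransform

REFERENCE_112 = np.array([[38.2946, 51.6963],
                          [73.5318, 51.5014],
                          [56.0252, 71.7366],
                          [41.5493, 92.3655],
                          [70.7299, 92.2041]], dtype=np.float32)

def transformation_matrix(landmark: np.ndarray) -> np.ndarray:
    """Estimate a similarity transform mapping the detected 5-point
    landmarks onto the 112x112 reference template."""
    tform = SimilarityTransform()
    tform.estimate(np.asarray(landmark, dtype=np.float32), REFERENCE_112)
    # 2x3 affine matrix, e.g. suitable for cv2.warpAffine
    return tform.params[:2, :]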
def setImage(self, image: Imagelike) -> None:
    """Set the image for this :py:class:`FacePanel`. This will
    initiate the processing of this image using the current tools.
    """
    self.setData(Image.as_data(image))
def random(self, seed: int = None) -> None:
    """Generate random data. """
    self._image = Image(self._generator.random(seed))
    self.change('data_changed')
def test_image_creation(self):
    """Test creation of an :py:class:`Image`. """
    image = Image(self.example_image_filename)
    self.assertEqual(image.size(), self.example_image_size)
def test_supported_formats(self):
    """Test supported image formats. """
    self.assertIn('array', Image.supported_formats())
    self.assertIn('image', Image.supported_formats())
def main():
    """Main program: parse command line options and start face tools.
    """
    parser = ArgumentParser(description='Deep learning based face processing')
    parser.add_argument('images', metavar='IMAGE', type=str, nargs='*',
                        help='an image to use')
    parser.add_argument('--webcam', action='store_true', default=False,
                        help='run on webcam')
    parser.add_argument('--show', action='store_true', default=False,
                        help='show results in a window')

    group_detector = parser.add_argument_group("Detector arguments")
    group_detector.add_argument('--detect', action='store_true',
                                default=False, help='run face detection')
    group_detector.add_argument('--detector', type=str,
                                help='the face detector to use')
    group_detector.add_argument('--list-detectors', action='store_true',
                                default=False,
                                help='list available detectors')
    group_detector.add_argument('--warper', type=str, default=None,
                                help='the image warper to use')
    group_detector.add_argument('--list-warpers', action='store_true',
                                default=False,
                                help='list available image warpers')
    group_detector.add_argument('--size', type=str, default='112x112',
                                help='size of the output image')
    group_detector.add_argument('--output-directory', type=str,
                                default='output',
                                help='path of the output directory')

    ToolboxArgparse.add_arguments(parser)
    DatasourceArgparse.prepare(parser)

    args = parser.parse_args()
    ToolboxArgparse.process_arguments(parser, args)

    if args.list_detectors:
        print("FaceDetector implementations:")
        for index, implementation in enumerate(implementations(FaceDetector)):
            print(f"{index+1}) {implementation}")
        return os.EX_OK

    if args.list_warpers:
        print("ImageWarper implementations:")
        for index, implementation in enumerate(ImageWarper.implementations()):
            print(f"{index+1}) {implementation}")
        return os.EX_OK

    # obtain the datasource if provided (otherwise None)
    datasource = DatasourceArgparse.datasource(parser, args)

    # obtain an ImageDisplay object if --show is set (otherwise None)
    display = get_display() if args.show else None

    # obtain the face detector
    detector = \
        Detector(implementation='dltb.thirdparty.face_evolve.mtcnn.Detector')
    print(f"Detector: {detector} ({type(detector)})")

    # obtain the ImageWarper
    # warper = ImageWarper(implementation='dltb.thirdparty.skimage.ImageUtil')
    # warper = ImageWarper(implementation='dltb.thirdparty.opencv.ImageUtils')
    warper = ImageWarper(implementation=args.warper)

    # create the LandmarkAligner
    aligner = LandmarkAligner(detector=detector, size=args.size,
                              warper=warper)

    if not datasource:
        for image in args.images:
            apply_single_hack(Image(image), detector, aligner,
                              display=display)
    else:
        apply_multi_hack(datasource, detector, aligner,
                         input_directory=datasource.directory,
                         output_directory=Path(args.output_directory),
                         progress=tqdm.tqdm, display=display)

    return os.EX_OK
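# Example invocations of the main program above (the script name
# face_align.py is hypothetical):
#
#     python face_align.py --list-warpers
#     python face_align.py --warper dltb.thirdparty.opencv.ImageUtils \
#         --size 112x112 --show image1.jpg image2.jpg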
def setUp(self):
    """Initialize a detector to be used in the tests. """
    self.detector = Tool['haar']
    self.detector.prepare()
    self.image = Image.as_data('examples/reservoir-dogs.jpg')
def main():
    """Main program: parse command line options and start face tools.
    """
    parser = ArgumentParser(description='Deep learning based face processing')
    parser.add_argument('images', metavar='IMAGE', type=str, nargs='*',
                        help='an image to use')
    parser.add_argument('--webcam', action='store_true', default=False,
                        help='run on webcam')
    parser.add_argument('--show', action='store_true', default=True,
                        help='show results in a window')
    parser.add_argument('--evaluate', action='store_true', default=True,
                        help='perform evaluation')
    parser.add_argument('--output-directory', type=str, default='output',
                        help='path of the output directory')

    group_detector = parser.add_argument_group("Detector arguments")
    group_detector.add_argument('--detect', action='store_true',
                                default=False, help='run face detection')
    group_detector.add_argument('--detector', type=str,
                                help='the face detector to use')
    group_detector.add_argument('--list-detectors', action='store_true',
                                default=False,
                                help='list available detectors')

    group_aligner = parser.add_argument_group("Alignment arguments")
    group_aligner.add_argument('--align', action='store_true', default=False,
                               help='run face alignment')
    group_aligner.add_argument('--warper', type=str, default=None,
                               help='the image warper to use')
    group_aligner.add_argument('--list-warpers', action='store_true',
                               default=False,
                               help='list available image warpers')
    group_aligner.add_argument('--size', type=str, default='112x112',
                               help='size of the output image')

    group_recognizer = parser.add_argument_group("Recognition arguments")
    group_recognizer.add_argument('--verify', action='store_true',
                                  default=False,
                                  help='run face verification')

    ToolboxArgparse.add_arguments(parser)
    DatasourceArgparse.prepare(parser)

    args = parser.parse_args()
    ToolboxArgparse.process_arguments(parser, args)

    if args.list_detectors:
        print("FaceDetector implementations:")
        for index, implementation in enumerate(implementations(FaceDetector)):
            print(f"{index+1}) {implementation}")
        return os.EX_OK

    if args.list_warpers:
        print("ImageWarper implementations:")
        for index, implementation in enumerate(ImageWarper.implementations()):
            print(f"{index+1}) {implementation}")
        return os.EX_OK

    # obtain the datasource if provided (otherwise None)
    datasource = DatasourceArgparse.datasource(parser, args)

    if args.detector:
        detector = FaceDetector(implementation=args.detector)
    elif args.detector:  # FIXME[old]: duplicate condition, never reached
        print(f"Detector class: {args.detector}")
        Detector = import_class(args.detector)
        detector = Detector()

    # 'haar', 'ssd', 'hog', 'cnn', 'mtcnn'
    # detector = Tool['haar']
    # detector = Tool['ssd']
    print(f"Detector: {detector} [prepared={detector.prepared}]")
    detector.prepare()
    print(f"Detector: {detector} [prepared={detector.prepared}]")

    if args.detect:
        if args.webcam:
            webcam = Webcam()
            display = ImageDisplay(module='qt')
            display.present(display_video, (webcam, detector))

        for url in args.images:
            if os.path.isdir(url):
                datasource = ImageDirectory('images')
                datasource.prepare()
                for data in datasource:
                    print(detector(data))
                    # detector.process(data, mark=True)
                    # output_detections(detector, data)
            else:
                print(f"Applying detector to {url}")
                # print(detector(url))
                result = ('detections', 'mark')  # , 'extract')
                data = detector.process_image(url, result=result)
                # mark=True, extract=True
                data.debug()
                output_detections(detector, data)  # , extract=True

    elif args.align:
        #
        # perform face alignment
        #

        # obtain the face detector
        detector_implementation = \
            'dltb.thirdparty.face_evolve.mtcnn.Detector'
        detector = FaceDetector(implementation=detector_implementation)
print(f"Detector: {detector} ({type(detector)})") # obtain the ImageWarper warper = ImageWarper(implementation=args.warper) # create an aligner aligner = LandmarkAligner(detector=detector, size=args.size, warper=warper) # obtain an ImageDisplay object if --show is set (otherwise None) display = get_display() if args.show else None if not datasource: for image in args.images: apply_single_hack(Image(image), detector, aligner, display=display) else: apply_multi_hack(datasource, detector, aligner, input_directory=datasource.directory, output_directory=Path(args.output_directory), progress=tqdm.tqdm, display=display) elif args.evaluate: # obtain the face detector detector_implementation = 'dltb.thirdparty.face_evolve.mtcnn.Detector' detector = FaceDetector(implementation=detector_implementation) print(f"Detector: {detector} ({type(detector)})") # obtain the ImageWarper warper = ImageWarper(implementation=args.warper) # create an aligner aligner = LandmarkAligner(detector=detector, size=args.size, warper=warper) from dltb.thirdparty.arcface import ArcFace arcface = ArcFace(aligner=aligner) embedding_file_name = Path("embeddings.npz") if embedding_file_name.is_file(): content = np.load(embedding_file_name) embeddings, labels = content['embeddings'], content['labels'] else: iterable = datasource.pairs() iterable = tqdm.tqdm(iterable) embeddings, labels = arcface.embed_labeled_pairs(iterable) print(f"Writing embeddings of shape {embeddings.shape} to " f"'{embedding_file_name}'") np.savez_compressed(embedding_file_name, embeddings=embeddings, labels=labels) print("embeddings:", embeddings.shape, embeddings.dtype) print("labels:", labels.shape, labels.dtype) #for image1, image2, same in iterable: # print(image1.shape, image2.shape, same) # embedding1 = embed(image1) # embedding2 = embed(image1) # distance = distance(embedding1, embedding2) else: print("No operation specified.")