def __call__(self, results):
    """Replace ``results['img']`` with DCT planes taken from upscaled copies.

    The input image is resized to twice and to four times its size, each
    copy is JPEG-encoded, and the Y plane of the 2x copy is concatenated
    (last axis) with the Cb/Cr planes of the 4x copy.  When ``self.is_test``
    is set, the untouched input is kept under ``results['img_raw']``.

    :param results: pipeline dict; reads ``'img'`` and
        ``['img_info']['filename']``, writes ``'img'`` (and ``'img_raw'``).
    :return: the same ``results`` dict.
    """
    source = results['img']
    h, w = source.shape[:-1]
    if self.is_test:
        results['img_raw'] = source

    interp = INTER_MODE[self.interpolation]

    def _encode_upscaled(factor):
        # Resize by an integer factor, then JPEG-encode the uint8 copy.
        enlarged = cv2.resize(source, dsize=(w * factor, h * factor),
                              interpolation=interp)
        contiguous = np.ascontiguousarray(enlarged, dtype="uint8")
        return self.jpeg.encode(contiguous, quality=100, jpeg_subsample=2)

    # Luma comes from the 2x copy, chroma from the 4x copy.
    dct_y, _, _ = loads(_encode_upscaled(2))
    _, dct_cb, dct_cr = loads(_encode_upscaled(4))

    plot_dct(dct_y, results['img_info']['filename'])

    if self.channels == 192:
        planes = (dct_y, dct_cb, dct_cr)
    else:
        # Keep only the configured channel subset of every plane.
        planes = (dct_y[:, :, self.subset_y],
                  dct_cb[:, :, self.subset_cb],
                  dct_cr[:, :, self.subset_cr])
    results['img'] = np.concatenate(planes, axis=2)
    return results
def test_loads(self):
    """Check plane shapes from ``loads`` and the normalized/raw value ranges."""
    with open(self.jpeg_file, 'rb') as src:
        buffer = src.read()

    expected_shapes = ((205, 205, 64), (103, 103, 64), (103, 103, 64))

    # Default call (normalized coefficients).
    dct_y, dct_c, dct_r = loads(buffer)
    for plane, shape in zip((dct_y, dct_c, dct_r), expected_shapes):
        self.assertEqual(plane.shape, shape, "wrong dct shape")

    # Same buffer without normalization: shapes must not change.
    raw_y, raw_c, raw_r = loads(buffer, normalized=False)
    for plane, shape in zip((raw_y, raw_c, raw_r), expected_shapes):
        self.assertEqual(plane.shape, shape, "wrong dct shape")

    # The raw value range has to sit inside the normalized one.
    for scaled, raw in ((dct_y, raw_y), (dct_c, raw_c)):
        scaled_lo, scaled_hi = scaled.min(), scaled.max()
        raw_lo, raw_hi = raw.min(), raw.max()
        self.assertTrue(
            raw_lo >= scaled_lo and raw_hi <= scaled_hi,
            "normalized shall produce large range of values")
def __call__(self, sample):
    """Encode ``sample`` as an in-memory JPEG and return its merged DCT planes.

    The sample is converted with ``transforms.ToPILImage``, saved as JPEG
    (quality 100, ``subsampling=0``) into a byte buffer, decoded with
    ``loads`` and passed to ``self._upsample_and_concat``.
    """
    jpeg_buffer = io.BytesIO()
    to_pil = transforms.ToPILImage()
    to_pil(sample).save(fp=jpeg_buffer, format='jpeg', quality=100,
                        subsampling=0)

    encoded = jpeg_buffer.getvalue()
    y, cb, cr = loads(encoded, normalized=self.normalize)
    return self._upsample_and_concat(y, cb, cr)
def decode(self, unischema_field, value):
    """Decode JPEG bytes into their DCT coefficient planes.

    :param unischema_field: unused; present for interface compatibility
    :param value: bytes of a JPEG image
    :return: tuple ``(dct_y, dct_cb, dct_cr)`` as returned by ``loads``
    """
    luma, chroma_blue, chroma_red = loads(
        value, self._normalized, self._channels)
    return (luma, chroma_blue, chroma_red)
def __call__(self, results):
    """JPEG-encode ``results['img']`` and store its DCT planes in ``results``.

    Adds the keys ``'dct_y'``, ``'dct_cb'`` and ``'dct_cr'`` and returns the
    same dict; ``results['img']`` itself is left untouched.
    """
    pixels = np.ascontiguousarray(results['img'], dtype="uint8")
    encoded = self.jpeg.encode(pixels, quality=100, jpeg_subsample=2)

    dct_y, dct_cb, dct_cr = loads(encoded)
    for key, plane in (('dct_y', dct_y),
                       ('dct_cb', dct_cb),
                       ('dct_cr', dct_cr)):
        results[key] = plane
    return results
def __getitem__(self, index):
    """Return the DCT planes of sample ``index`` as float tensors.

    The image is loaded with ``self.loader``, optionally transformed,
    JPEG-encoded with ``self.jpeg`` and decoded with ``loads``.  Each plane
    is converted to a tensor with its last axis moved to the front.

    :return: ``(dct_y, dct_cb, dct_cr, target)``
    """
    path, target = self.samples[index]
    image = self.loader(path)
    if self.transform is not None:
        image = self.transform(image)

    # Image -> array with the channel axis reversed (the original note says
    # RGB to BGR, which assumes the loader yields RGB).
    pixels = np.asarray(image)[:, :, ::-1]
    pixels = np.ascontiguousarray(pixels, dtype="uint8")
    encoded = self.jpeg.encode(pixels, quality=100, jpeg_subsample=2)
    dct_y, dct_cb, dct_cr = loads(encoded)

    def _to_chw(plane):
        # numpy (a, b, c) -> float tensor (c, a, b)
        return torch.from_numpy(plane).permute(2, 0, 1).float()

    dct_y, dct_cb, dct_cr = _to_chw(dct_y), _to_chw(dct_cb), _to_chw(dct_cr)

    if self.subset:
        # Same [start:stop) window along the first axis of every plane.
        window = slice(self.subset[0], self.subset[1])
        dct_y, dct_cb, dct_cr = dct_y[window], dct_cb[window], dct_cr[window]

    if self.target_transform is not None:
        # One transform per plane, in Y / Cb / Cr order.
        dct_y = self.target_transform[0](dct_y)
        dct_cb = self.target_transform[1](dct_cb)
        dct_cr = self.target_transform[2](dct_cr)

    return dct_y, dct_cb, dct_cr, target
def __call__(self, results):
    """Replace ``results['img']`` with concatenated float32 DCT planes.

    The image is JPEG-encoded and decoded with ``loads``; the Cb and Cr
    planes are resized to twice their spatial size with bilinear
    interpolation and concatenated with the Y plane along the last axis.
    Unless ``self.channels == 192``, only the configured channel subsets
    are kept.

    :return: the same ``results`` dict.
    """
    pixels = np.ascontiguousarray(results['img'], dtype="uint8")
    encoded = self.jpeg.encode(pixels, quality=100, jpeg_subsample=2)
    dct_y, dct_cb, dct_cr = loads(encoded)

    # First two axes of the chroma plane; cv2 takes dsize as (second, first).
    rows, cols = dct_cb.shape[:-1]
    doubled = (cols * 2, rows * 2)
    dct_cb_up = cv2.resize(dct_cb, dsize=doubled,
                           interpolation=cv2.INTER_LINEAR)
    dct_cr_up = cv2.resize(dct_cr, dsize=doubled,
                           interpolation=cv2.INTER_LINEAR)

    if self.channels == 192:
        planes = (dct_y, dct_cb_up, dct_cr_up)
    else:
        planes = (dct_y[:, :, self.subset_y],
                  dct_cb_up[:, :, self.subset_cb],
                  dct_cr_up[:, :, self.subset_cr])
    results['img'] = np.concatenate(planes, axis=2).astype('float32')
    return results
def __getitem__(self, index):
    """Return the sample at ``index``, falling back to a random valid one.

    With ``self.backend == 'dct'`` the file is read and decoded with
    ``loads``.  If the file cannot be read or decoded, or the decoded Y
    plane is not 3-dimensional, another sample is drawn at random until one
    decodes to a 3-dimensional Y plane; ``target`` then belongs to the
    replacement.  The planes are cropped to the sizes returned by
    ``adjust_size`` and handed to ``self.transform``.

    With ``self.backend == 'opencv'`` the image is loaded with
    ``self.loader`` and handed to ``self.transform``.

    :param index: position in ``self.samples``.
    :return: for the ``'dct'`` backend ``(image, target)`` where ``image``
        is the three planes concatenated along ``dim=1`` (or
        ``(dct_y, target)`` when the transform returns ``dct_cb`` as
        ``None``); otherwise ``(dct_y, dct_cb, dct_cr, target)`` (or
        ``(dct_y, target)``).
    """
    path, target = self.samples[index]

    if self.backend == 'opencv':
        sample = self.loader(path, backend='opencv', colorSpace='BGR')
    elif self.backend == 'dct':
        # ``except Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit can still stop the retry loop.
        try:
            with open(path, 'rb') as src:
                buffer = src.read()
            dct_y, dct_cb, dct_cr = loads(buffer)
        except Exception:
            dct_y = None

        # One retry loop for both failure modes: undecodable file, or a Y
        # plane that is not 3-D.  Every replacement is re-validated, so an
        # invalid replacement can no longer slip through.
        while dct_y is None or len(dct_y.shape) != 3:
            index = random.randint(0, len(self.samples) - 1)
            path, target = self.samples[index]
            with open(path, 'rb') as src:
                buffer = src.read()
            try:
                dct_y, dct_cb, dct_cr = loads(buffer)
            except Exception:
                print(path)
                dct_y = None

        y_size_h, y_size_w = dct_y.shape[:-1]
        cbcr_size_h, cbcr_size_w = dct_cb.shape[:-1]

        # adjust_size is defined elsewhere; presumably it makes the luma and
        # chroma extents consistent with each other -- TODO confirm.
        y_size_h, cbcr_size_h = adjust_size(y_size_h, cbcr_size_h)
        y_size_w, cbcr_size_w = adjust_size(y_size_w, cbcr_size_w)
        dct_y = dct_y[:y_size_h, :y_size_w]
        dct_cb = dct_cb[:cbcr_size_h, :cbcr_size_w]
        dct_cr = dct_cr[:cbcr_size_h, :cbcr_size_w]
        sample = [dct_y, dct_cb, dct_cr]

    # NOTE(review): with the 'opencv' backend the planes only exist after
    # this call, so a transform appears to be mandatory there -- confirm.
    if self.transform is not None:
        dct_y, dct_cb, dct_cr = self.transform(sample)

    if dct_cb is None:
        return dct_y, target
    if self.backend == 'dct':
        image = torch.cat((dct_y, dct_cb, dct_cr), dim=1)
        return image, target
    return dct_y, dct_cb, dct_cr, target
def _data_generation(self, indexes):
    """
    Internal function used to generate the batch of data.

    Each image is read with OpenCV, passed through ``self.transforms``,
    re-encoded as JPEG in memory and decoded with ``loads``.

    # Argument:
        - indexes: A list of indexes to the images to use in the batch of data.

    # Returns:
        Two values, the batch of images and the labels associated.
        The images are ``X_y`` alone (``only_y`` without ``split_cbcr``),
        ``[X_y, X_cbcr]`` or ``[X_y, X_cb, X_cr]``; the labels are a one-hot
        int32 matrix of shape ``(batch_size, number_of_classes)``.
    """
    batch_size = self._batch_size
    fixed_size = self.input_size is not None

    # With a known input size the output buffers can be allocated up front.
    if fixed_size:
        half_h = self.input_size[0] // 2
        half_w = self.input_size[1] // 2
        X_y = np.empty((batch_size, *self.input_size, 64), dtype=np.int32)
        if not self.split_cbcr:
            X_cbcr = np.empty((batch_size, half_h, half_w, 128),
                              dtype=np.int32)
        else:
            X_cb = np.empty((batch_size, half_h, half_w, 64), dtype=np.int32)
            X_cr = np.empty((batch_size, half_h, half_w, 64), dtype=np.int32)

    y = np.zeros((batch_size, self.number_of_classes), dtype=np.int32)

    # Iterate over the indexes to get the data
    for i, k in enumerate(indexes):
        image_path = self.images_path[k]

        # The class key is the name of the directory containing the image.
        last_slash = image_path.rfind("/")
        second_last_slash = image_path[:last_slash].rfind("/")
        index_class = image_path[second_last_slash + 1:last_slash]

        # Load the image
        img = cv2.imread(image_path)

        # Apply the transformations
        if self.transforms:
            for transform in self.transforms:
                img = transform(image=img)['image']

        # Re-encode as JPEG in memory and read the DCT planes back.
        _, buffer = cv2.imencode(".jpg", img)
        io_buf = BytesIO(buffer)
        dct_y, dct_cb, dct_cr = loads(io_buf.getvalue())

        if not fixed_size:
            # Size unknown in advance: derive the buffers from the first
            # decoded image.  They are allocated only once; allocating on
            # every iteration would discard the samples already stored.
            if i == 0:
                cb_h, cb_w, cb_c = dct_cb.shape
                if not self.split_cbcr:
                    X_cbcr = np.empty((batch_size, cb_h, cb_w, cb_c * 2),
                                      dtype=np.int32)
                else:
                    X_cb = np.empty((batch_size, cb_h, cb_w, cb_c),
                                    dtype=np.int32)
                    X_cr = np.empty((batch_size, cb_h, cb_w, cb_c),
                                    dtype=np.int32)
                # Zero-filled so the luma plane can be padded up to twice
                # the chroma extent.
                X_y = np.zeros((batch_size, cb_h * 2, cb_w * 2, cb_c),
                               dtype=np.int32)

            if not self.split_cbcr:
                X_cbcr[i] = np.concatenate([dct_cb, dct_cr], axis=-1)
            else:
                X_cb[i] = dct_cb
                X_cr[i] = dct_cr
            X_y[i, :dct_y.shape[0], :dct_y.shape[1], :] = dct_y
        else:
            # load the data in the matrices
            try:
                X_y[i] = dct_y
                if not self.split_cbcr:
                    X_cbcr[i] = np.concatenate([dct_cb, dct_cr], axis=-1)
                else:
                    X_cb[i] = dct_cb
                    X_cr[i] = dct_cr
            except Exception as e:
                # Debug aid: report which file did not fit the buffers.
                raise Exception(str(e) + str(image_path)) from e

        # Setting the target class to 1
        y[i, int(self.association[index_class])] = 1

    if self.split_cbcr:
        return [X_y, X_cb, X_cr], y
    if self.only_y:
        return X_y, y
    return [X_y, X_cbcr], y
def transform_dct(img, encoder):
    """JPEG-encode ``img`` with ``encoder`` and return its DCT planes.

    Arrays whose dtype is not ``uint8`` are first converted to a contiguous
    ``uint8`` array; ``uint8`` input is passed to the encoder unchanged.

    :return: ``(dct_y, dct_cb, dct_cr)`` as returned by ``loads``.
    """
    needs_cast = img.dtype != 'uint8'
    pixels = np.ascontiguousarray(img, dtype="uint8") if needs_cast else img
    encoded = encoder.encode(pixels, quality=100, jpeg_subsample=2)
    dct_y, dct_cb, dct_cr = loads(encoded)
    return dct_y, dct_cb, dct_cr
def __data_generation(self, indexes):
    """Build one batch of images (RGB or DCT planes) and their labels.

    Images are opened with PIL and converted to RGB; labels are deep-copied
    from ``self.labels`` in train mode, otherwise a single all-zero row is
    used.  Every transform in ``self.transforms`` is applied to the
    (image, labels) pair.  Items for which a transform returns ``None`` as
    image are dropped from the batch.

    :param indexes: indices into ``self.images_path`` / ``self.labels``.
    :return: ``(inputs, labels)``.  Without ``self.dct`` the inputs are the
        ``preprocess_input``-processed images as one array.  With
        ``self.dct`` they are ``[X_y, X_cb, X_cr]`` (``split_cbcr``), ``X_y``
        (``only_y``) or ``[X_y, X_cbcr]``.  Labels go through
        ``self.label_encoder`` in train mode when an encoder is set.
    """
    # Override the labels formats of all the transformations to make sure
    # they are set correctly.
    if self.labels is not None and self.transforms is not None:
        for transform in self.transforms:
            transform.labels_format = self.labels_format

    batch_X, batch_y = [], []

    # Load the images and labels
    for i in indexes:
        with Image.open(self.images_path[i]) as image:
            image = image.convert("RGB")
            batch_X.append(np.array(image, dtype=np.uint8))

        if self._train_mode:
            batch_y.append(deepcopy(self.labels[i]))
        else:
            batch_y.append([[0, 0, 0, 0, 0]])

    xmin = self.labels_format['xmin']
    ymin = self.labels_format['ymin']
    xmax = self.labels_format['xmax']
    ymax = self.labels_format['ymax']

    # Positions of batch items whose transform produced no image.
    batch_items_to_remove = []

    for i in range(len(batch_X)):
        batch_y[i] = np.array(batch_y[i])

        # Apply any image transformations we may have received.
        if self.transforms:
            for transform in self.transforms:
                batch_X[i], batch_y[i] = transform(batch_X[i], batch_y[i])
                # Some random transforms can fail to produce an image; stop
                # here so later transforms never receive ``None``.
                if batch_X[i] is None:
                    break

        if batch_X[i] is None:
            # Recorded exactly once per item, and the item is skipped.
            batch_items_to_remove.append(i)
            continue

        # Filter out boxes that became degenerate (non-positive extent).
        if self._train_mode and (
                np.any(batch_y[i][:, xmax] - batch_y[i][:, xmin] <= 0) or
                np.any(batch_y[i][:, ymax] - batch_y[i][:, ymin] <= 0)):
            batch_y[i] = self.box_filter(batch_y[i])

    # Remove from the back so the remaining positions stay valid.
    for to_remove in reversed(batch_items_to_remove):
        batch_X.pop(to_remove)
        batch_y.pop(to_remove)

    if self.label_encoder and self._train_mode:
        batch_y_encoded = self.label_encoder(batch_y)
    else:
        batch_y_encoded = batch_y

    if not self.dct:
        batch_X = np.array([preprocess_input(image) for image in batch_X])
        return batch_X, batch_y_encoded

    X_y, X_cb, X_cr, X_cbcr = [], [], [], []
    for image_to_save in batch_X:
        # Round-trip through an in-memory JPEG to obtain the DCT planes.
        fake_file = BytesIO()
        Image.fromarray(image_to_save).save(fake_file, format="jpeg")
        dct_y, dct_cb, dct_cr = loads(fake_file.getvalue())

        y_x, y_y, y_c = dct_y.shape
        cb_x, cb_y, _ = dct_cb.shape

        # Zero-pad the luma plane up to twice the chroma extent.
        temp_y = np.zeros((cb_x * 2, cb_y * 2, y_c), dtype=np.int16)
        temp_y[:y_x, :y_y, :] = dct_y
        X_y.append(temp_y)

        if self.split_cbcr:
            X_cb.append(dct_cb)
            X_cr.append(dct_cr)
        else:
            X_cbcr.append(np.concatenate([dct_cb, dct_cr], axis=-1))

    if self.split_cbcr:
        return [np.array(X_y), np.array(X_cb), np.array(X_cr)], batch_y_encoded
    if self.only_y:
        return np.array(X_y), batch_y_encoded
    return [np.array(X_y), np.array(X_cbcr)], batch_y_encoded
def convert_to_dct(img_location):
    """Read the JPEG file at ``img_location`` and return its DCT planes.

    The file is read once and decoded once with ``loads``; a previous
    additional ``load(img_location)`` call whose result was immediately
    overwritten has been removed.

    :param img_location: path of the JPEG file.
    :return: list ``[dct_y, dct_cb, dct_cr]``.
    """
    with open(img_location, 'rb') as src:
        buffer = src.read()
    dct_y, dct_cb, dct_cr = loads(buffer)
    return [dct_y, dct_cb, dct_cr]
def jpeg_to_coef(jpeg):
    """Stack the DCT planes decoded from ``jpeg`` along a new first axis."""
    # Second argument ``False``: ask ``loads`` for the quantized DCT
    # coefficients.
    return np.stack(loads(jpeg, False), axis=0)