import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import Sequence


class DataGen(Sequence):
    def __init__(self, imgs, masks, weights, batch_size, shuffle):
        self.imgs = imgs
        self.masks = masks
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.aug = ImageDataGenerator(
            horizontal_flip=True,
            vertical_flip=True,
            shear_range=0.05,
            rotation_range=20,
            width_shift_range=0.1,
            height_shift_range=0.1
        )
        self.weights = weights

    def on_epoch_end(self):
        if self.shuffle:
            # Draw a fresh seed each epoch, but reuse it for all three arrays
            # so images, masks and weights stay aligned. (The original reused
            # a fixed seed, which produced the identical "shuffle" every
            # epoch.)
            seed = np.random.randint(0, 2**31 - 1)
            np.random.RandomState(seed).shuffle(self.imgs)
            np.random.RandomState(seed).shuffle(self.masks)
            np.random.RandomState(seed).shuffle(self.weights)

    def __len__(self):
        return self.imgs.shape[0] // self.batch_size

    def __getitem__(self, idx):
        img_batch = self.imgs[idx * self.batch_size:(idx + 1) * self.batch_size]
        mask_batch = self.masks[idx * self.batch_size:(idx + 1) * self.batch_size]
        sample_weights = self.weights[idx * self.batch_size:(idx + 1) * self.batch_size]
        if self.shuffle:
            # Draw one random transform from the configured ranges and apply
            # it to every image/mask pair in the batch, keeping the two
            # aligned. (The original hand-built this dict but passed `tx`,
            # `ty` and `shear` as fractions where `apply_transform` expects
            # pixels and degrees, making those augmentations effective
            # no-ops.)
            transform_dict = self.aug.get_random_transform(img_batch.shape[1:])
            img_batch = np.array([self.aug.apply_transform(img, transform_dict)
                                  for img in img_batch])
            mask_batch = np.array([self.aug.apply_transform(mask, transform_dict)
                                   for mask in mask_batch])
        return img_batch, mask_batch, sample_weights
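# Usage sketch (illustrative only; the synthetic arrays below are assumptions,
# not data from the original source). Indexing the Sequence yields an
# augmented (images, masks, sample_weights) triple that a compiled Keras model
# could consume directly via model.fit(gen, ...).
demo_imgs = np.random.rand(32, 128, 128, 3).astype(np.float32)
demo_masks = (np.random.rand(32, 128, 128, 1) > 0.5).astype(np.float32)
demo_weights = np.ones(32, dtype=np.float32)

gen = DataGen(demo_imgs, demo_masks, demo_weights, batch_size=8, shuffle=True)
x, y, w = gen[0]
print(x.shape, y.shape, w.shape)  # (8, 128, 128, 3) (8, 128, 128, 1) (8,)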
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical


def batch_generator(db, fsq, batch_size=100, partition='train', augment=False):
    # TODO: add ingredient result in output, format: outputs=[ingre_category, food_category]
    ids = db['ids_{}'.format(partition)]
    classes = db['classes_{}'.format(partition)]
    impos = db['impos_{}'.format(partition)]
    ims = db['ims_{}'.format(partition)]
    numims = db['numims_{}'.format(partition)]
    # ingrs = db['ingrs_{}'.format(partition)]
    partition_size = len(ids)

    if augment:
        augmenter = ImageDataGenerator(
            data_format="channels_first",
            horizontal_flip=True,
            zoom_range=0.1,
            width_shift_range=0.2,
            height_shift_range=0.2,
            fill_mode="constant"
        )
        im_shape = ims[0].shape
        # Precompute a pool of random transforms to sample from later.
        random_augmenters = [augmenter.get_random_transform(im_shape)
                             for i in range(1000)]
        print("Created augmenter")

    while True:
        images = []
        categories = []
        ingredients = []
        while len(images) < batch_size:
            # i in np.random.choice(partition_size, size=batch_size):
            # (np.random.random_integers is deprecated; use randint instead)
            i = np.random.randint(0, partition_size)
            food_id = ids[i].decode("utf-8")
            # Since category=0 is the background class, we can ignore that.
            # Experiment with ignoring category 1 (peanut butter, comprising
            # half of all).
            category = classes[i]
            ingr = fsq.get_vector_for_food(food_id)
            if len(ingr) == 0:
                continue
            # Append every image of this sample; the batch may therefore end
            # up slightly larger than batch_size.
            for j in range(numims[i]):
                index = impos[i][j] - 1
                # Only augment 1/5 of the time
                if augment and np.random.randint(5) >= 4:
                    opts = np.random.choice(random_augmenters, 1)[0]
                    image = augmenter.apply_transform(ims[index], opts)
                else:
                    image = ims[index]
                images.append(image)
                ingredients.append(ingr)
                categories.append(category)
        batch_x = np.array(images)
        batch_y = to_categorical(categories, 16)
        yield (batch_x, {'ingre_category': np.array(ingredients),
                         'food_category': batch_y})
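# Usage sketch (illustrative; the h5py-style `db` layout and the `fsq`
# ingredient-lookup object are mocked here and are assumptions inferred from
# the code above, not the original data pipeline).
import numpy as np


class MockFsq:
    """Stands in for the food-id -> ingredient-vector lookup."""
    def get_vector_for_food(self, food_id):
        return np.random.rand(100)  # hypothetical 100-dim ingredient vector


demo_db = {
    'ids_train': np.array([b'food_0', b'food_1']),
    'classes_train': np.array([2, 3]),
    'impos_train': np.array([[1], [2]]),        # 1-based image positions
    'ims_train': np.random.rand(2, 3, 64, 64),  # channels-first images
    'numims_train': np.array([1, 1]),
}

gen = batch_generator(demo_db, MockFsq(), batch_size=4, augment=True)
batch_x, outputs = next(gen)
print(batch_x.shape, outputs['food_category'].shape)  # (4, 3, 64, 64) (4, 16)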
import random

import keras_preprocessing.image
from keras_preprocessing.image import ImageDataGenerator


def create_aug_trees(pil_img):
    img_arr = keras_preprocessing.image.img_to_array(pil_img)
    pil_img_list = []
    generator = ImageDataGenerator()
    # Produce nine randomly transformed copies of the input image.
    for i in range(9):
        test = generator.apply_transform(img_arr, transform_parameters={
            "theta": random.randrange(-10, 10),  # rotation in degrees
            "shear": random.randrange(0, 20),    # shear angle in degrees
            "zx": random.uniform(0.9, 1.1),      # zoom along x
            "zy": random.uniform(0.9, 1.1),      # zoom along y
            "flip_horizontal": random.random() > 0.5,
            "flip_vertical": random.random() > 0.5,
            "brightness": random.uniform(0.9, 1.1)
        })
        final = keras_preprocessing.image.array_to_img(test)
        pil_img_list.append(final)
    return pil_img_list
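# Usage sketch (illustrative; the input image is synthesized here rather than
# loaded from disk).
from PIL import Image
import numpy as np

demo_img = Image.fromarray((np.random.rand(64, 64, 3) * 255).astype('uint8'))
augmented = create_aug_trees(demo_img)
print(len(augmented))  # 9 randomly transformed PIL images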
import random

import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import Sequence


class Sequencer(Sequence):
    def __init__(self, X, y, batch_size, sequence_size, data_gen_args):
        """A `Sequence` implementation that can augment data

        X: The numpy array of inputs.
        y: The numpy array of targets.
        batch_size: The generator mini-batch size.
        sequence_size: The number of elements in the sequence
        data_gen_args: The arguments for the ImageDataGenerator to apply on X, y
        """
        self.X = X
        self.y = y
        self.batch_size = batch_size
        # A random permutation used as per-sample augmentation seeds.
        self.sequence_encoding = random.sample(range(0, sequence_size),
                                               sequence_size)
        # The arguments must be unpacked; passing the dict itself would bind
        # it to ImageDataGenerator's first positional parameter.
        self.imgaug = ImageDataGenerator(**data_gen_args)

    def __len__(self):
        return len(self.sequence_encoding) // self.batch_size

    def on_epoch_end(self):
        pass

    def __getitem__(self, batch_idx):
        batch_X = np.zeros((self.batch_size, self.X.shape[1],
                            self.X.shape[2], self.X.shape[3]))
        batch_y = np.zeros((self.batch_size, self.y.shape[1],
                            self.y.shape[2], self.y.shape[3]))
        for i in range(self.batch_size):
            # Stride by batch_size so consecutive batches do not overlap.
            # (The original used `batch_idx + i`, which made successive
            # batches nearly identical.)
            cur_real_idx = (batch_idx * self.batch_size + i) % self.X.shape[0]
            # This creates a dictionary with the params
            params = self.imgaug.get_random_transform(
                batch_X[i].shape,
                seed=self.sequence_encoding[batch_idx * self.batch_size + i])
            # We can now deterministically augment all the images
            batch_X[i] = self.imgaug.apply_transform(self.X[cur_real_idx], params)
            batch_y[i] = self.imgaug.apply_transform(self.y[cur_real_idx], params)
        return batch_X, batch_y
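# Usage sketch (illustrative; the arrays and augmentation arguments are
# assumptions, not from the original source). Because the transform seeds come
# from `sequence_encoding`, re-reading the same batch index reproduces the
# same augmentations.
demo_X = np.random.rand(16, 64, 64, 3)
demo_y = np.random.rand(16, 64, 64, 1)
seq = Sequencer(demo_X, demo_y, batch_size=4, sequence_size=16,
                data_gen_args={'rotation_range': 15, 'horizontal_flip': True})
bx, by = seq[0]
print(bx.shape, by.shape)  # (4, 64, 64, 3) (4, 64, 64, 1)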
import numpy as np
from scipy.stats import multivariate_normal
from skimage.transform import resize
from sklearn.utils import shuffle
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import Sequence


class TrainGenerator(Sequence):
    def __init__(self, x_data, y_data, batch_size=10, augmentation=True,
                 image_shape=None):
        self.x_data, self.y_data = shuffle(x_data, y_data)
        self.image_shape = image_shape
        self.batch_size = batch_size
        self.augmentation = augmentation
        self.image_transform = ImageDataGenerator(rotation_range=20,
                                                  horizontal_flip=True,
                                                  vertical_flip=True,
                                                  zoom_range=0.3,
                                                  width_shift_range=0.1,
                                                  height_shift_range=0.1,
                                                  shear_range=0.2,
                                                  fill_mode='reflect')

    def __len__(self):
        return int(np.ceil(len(self.x_data) / self.batch_size))

    def __getitem__(self, idx):
        x_batch = self.x_data[idx * self.batch_size:(idx + 1) * self.batch_size]
        y_batch = self.y_data[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch = [self.transform(x, y) for x, y in zip(x_batch, y_batch)]
        return np.array([d[0] for d in batch]), np.array([d[1] for d in batch])

    def transform(self, image, label):
        image = np.expand_dims(image, -1)
        # Build a Gaussian heatmap target centred on the labelled point,
        # given as (x, y) in normalized [0, 1] coordinates.
        x, y = np.mgrid[0:image.shape[0], 0:image.shape[1]]
        pos = np.dstack((x, y))
        y0 = int(label[1] * (pos.shape[0] - 1))
        x0 = int(label[0] * (pos.shape[1] - 1))
        target = multivariate_normal.pdf(pos, [y0, x0], [100, 100])
        target = np.expand_dims(target, axis=-1)
        if self.augmentation:
            # Apply the same random transform to the image and its target so
            # they stay aligned.
            params = self.image_transform.get_random_transform(image.shape)
            image = self.image_transform.apply_transform(image, params)
            target = self.image_transform.apply_transform(target, params)
        image = self.prepImage(image)
        target = self.prepTarget(target)
        return image, target

    def prepTarget(self, target):
        if self.image_shape:
            target = resize(target, self.image_shape, order=3)
        target = (target - np.min(target)) / (np.max(target) - np.min(target))  # normalize to [0, 1]
        return target

    def prepImage(self, image):
        if self.image_shape:
            image = resize(image, self.image_shape, order=3)
        image = (image - np.mean(image)) / (np.std(image) + 1e-8)  # standardize
        return image

    def on_epoch_end(self):
        self.x_data, self.y_data = shuffle(self.x_data, self.y_data)
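# Usage sketch (illustrative; the synthetic frames and keypoint labels below
# are assumptions). Labels are (x, y) keypoints in normalized [0, 1]
# coordinates; the generator converts each into a Gaussian heatmap target.
demo_frames = np.random.rand(20, 96, 96)
demo_points = np.random.rand(20, 2)
gen = TrainGenerator(demo_frames, demo_points, batch_size=5,
                     augmentation=True, image_shape=(64, 64))
images, heatmaps = gen[0]
print(images.shape, heatmaps.shape)  # (5, 64, 64, 1) (5, 64, 64, 1)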
import json
import re
import zipfile

import cv2
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import Sequence


class TimeSeriesGenerator(Sequence):
    def __init__(self, zip_file: zipfile.ZipFile, batch_size: int,
                 frames: list, target_classes: list,
                 target_classes_rules: dict, img_height, img_width,
                 aug=None, series_len=1, series_interval=1):
        self.zip_file = zip_file
        self.aug = aug
        self.class_num = len(target_classes)
        self.is_softmax_activation = self.class_num > 1
        self.batch_size = batch_size
        self.series_len = series_len
        self.img_width = img_width
        self.img_height = img_height
        self._class_mapping = self._get_class_mapping(target_classes,
                                                      target_classes_rules)
        self._samples = self._get_samples(frames, series_len, series_interval)
        self._image_processor = ImageDataGenerator(**(aug or {}))

    def _get_samples(self, frames, series_len, series_interval):
        samples = []
        # parse frame names
        pattern = r'(?P<video_name>video.+)_frame_(?P<frame>\d+)'
        labelled_frames = (re.match(pattern, f) for f in frames)
        labelled_frames = [{
            'video_name': m.group('video_name'),
            'frame': int(m.group('frame')),
            'name': m.string
        } for m in labelled_frames]

        # read video fps info from auxiliary json file
        video_fps = self.zip_file.read('video_fps.json').decode('utf-8')
        video_fps = json.loads(video_fps)

        all_frames = set(f for f in self.zip_file.namelist()
                         if f.startswith('images/'))
        all_masks = set(f for f in self.zip_file.namelist()
                        if f.startswith('segments/'))

        # create samples from labelled frames
        for frame in labelled_frames:
            series_step = int(video_fps[frame['video_name']] * series_interval)
            series_end = frame['frame']
            series_start = max(series_end - series_step * (series_len - 1), 0)
            frames_idx = [
                *range(series_start, series_end, series_step), series_end
            ]
            if len(frames_idx) == series_len:
                series = [
                    '{}_frame_{}'.format(frame['video_name'], idx)
                    for idx in frames_idx
                ]
                # make sure all images and masks exist before processing
                assert all('images/{}.jpg'.format(f) in all_frames for f in series),\
                    'images sequence not found: video {video_name}, frame {frame}'.format(**frame)
                assert 'segments/{}.png'.format(series[-1]) in all_masks,\
                    'segments not found: video {video_name}, frame {frame}'.format(**frame)
                samples.append(series)
        return samples

    def _get_class_mapping(self, target_classes, target_classes_rules):
        mapping = {}
        # read labels
        labels = self.zip_file.read('labels.txt').decode('utf-8').split('\r\n')
        labels = {
            int(l.split(':')[0]): l.split(':')[1].lstrip(' ')
            for l in labels
        }
        for target_idx, target_class in enumerate(target_classes):
            if target_class in target_classes_rules:
                for rule_list_item in target_classes_rules[target_class]:
                    label_idx = next((idx for idx, label in labels.items()
                                      if label == rule_list_item), None)
                    if label_idx is not None:
                        mapping[label_idx] = target_idx
                    else:
                        raise Exception(
                            "Can't find mapping target class '{}' with rule '{}' in labels: '{}'"
                            .format(target_class, rule_list_item, dict(labels)))
            else:
                # skip exception on background class
                if target_idx == 0 and labels[0] == 'background':
                    continue
                raise Exception(
                    "Can't find mapping rule for the class '{}' with rules '{}' and labels: '{}'"
                    .format(target_class, target_classes_rules, dict(labels)))
        for i in range(self.class_num):
            if i not in mapping.values():
                # a negative key means the class is skipped but could still be
                # used as background; the extra -1 handles i == 0, since there
                # is no -0 for ints
                mapping[-i - 1] = i
        return mapping

    def _unpack_to_classes_with_mapping(self, mask):
        new_y = np.zeros(mask.shape[:2] + (self.class_num, ), dtype=mask.dtype)
        filled_mask = np.zeros(mask.shape[:2], dtype=np.bool_)
        items_to_map = sorted(self._class_mapping.items(),
                              key=lambda x: x[0], reverse=True)
        for source_idx, target_idx in items_to_map:
            # fill class 0 with all not-yet-filled pixels when the model has
            # a softmax activation
            if self.is_softmax_activation and target_idx == 0:
                new_y[:, :, target_idx][~filled_mask] = 1
                continue
            # update only pixels that are still 0 - this allows combining many
            # masks into one without overwriting pixels
            update_mask = (new_y[:, :, target_idx] == 0) & (mask == source_idx)
            # skip pixels already filled by previous classes
            if self.is_softmax_activation:
                update_mask[update_mask & filled_mask] = False
            filled_mask = filled_mask | update_mask
            new_y[:, :, target_idx][update_mask] = 1
        return new_y

    def __len__(self):
        return int(np.ceil(len(self._samples) / self.batch_size))

    def __getitem__(self, idx):
        batch_start = idx * self.batch_size
        batch_end = min(batch_start + self.batch_size, len(self._samples))
        x_batch = np.zeros((batch_end - batch_start, self.series_len,
                            self.img_height, self.img_width, 3),
                           dtype=np.float32)
        y_batch = np.zeros((batch_end - batch_start, self.img_height,
                            self.img_width, self.class_num), dtype=np.uint8)

        for batch_idx, sample_idx in enumerate(range(batch_start, batch_end)):
            # apply the same transformations to all frames in a sample
            transform_params = self._image_processor.get_random_transform(
                (self.img_height, self.img_width))

            for series_idx, frame_name in enumerate(self._samples[sample_idx]):
                # load image
                img_byte_string = self.zip_file.read(
                    'images/{}.jpg'.format(frame_name))
                img = np.frombuffer(img_byte_string, dtype=np.uint8)
                img = cv2.imdecode(img, cv2.IMREAD_COLOR)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                # resize to output size
                img = cv2.resize(img, (self.img_width, self.img_height),
                                 interpolation=cv2.INTER_LINEAR)
                # apply augmentation
                img = self._image_processor.apply_transform(
                    img, transform_params)
                x_batch[batch_idx, series_idx, :, :, :] = img

            # load mask
            mask_byte_string = self.zip_file.read(
                'segments/{}.png'.format(self._samples[sample_idx][-1]))
            mask = np.frombuffer(mask_byte_string, dtype=np.uint8)
            mask = cv2.imdecode(mask, cv2.IMREAD_GRAYSCALE)
            # resize to output size
            mask = cv2.resize(mask, (self.img_width, self.img_height),
                              interpolation=cv2.INTER_NEAREST)
            # transform grayscale mask into a class_num-channel mask
            mask = self._unpack_to_classes_with_mapping(mask)
            # disable intensity transformations for the mask, then augment it
            transform_params['channel_shift_intensity'] = None
            transform_params['brightness'] = None
            mask = self._image_processor.apply_transform(mask, transform_params)
            y_batch[batch_idx, :, :, :] = mask

        return x_batch, y_batch
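# Usage sketch (illustrative; the in-memory archive below mocks the zip layout
# that the class above expects - 'images/', 'segments/', 'labels.txt' and
# 'video_fps.json' - and all names and contents are assumptions).
import io

buf = io.BytesIO()
with zipfile.ZipFile(buf, 'w') as zf:
    zf.writestr('labels.txt', '0: background\r\n1: person')
    zf.writestr('video_fps.json', json.dumps({'video_a': 1}))
    frame = np.zeros((32, 32, 3), dtype=np.uint8)
    mask = np.zeros((32, 32), dtype=np.uint8)
    # three consecutive frames, with a segmentation mask for the last one
    for i in range(3):
        zf.writestr('images/video_a_frame_{}.jpg'.format(i),
                    cv2.imencode('.jpg', frame)[1].tobytes())
    zf.writestr('segments/video_a_frame_2.png',
                cv2.imencode('.png', mask)[1].tobytes())

gen = TimeSeriesGenerator(zipfile.ZipFile(buf), batch_size=1,
                          frames=['video_a_frame_2'],
                          target_classes=['person'],
                          target_classes_rules={'person': ['person']},
                          img_height=32, img_width=32,
                          series_len=3, series_interval=1)
x, y = gen[0]
print(x.shape, y.shape)  # (1, 3, 32, 32, 3) (1, 32, 32, 1)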