Exemple #1
0
class DataGen(Sequence):
  """Keras `Sequence` yielding `(images, masks, sample_weights)` batches.

  When `shuffle` is truthy the data is reordered at the end of every epoch
  and each batch is augmented; otherwise batches are returned untouched.

  Args:
    imgs: image array indexed as `imgs[n, :, :, :]`.
    masks: mask array indexed the same way as `imgs`.
    weights: per-sample weights, sliced along the first axis.
    batch_size: number of samples per batch.
    shuffle: enables both the per-epoch shuffle and the augmentation.
  """

  def __init__(self, imgs, masks, weights, batch_size, shuffle):
    self.imgs = imgs
    self.masks = masks
    self.weights = weights
    self.batch_size = batch_size
    self.shuffle = shuffle
    self.aug = ImageDataGenerator(
      horizontal_flip=True,
      vertical_flip=True,
      shear_range=0.05,
      rotation_range=20,
      width_shift_range=0.1,
      height_shift_range=0.1
    )

  def on_epoch_end(self):
    """Shuffle images, masks and weights in place with the same permutation."""
    if not self.shuffle:
      return
    # A fresh RandomState with the same seed is used for every container so
    # that containers of equal length are reordered consistently.
    for data in (self.imgs, self.masks, self.weights):
      np.random.RandomState(seed=444).shuffle(data)

  def __len__(self):
    """Return the number of full batches (a trailing partial batch is dropped)."""
    return self.imgs.shape[0] // self.batch_size

  def __getitem__(self, idx):
    """Return batch `idx` as `(img_batch, mask_batch, sample_weights)`."""
    window = slice(idx * self.batch_size, (idx + 1) * self.batch_size)
    img_batch = self.imgs[window, :, :, :]
    mask_batch = self.masks[window, :, :, :]
    sample_weights = self.weights[window]

    if self.shuffle:
      # Draw the parameters from the configured generator instead of building
      # them by hand: `apply_transform` expects `tx`/`ty` in pixels and the
      # configured ranges are symmetric, which the previous hand-rolled
      # dictionary (fractions in [0, 0.1), rotation in [0, 20)) did not honour.
      # One parameter set is shared by the whole batch so every image and its
      # mask receive the same geometric transform.
      transform_dict = self.aug.get_random_transform(img_batch.shape[1:])

      img_batch = np.array(
        [self.aug.apply_transform(img, transform_dict) for img in img_batch])
      mask_batch = np.array(
        [self.aug.apply_transform(mask, transform_dict) for mask in mask_batch])

    return img_batch, mask_batch, sample_weights
Exemple #2
0
def batch_generator(db, fsq, batch_size=100, partition='train', augment=False,
                    num_classes=16):
    """Yield training batches of images with ingredient and category targets.

    Items are drawn uniformly at random (with replacement) from the chosen
    partition. Every image of a drawn item is appended, so a batch holds at
    least `batch_size` images and may hold more.

    Args:
        db: mapping providing the `ids_*`, `classes_*`, `impos_*`, `ims_*`
            and `numims_*` entries for `partition`.
        fsq: object whose `get_vector_for_food(id)` returns the ingredient
            vector of an item; items with an empty vector are skipped.
        batch_size: minimum number of images per batch.
        partition: suffix selecting the partition inside `db`.
        augment: when true, roughly one image in five is transformed with a
            randomly chosen pre-computed augmentation.
        num_classes: number of food categories used for one-hot encoding.

    Yields:
        `(batch_x, {'ingre_category': ..., 'food_category': ...})` forever.
    """
    # TODO: add ingredient result in output, format: outputs=[ingre_category, food_category]
    ids = db['ids_{}'.format(partition)]
    classes = db['classes_{}'.format(partition)]
    impos = db['impos_{}'.format(partition)]
    ims = db['ims_{}'.format(partition)]
    numims = db['numims_{}'.format(partition)]
    partition_size = len(ids)

    if augment:
        augmenter = ImageDataGenerator(
            data_format="channels_first",
            horizontal_flip=True,
            zoom_range=0.1,
            width_shift_range=0.2,
            height_shift_range=0.2,
            fill_mode="constant"
        )
        im_shape = ims[0].shape
        # Pre-compute a pool of transforms once; batches only pick from it.
        random_augmenters = [
            augmenter.get_random_transform(im_shape) for _ in range(1000)
        ]
        print("Created augmenter")

    while True:
        images = []
        categories = []
        ingredients = []
        while len(images) < batch_size:
            # randint's upper bound is exclusive, matching the inclusive
            # range [0, partition_size - 1] of the deprecated
            # np.random.random_integers.
            i = np.random.randint(partition_size)
            food_id = ids[i].decode("utf-8")
            # Since category=0 is the background class, we can ignore that
            # Experiment with ignoring category 1 (peanut butter, comprising
            # half of all)
            category = classes[i]
            ingr = fsq.get_vector_for_food(food_id)
            if len(ingr) == 0:
                continue
            for j in range(numims[i]):
                # Stored image positions are 1-based.
                index = impos[i][j] - 1
                # Only augment 1/5 of the time
                if augment and np.random.randint(5) >= 4:
                    # Index the pool directly; np.random.choice would first
                    # convert the whole list of dicts to an array on each call.
                    opts = random_augmenters[
                        np.random.randint(len(random_augmenters))]
                    image = augmenter.apply_transform(ims[index], opts)
                else:
                    image = ims[index]
                images.append(image)
                ingredients.append(ingr)
                categories.append(category)

        batch_x = np.array(images)
        batch_y = to_categorical(categories, num_classes)

        yield (batch_x, {'ingre_category': np.array(ingredients),
                         'food_category': batch_y})
Exemple #3
0
def create_aug_trees(pil_img):
    """Return nine randomly augmented PIL copies of `pil_img`.

    Each copy gets its own random rotation, shear, per-axis zoom, optional
    horizontal/vertical flip and brightness factor.
    """
    source = keras_preprocessing.image.img_to_array(pil_img)
    augmenter = ImageDataGenerator()

    variants = []
    for _ in range(9):
        # Parameters are drawn in a fixed order so a seeded `random` module
        # produces the same sequence of augmentations.
        params = {}
        params["theta"] = random.randrange(-10, 10)
        params["shear"] = random.randrange(0, 20)
        params["zx"] = random.uniform(0.9, 1.1)
        params["zy"] = random.uniform(0.9, 1.1)
        params["flip_horizontal"] = random.random() > 0.5
        params["flip_vertical"] = random.random() > 0.5
        params["brightness"] = random.uniform(0.9, 1.1)

        augmented = augmenter.apply_transform(
            source, transform_parameters=params)
        variants.append(keras_preprocessing.image.array_to_img(augmented))

    return variants
class Sequencer(Sequence):
    """A `Sequence` that applies identical random augmentation to X and y."""

    def __init__(self, X, y, batch_size, sequence_size, data_gen_args):
        """A `Sequence` implementation that can augment data
            X: The numpy array of inputs.
            y: The numpy array of targets.
            batch_size: The generator mini-batch size.
            sequence_size: The number of elements in the sequence
            data_gen_args: The arguments for the ImageDataGenerator to apply on X, y
        """

        self.X = X
        self.y = y
        self.batch_size = batch_size
        # A random permutation of 0..sequence_size-1; each entry is used as
        # the augmentation seed of one position in the sequence.
        self.sequence_encoding = random.sample(range(0, sequence_size), sequence_size)
        # The options must be unpacked as keyword arguments; passing the dict
        # positionally would bind it to the generator's first parameter and
        # leave every augmentation option at its default.
        self.imgaug = ImageDataGenerator(**(data_gen_args or {}))

    def __len__(self):
        """Return the number of full batches in the sequence."""
        return len(self.sequence_encoding) // self.batch_size

    def on_epoch_end(self):
        """Nothing to do between epochs."""
        pass

    def __getitem__(self, batch_idx):
        """Return batch `batch_idx` as `(batch_X, batch_y)` of augmented samples."""
        batch_X = np.zeros((self.batch_size, self.X.shape[1], self.X.shape[2], self.X.shape[3]))
        batch_y = np.zeros((self.batch_size, self.y.shape[1], self.y.shape[2], self.y.shape[3]))

        first_pos = batch_idx * self.batch_size
        for i in range(self.batch_size):
            # Position of this element in the whole sequence. The same position
            # selects both the seed and (wrapped around the real data) the
            # sample, so consecutive batches no longer overlap.
            seq_pos = first_pos + i
            cur_real_idx = seq_pos % self.X.shape[0]
            # This creates a dictionary with the params
            params = self.imgaug.get_random_transform(
                batch_X[i].shape, seed=self.sequence_encoding[seq_pos])
            # We can now deterministically augment the input and its target
            batch_X[i] = self.imgaug.apply_transform(self.X[cur_real_idx], params)
            batch_y[i] = self.imgaug.apply_transform(self.y[cur_real_idx], params)

        return batch_X, batch_y
Exemple #5
0
class TrainGenerator(Sequence):
    """Keras `Sequence` producing (image, Gaussian heat-map target) batches.

    Each label is read as a pair of normalised coordinates `(label[0],
    label[1])` that is scaled to pixel coordinates and turned into a
    Gaussian heat-map. Image and heat-map optionally receive the same random
    geometric augmentation.

    Args:
        x_data: input images (each image gets a trailing channel axis added).
        y_data: labels, one per image.
        batch_size: number of samples per batch.
        augmentation: whether to apply random augmentation.
        image_shape: optional output shape passed to `resize`.
    """

    def __init__(self,
                 x_data,
                 y_data,
                 batch_size=10,
                 augmentation=True,
                 image_shape=None):
        self.x_data, self.y_data = shuffle(x_data, y_data)
        self.image_shape = image_shape
        self.batch_size = batch_size
        self.augmentation = augmentation
        self.image_transform = ImageDataGenerator(rotation_range=20,
                                                  horizontal_flip=True,
                                                  vertical_flip=True,
                                                  zoom_range=0.3,
                                                  width_shift_range=0.1,
                                                  height_shift_range=0.1,
                                                  shear_range=0.2,
                                                  fill_mode='reflect')

    def __len__(self):
        """Return the number of batches, counting a trailing partial batch."""
        return int(np.ceil(len(self.x_data) / self.batch_size))

    def __getitem__(self, idx):
        """Return batch `idx` as `(images, targets)` arrays."""
        window = slice(idx * self.batch_size, (idx + 1) * self.batch_size)
        pairs = [
            self.transform(x, y)
            for x, y in zip(self.x_data[window], self.y_data[window])
        ]
        images = np.array([pair[0] for pair in pairs])
        targets = np.array([pair[1] for pair in pairs])
        return images, targets

    def transform(self, image, label):
        """Build the heat-map for `label` and preprocess image and heat-map."""
        image = np.expand_dims(image, -1)
        # pos[r, c] == (r, c): row/column coordinates of every pixel.
        rows, cols = np.mgrid[0:image.shape[0], 0:image.shape[1]]
        pos = np.dstack((rows, cols))
        # label[1] scales along rows, label[0] along columns.
        y0 = int(label[1] * (pos.shape[0] - 1))
        x0 = int(label[0] * (pos.shape[1] - 1))
        target = multivariate_normal.pdf(pos, [y0, x0], [100, 100])
        target = np.expand_dims(target, axis=-1)
        if self.augmentation:
            # Same parameters for both so the heat-map stays aligned.
            params = self.image_transform.get_random_transform(image.shape)
            image = self.image_transform.apply_transform(image, params)
            target = self.image_transform.apply_transform(target, params)

        image = self.prepImage(image)
        target = self.prepTarget(target)

        return image, target

    def prepTarget(self, target):
        """Optionally resize `target`, then min-max normalise it to [0, 1]."""
        if self.image_shape:
            target = resize(target, self.image_shape, order=3)
        lowest = np.min(target)
        span = np.max(target) - lowest
        if span == 0:
            # A constant target has no range to normalise; return zeros
            # instead of the NaNs a 0/0 division would produce.
            return np.zeros_like(target)
        return (target - lowest) / span

    def prepImage(self, image):
        """Optionally resize `image`, then standardise it (zero mean, unit std)."""
        if self.image_shape:
            image = resize(image, self.image_shape, order=3)
        # The small epsilon keeps the division defined for constant images.
        image = (image - np.mean(image)) / (np.std(image) + 1e-8)
        return image

    def on_epoch_end(self):
        """Reshuffle inputs and labels together after every epoch."""
        self.x_data, self.y_data = shuffle(self.x_data, self.y_data)
Exemple #6
0
class TimeSeriesGenerator(Sequence):
    """Keras `Sequence` yielding frame series and segmentation masks from a zip.

    The archive is read for `video_fps.json`, `labels.txt`,
    `images/<name>.jpg` and `segments/<name>.png`. Every sample is a series of
    `series_len` frames ending at a labelled frame; the target is the mask of
    that last frame, expanded to one channel per target class.

    Args:
        zip_file: opened archive holding images, masks and metadata.
        batch_size: number of samples per batch.
        frames: labelled frame names of the form `<video...>_frame_<number>`.
        target_classes: names of the output classes, in channel order.
        target_classes_rules: maps a target class to the list of source
            label names that belong to it.
        img_height: output height in pixels.
        img_width: output width in pixels.
        aug: optional keyword arguments for `ImageDataGenerator`.
        series_len: number of frames per sample.
        series_interval: distance between frames of a series, multiplied by
            the per-video value from `video_fps.json` to get a frame step.
    """

    def __init__(self,
                 zip_file: zipfile.ZipFile,
                 batch_size: int,
                 frames: list,
                 target_classes: list,
                 target_classes_rules: dict,
                 img_height,
                 img_width,
                 aug=None,
                 series_len=1,
                 series_interval=1):
        self.zip_file = zip_file
        self.aug = aug
        self.class_num = len(target_classes)
        self.is_softmax_activation = self.class_num > 1

        self.batch_size = batch_size
        self.series_len = series_len
        self.img_width = img_width
        self.img_height = img_height

        self._class_mapping = self._get_class_mapping(target_classes,
                                                      target_classes_rules)
        self._samples = self._get_samples(frames, series_len, series_interval)
        self._image_processor = ImageDataGenerator(**(aug or {}))

    def _get_samples(self, frames, series_len, series_interval):
        """Return the list of frame-name series, one per usable labelled frame.

        Labelled frames too close to the start of their video to provide
        `series_len` frames are skipped.

        Raises:
            ValueError: if a frame name does not follow the expected pattern.
            AssertionError: if an image or mask of a series is missing from
                the archive.
        """
        samples = []

        # parse frame names
        pattern = re.compile(r'(?P<video_name>video.+)_frame_(?P<frame>\d+)')
        labelled_frames = []
        for frame_name in frames:
            match = pattern.match(frame_name)
            if match is None:
                # Fail with a readable message rather than an AttributeError
                # on `None.group`.
                raise ValueError(
                    "Frame name '{}' does not match '<video...>_frame_<number>'"
                    .format(frame_name))
            labelled_frames.append({
                'video_name': match.group('video_name'),
                'frame': int(match.group('frame')),
                'name': match.string
            })

        # read video fps info from auxiliary json file
        video_fps = self.zip_file.read('video_fps.json').decode('utf-8')
        video_fps = json.loads(video_fps)

        # list the archive once and index both folders as sets for O(1) lookup
        archive_names = self.zip_file.namelist()
        all_frames = {f for f in archive_names if f.startswith('images/')}
        all_masks = {f for f in archive_names if f.startswith('segments/')}

        # create samples from labelled frames
        for frame in labelled_frames:
            series_step = int(video_fps[frame['video_name']] * series_interval)
            series_end = frame['frame']
            series_start = max(series_end - series_step * (series_len - 1), 0)
            frames_idx = [
                *range(series_start, series_end, series_step), series_end
            ]
            # A shorter list means the series was clipped at frame 0.
            if len(frames_idx) == series_len:
                series = [
                    '{}_frame_{}'.format(frame['video_name'], idx)
                    for idx in frames_idx
                ]

                # make sure all images and masks exist before process
                assert all('images/{}.jpg'.format(f) in all_frames for f in series),\
                    'images sequence not found: video {video_name}, frame {frame}'.format(**frame)
                assert 'segments/{}.png'.format(series[-1]) in all_masks,\
                    'segments not found: video {video_name}, frame {frame}'.format(**frame)

                samples.append(series)

        return samples

    def _get_class_mapping(self, target_classes, target_classes_rules):
        """Build the `{source label index: target class index}` mapping.

        Target classes that end up without any source label get a negative
        placeholder key `-i - 1`.

        Raises:
            ValueError: if a rule names an unknown label, or a target class
                (other than a leading background class) has no rule.
        """
        mapping = {}

        # read labels; each non-empty line is "<index>: <name>"
        raw_labels = self.zip_file.read('labels.txt').decode('utf-8')
        labels = {}
        # splitlines() accepts any line ending and yields no trailing empty
        # entry, unlike splitting on a literal '\r\n'.
        for line in raw_labels.splitlines():
            if not line.strip():
                continue
            fields = line.split(':')
            labels[int(fields[0])] = fields[1].lstrip(' ')

        for target_idx, target_class in enumerate(target_classes):
            if target_class in target_classes_rules:
                for rule_list_item in target_classes_rules[target_class]:
                    label_idx = next((idx for idx, label in labels.items()
                                      if label == rule_list_item), None)
                    if label_idx is not None:
                        mapping[label_idx] = target_idx
                    else:
                        raise ValueError(
                            "Can't find mapping target class '{}' with rule '{}' in labels: '{}'"
                            .format(target_class, rule_list_item, labels))
            else:
                # skip exception on background class
                if target_idx == 0 and labels.get(0) == 'background':
                    continue
                raise ValueError(
                    "Can't find mapping rule for the class '{}' with rules '{}' and labels: '{}'"
                    .format(target_class, target_classes_rules, labels))

        for i in range(self.class_num):
            if i not in mapping.values():
                # minus in key mean that it is skipped, but could be used as background
                # extra -1 is used for the i==0 - there are no -0 with type int
                mapping[-i - 1] = i

        return mapping

    def _unpack_to_classes_with_mapping(self, mask):
        """Expand a 2-D label mask into a `(H, W, class_num)` binary mask."""
        new_y = np.zeros(mask.shape[:2] + (self.class_num, ), dtype=mask.dtype)

        filled_mask = np.zeros(mask.shape[:2], dtype=np.bool_)

        # Highest source index first; negative placeholder keys come last.
        items_to_map = sorted(self._class_mapping.items(),
                              key=lambda x: x[0],
                              reverse=True)
        for source_idx, target_idx in items_to_map:
            # filling class 0 with all not filled pixels when it has softmax activation
            if self.is_softmax_activation and target_idx == 0:
                new_y[:, :, target_idx][~filled_mask] = 1
                continue

            # updates only pixels with 0 values - it allows combining many
            # masks into one without overwriting pixels
            update_mask = (new_y[:, :, target_idx] == 0) & (mask == source_idx)

            # disable pixel filling when it already filled by previous classes
            if self.is_softmax_activation:
                update_mask[update_mask & filled_mask] = False
                filled_mask = filled_mask | update_mask

            new_y[:, :, target_idx][update_mask] = 1

        return new_y

    def _load_frame(self, frame_name):
        """Read `images/<frame_name>.jpg` as an RGB array resized to the output size."""
        img_byte_string = self.zip_file.read(
            'images/{}.jpg'.format(frame_name))
        img = np.frombuffer(img_byte_string, dtype=np.uint8)
        img = cv2.imdecode(img, cv2.IMREAD_COLOR)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        return cv2.resize(img, (self.img_width, self.img_height),
                          interpolation=cv2.INTER_LINEAR)

    def _load_mask(self, frame_name):
        """Read `segments/<frame_name>.png` as a grayscale mask resized to the output size."""
        mask_byte_string = self.zip_file.read(
            'segments/{}.png'.format(frame_name))
        mask = np.frombuffer(mask_byte_string, dtype=np.uint8)
        mask = cv2.imdecode(mask, cv2.IMREAD_GRAYSCALE)
        # Nearest-neighbour keeps the label values intact while resizing.
        return cv2.resize(mask, (self.img_width, self.img_height),
                          interpolation=cv2.INTER_NEAREST)

    def __len__(self):
        """Return the number of batches, counting a trailing partial batch."""
        return int(np.ceil(len(self._samples) / self.batch_size))

    def __getitem__(self, idx):
        """Return batch `idx` as `(x_batch, y_batch)`.

        `x_batch` is float32 of shape `(n, series_len, H, W, 3)` and `y_batch`
        is uint8 of shape `(n, H, W, class_num)`.
        """
        batch_start = idx * self.batch_size
        batch_end = min(batch_start + self.batch_size, len(self._samples))
        batch_len = batch_end - batch_start

        x_batch = np.zeros((batch_len, self.series_len,
                            self.img_height, self.img_width, 3),
                           dtype=np.float32)
        y_batch = np.zeros((batch_len, self.img_height,
                            self.img_width, self.class_num),
                           dtype=np.uint8)

        for batch_idx, sample_idx in enumerate(range(batch_start, batch_end)):
            series = self._samples[sample_idx]

            # apply same transformations to all frames in a sample
            transform_params = self._image_processor.get_random_transform(
                (self.img_height, self.img_width))

            for series_idx, frame_name in enumerate(series):
                img = self._load_frame(frame_name)
                img = self._image_processor.apply_transform(
                    img, transform_params)
                x_batch[batch_idx, series_idx, :, :, :] = img

            # the target is the mask of the last frame of the series
            mask = self._load_mask(series[-1])

            # transform grayscale mask to class_num channel mask
            mask = self._unpack_to_classes_with_mapping(mask)

            # disable some transformations for mask and apply augmentation
            transform_params['channel_shift_intensity'] = None
            transform_params['brightness'] = None
            # NOTE(review): the mask goes through the same processor as the
            # images and is then stored as uint8 - confirm that its
            # interpolation settings are appropriate for binary masks.
            mask = self._image_processor.apply_transform(
                mask, transform_params)

            y_batch[batch_idx, :, :, :] = mask

        return x_batch, y_batch