def process(img, augment):
    """Turn one raw paired image into a two-channel array.

    The input is wrapped with ``bytes_to_file`` and split into two planes by
    ``read_split_image``. When ``augment`` is truthy both planes are enlarged
    by a common random factor and shifted by a common random offset through
    ``shift_and_resize_image``. Each plane is then passed through
    ``normalize_image`` and the two are stacked along a new last axis.

    Args:
        img: Value accepted by ``bytes_to_file``.
        augment: Whether to apply the random enlarge-and-shift augmentation.

    Returns:
        Array of shape ``[dim0, dim1, 2]`` holding plane A and plane B.
    """
    img = bytes_to_file(img)
    try:
        img_A, img_B = read_split_image(img)

        if augment:
            # Both planes must receive the *same* scale and shift so that
            # they stay aligned with each other.
            w, h = img_A.shape
            scale = random.uniform(1.00, 1.20)
            # +1 acts as an epsilon so the enlarged size is always strictly
            # bigger than the original, which keeps the crop valid.
            nw, nh = int(scale * w) + 1, int(scale * h) + 1
            shift_x = int(np.ceil(np.random.uniform(0.01, nw - w)))
            shift_y = int(np.ceil(np.random.uniform(0.01, nh - h)))
            img_A, img_B = (
                shift_and_resize_image(img_A, shift_x, shift_y, nw, nh),
                shift_and_resize_image(img_B, shift_x, shift_y, nw, nh),
            )

        planes = [normalize_image(plane) for plane in (img_A, img_B)]
        # Promote each 2D plane to 3D so they can be joined on axis 2.
        planes = [
            np.reshape(plane, [plane.shape[0], plane.shape[1], 1])
            for plane in planes
        ]
        return np.concatenate(planes, axis=2)
    finally:
        img.close()
def dataloader(images, labels, anchors, batch_size=64, augment=True):
    """Endlessly yield randomly sampled training batches.

    Args:
        images: [num_images, image_width, image_height, 3]
        labels: [num_labels, num_gt, 4(tcx, tcy, tw, th)]
        anchors: Anchors forwarded unchanged to ``generate_gt``.
        batch_size: Number of distinct samples drawn for every batch.
        augment: When true, apply random flip, rotation and brightness
            changes to each sample before normalization.

    Yields:
        Tuple of float32 arrays
        ([batch_size, image_width, image_height, 3],
         [batch_size, num_boxes, 5(conf, tcx, tcy, tw, th)]).
    """
    all_indices = np.arange(len(images))

    while True:
        # Sampling without replacement: no sample repeats inside a batch.
        chosen = np.random.choice(
            all_indices, size=batch_size, replace=False)

        batch_images, batch_labels = [], []
        for idx in chosen:
            sample = np.array(images[idx], dtype=np.float32)
            target = np.array(labels[idx], dtype=np.float32)

            if augment:
                # Geometric transforms must update the boxes as well.
                sample, target = random_flip(sample, target)
                sample, target = random_rotate(sample, target)
                sample = random_brightness(sample)

            # Re-cast in case an augmentation step changed the dtype.
            sample = np.array(sample, dtype=np.float32)
            batch_images.append(normalize_image(sample))
            batch_labels.append(target)

        ground_truth = generate_gt(batch_labels, anchors)
        yield (
            np.array(batch_images, dtype=np.float32),
            np.array(ground_truth, dtype=np.float32),
        )
def _parse_train_data(self, data):
    """Convert one decoded example into an ``(image, labels)`` training pair.

    Args:
        data: Mapping providing the keys ``'gt_is_crowd'``, ``'gt_classes'``,
            ``'gt_bboxes'``, ``'gt_masks'``, ``'height'``, ``'width'`` and
            ``'image'``.

    Returns:
        Tuple ``(image, labels)``. ``image`` is the result of
        ``normalize_image`` (or an all-ones placeholder when the last
        dimension is not 3). ``labels`` is a dict with the keys
        ``'cls_targets'``, ``'box_targets'``, ``'bbox'``,
        ``'bbox_for_norm'``, ``'positiveness'``, ``'classes'``,
        ``'num_obj'``, ``'mask_target'`` and ``'max_id_for_anchors'``.
        ``classes``, ``bbox``, ``bbox_for_norm`` and ``mask_target`` are
        zero-padded along axis 0 to ``self._num_max_fix_padding`` entries.
    """
    # Only the (currently disabled) crowd filter below reads this value.
    is_crowds = data['gt_is_crowd']
    classes = data['gt_classes']
    boxes = data['gt_bboxes']
    masks = data['gt_masks']
    image_height = data['height']
    image_width = data['width']

    # Skips annotations with `is_crowd` = True.
    # TODO: Need to understand control_dependencies and tf.gather
    # if self._skip_crowd_during_training and self._is_training:
    #     num_groundtruths = tf.shape(input=classes)[0]
    #     with tf.control_dependencies([num_groundtruths, is_crowds]):
    #         indices = tf.cond(
    #             pred=tf.greater(tf.size(input=is_crowds), 0),
    #             true_fn=lambda: tf.where(tf.logical_not(is_crowds))[:, 0],
    #             false_fn=lambda: tf.cast(tf.range(num_groundtruths), tf.int64))
    #     classes = tf.gather(classes, indices)
    #     boxes = tf.gather(boxes, indices)
    #     masks = tf.gather(masks, indices)

    # read and normalize the image
    image = data['image']
    # convert image to range [0, 1], facilitates augmentation
    image = normalize_image(image)

    # we already resize the image when creating tfrecord
    # image = tf.image.resize(image, [self._output_size, self._output_size])

    # Ignore gray images: anything without 3 channels is replaced by a
    # placeholder of ones.
    # NOTE(review): the placeholder is built as [width, height, 3]; this only
    # matches the image layout if images are square - confirm.
    image = tf.cond(
        tf.equal(tf.shape(image)[-1], tf.constant(3)),
        true_fn=lambda: image,
        false_fn=lambda: tf.ones([image_width, image_height, 3])
    )

    # resize mask: add a channel axis for tf.image.resize, then resize to the
    # prototype resolution.
    masks = tf.expand_dims(masks, axis=-1)
    masks = tf.image.resize(
        masks, [self._proto_output_size, self._proto_output_size],
        method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    # Adding 0.5 and truncating rounds the values to whole numbers.
    masks = tf.cast(masks + 0.5, tf.int64)
    # Drop the channel axis with an explicit reshape. A bare tf.squeeze()
    # would also drop the instance axis when there is exactly one object,
    # which previously needed a separate expand_dims special case.
    masks = tf.reshape(
        masks, [-1, self._proto_output_size, self._proto_output_size])
    masks = tf.cast(masks, tf.float32)

    # TODO: SSD data augmentation (Photometrics, expand, sample_crop, mirroring)
    # data augmentation randomly
    # image, boxes, masks, classes = augmentation.random_augmentation(
    #     image, boxes, masks, self._output_size,
    #     self._proto_output_size, classes)

    # remember to unnormalize the bbox
    boxes = boxes * self._output_size

    # number of objects in the training sample (counted before padding)
    num_obj = tf.size(classes)

    # boxes rescaled to the proto output size
    boxes_norm = boxes * (self._proto_output_size / self._output_size)

    # matching anchors (done on the unpadded boxes and classes)
    cls_targets, box_targets, max_id_for_anchors, match_positiveness = \
        self._anchor_instance.matching(
            self._match_threshold, self._unmatched_threshold, boxes, classes)

    # Padding classes and mask to fix length [None, num_max_fix_padding, ...]
    # Background --> 0
    num_padding = self._num_max_fix_padding - tf.shape(classes)[0]
    pad_classes = tf.zeros([num_padding], dtype=tf.int64)
    pad_boxes = tf.zeros([num_padding, 4])
    pad_masks = tf.zeros(
        [num_padding, self._proto_output_size, self._proto_output_size])

    masks = tf.concat([masks, pad_masks], axis=0)
    classes = tf.concat([classes, pad_classes], axis=0)
    boxes = tf.concat([boxes, pad_boxes], axis=0)
    boxes_norm = tf.concat([boxes_norm, pad_boxes], axis=0)

    labels = {
        'cls_targets': cls_targets,
        'box_targets': box_targets,
        'bbox': boxes,
        'bbox_for_norm': boxes_norm,
        'positiveness': match_positiveness,
        'classes': classes,
        'num_obj': num_obj,
        'mask_target': masks,
        'max_id_for_anchors': max_id_for_anchors
    }
    return image, labels
def _parse_eval_data(self, data):
    """Convert one decoded example into an ``(image, labels)`` eval pair.

    Args:
        data: Mapping providing the keys ``'gt_is_crowd'``, ``'gt_classes'``,
            ``'gt_bboxes'``, ``'gt_masks'``, ``'height'``, ``'width'`` and
            ``'image'``.

    Returns:
        Tuple ``(image, labels)``. ``image`` is the result of
        ``normalize_image`` (or an all-ones placeholder when the last
        dimension is not 3). ``labels`` is a dict with the keys
        ``'cls_targets'``, ``'box_targets'``, ``'bbox'``,
        ``'bbox_for_norm'``, ``'positiveness'``, ``'classes'``,
        ``'num_obj'``, ``'mask_target'`` and ``'max_id_for_anchors'``.
        ``classes``, ``bbox``, ``bbox_for_norm`` and ``mask_target`` are
        zero-padded along axis 0 to ``self._num_max_fix_padding`` entries.
    """
    is_crowds = data['gt_is_crowd']
    classes = data['gt_classes']
    boxes = data['gt_bboxes']
    masks = data['gt_masks']
    image_height = data['height']
    image_width = data['width']

    # Skips annotations with `is_crowd` = True.
    # TODO: Need to understand control_dependencies and tf.gather
    if self._skip_crowd_during_training and self._is_training:
        num_groundtruths = tf.shape(input=classes)[0]
        with tf.control_dependencies([num_groundtruths, is_crowds]):
            # Keep the non-crowd indices; when there are no crowd flags at
            # all, keep every ground truth.
            indices = tf.cond(
                pred=tf.greater(tf.size(input=is_crowds), 0),
                true_fn=lambda: tf.where(tf.logical_not(is_crowds))[:, 0],
                false_fn=lambda: tf.cast(
                    tf.range(num_groundtruths), tf.int64))
        classes = tf.gather(classes, indices)
        boxes = tf.gather(boxes, indices)
        masks = tf.gather(masks, indices)

    # read and normalize the image
    image = data['image']
    image = normalize_image(image)

    # Ignore gray images: anything without 3 channels is replaced by a
    # placeholder of ones.
    # NOTE(review): the placeholder is built as [width, height, 3]; this only
    # matches the image layout if images are square - confirm.
    image = tf.cond(
        tf.equal(tf.shape(image)[-1], tf.constant(3)),
        true_fn=lambda: image,
        false_fn=lambda: tf.ones([image_width, image_height, 3]))

    # resize mask: add a channel axis for tf.image.resize
    masks = tf.expand_dims(masks, axis=-1)
    # Bilinear resizing produces fractional values; the +0.5 followed by the
    # integer cast below rounds them back to whole numbers.
    # NOTE(review): _parse_train_data uses NEAREST_NEIGHBOR here - confirm
    # that the difference is intentional.
    masks = tf.image.resize(
        masks, [self._proto_output_size, self._proto_output_size],
        method=tf.image.ResizeMethod.BILINEAR)
    masks = tf.cast(masks + 0.5, tf.int64)
    # Drop the channel axis with an explicit reshape. A bare tf.squeeze()
    # would also drop the instance axis when there is exactly one object,
    # which previously needed a separate expand_dims special case.
    masks = tf.reshape(
        tf.cast(masks, tf.float32),
        [-1, self._proto_output_size, self._proto_output_size])

    # rescale boxes to the resized image
    boxes = boxes * self._output_size

    # number of objects in the sample (counted before padding)
    num_obj = tf.size(classes)

    # boxes rescaled to the proto output size
    boxes_norm = boxes * (self._proto_output_size / self._output_size)

    # matching anchors (done on the unpadded boxes and classes)
    cls_targets, box_targets, max_id_for_anchors, match_positiveness = \
        self._anchor_instance.matching(
            self._match_threshold, self._unmatched_threshold, boxes, classes)

    # Padding classes and mask to fix length [None, num_max_fix_padding, ...]
    num_padding = self._num_max_fix_padding - tf.shape(classes)[0]
    pad_classes = tf.zeros([num_padding], dtype=tf.int64)
    pad_boxes = tf.zeros([num_padding, 4])
    pad_masks = tf.zeros(
        [num_padding, self._proto_output_size, self._proto_output_size])

    masks = tf.concat([masks, pad_masks], axis=0)
    classes = tf.concat([classes, pad_classes], axis=0)
    boxes = tf.concat([boxes, pad_boxes], axis=0)
    boxes_norm = tf.concat([boxes_norm, pad_boxes], axis=0)

    labels = {
        'cls_targets': cls_targets,
        'box_targets': box_targets,
        'bbox': boxes,
        'bbox_for_norm': boxes_norm,
        'positiveness': match_positiveness,
        'classes': classes,
        'num_obj': num_obj,
        'mask_target': masks,
        'max_id_for_anchors': max_id_for_anchors
    }
    return image, labels