def __data_generation(self, labels_temp):
    """Build one batch of edge-image windows with binary object labels.

    For every sample name in ``labels_temp`` the image
    ``<images_dir>/<name>.jpg`` is turned into an edge image with
    ``ed.auto_canny`` and the bounding boxes are read from
    ``<labels_dir>/<name>.xml``.  A random draw then decides whether the
    sample becomes

    * a background window (label ``0.0``): every annotated box is blanked
      out, the edge image is enlarged by an integer factor and a random
      window of the network input size is cut from it, or
    * an object window (label ``1.0``): one randomly chosen box plus a
      margin of 20 % of its longer side is cut out, optionally augmented,
      and centred on a square black canvas.

    Samples without any annotated object always become background windows.

    Args:
        labels_temp: Iterable of sample names (file stems without
            extension).

    Returns:
        Tuple ``(x, y)``.  ``x`` has shape ``(batch_size, image_height,
        image_width, channels)`` and holds the windows divided by 255;
        ``y`` has shape ``(batch_size, label_dim)``.  Both are created with
        ``np.empty``, so rows beyond ``len(labels_temp)`` are uninitialised.

    Raises:
        FileNotFoundError: If an image file cannot be read.
    """
    x = np.empty((self.batch_size, self.image_height, self.image_width,
                  self.channels))
    y = np.empty((self.batch_size, self.label_dim))

    for i, f in enumerate(labels_temp):
        image_path = os.path.join(self.images_dir, f + ".jpg")
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(
                "Could not read image: {}".format(image_path))
        orig_width = int(image.shape[1])
        orig_height = int(image.shape[0])

        # Generate edge image
        result = ed.auto_canny(image, self.multi_channel)
        if not self.multi_channel:
            result = np.reshape(result,
                                (result.shape[0], result.shape[1], 1))

        # Read ground truth as (xmin, ymin, xmax, ymax, class index) tuples
        bboxes = []
        tree = et.parse(os.path.join(self.labels_dir, f + ".xml"))
        for obj in tree.getroot().findall("./object"):
            class_index = self.class_labels.index(obj.find('name').text)
            bndbox = obj.find('bndbox')
            xmin = int(bndbox.find('xmin').text)
            ymin = int(bndbox.find('ymin').text)
            xmax = int(bndbox.find('xmax').text)
            ymax = int(bndbox.find('ymax').text)
            bboxes.append((xmin, ymin, xmax, ymax, class_index))

        # Values 0..50 out of 0..99 select the background branch.  The draw
        # happens before the emptiness check so the random stream is
        # consumed the same way for every sample.
        want_background = np.random.randint(0, 100) <= 50

        if want_background or not bboxes:
            # Create none class window
            label = 0.0

            # Blank out every annotated object.  Assigning a scalar
            # broadcasts over whatever the slice covers, so boxes that
            # reach past the image border cannot cause a shape mismatch.
            for box in bboxes:
                result[box[1]:box[3], box[0]:box[2]] = 0

            # Enlarge the image until a full window fits strictly inside
            scale = np.random.randint(1, 6)
            rescaled_width = scale * orig_width
            rescaled_height = scale * orig_height
            while (self.image_height >= rescaled_height
                   or self.image_width >= rescaled_width):
                scale += 1
                rescaled_width = scale * orig_width
                rescaled_height = scale * orig_height
            result = cv2.resize(result, (rescaled_width, rescaled_height))

            # Cut out random window
            xmin = np.random.randint(0, rescaled_width - self.image_width)
            ymin = np.random.randint(0, rescaled_height - self.image_height)
            xmax = xmin + self.image_width
            ymax = ymin + self.image_height
            window = result[ymin:ymax, xmin:xmax]
        else:
            # Create object class window around a random target object
            target_box = random.choice(bboxes)
            label = 1.0

            # Image region with object: bbox + 20 % of its longer side,
            # clamped to the image bounds
            box_width = target_box[2] - target_box[0]
            box_height = target_box[3] - target_box[1]
            margin = int(float(max(box_width, box_height)) * 0.2)
            window_xmin = max(target_box[0] - margin, 0)
            window_xmax = min(target_box[2] + margin, orig_width)
            window_ymin = max(target_box[1] - margin, 0)
            window_ymax = min(target_box[3] + margin, orig_height)
            cutout = result[window_ymin:window_ymax,
                            window_xmin:window_xmax]

            # Augment when the draw is in 0..50 out of 0..99
            if self.use_augmentation and np.random.randint(0, 100) <= 50:
                rand_val = np.random.randint(0, 3)
                if rand_val == 0:
                    # Shrink to 50-100 % of the size, keeping aspect ratio
                    aspect_ratio = (float(cutout.shape[1])
                                    / float(cutout.shape[0]))
                    new_height = (float(cutout.shape[0])
                                  * np.random.uniform(0.5, 1.0))
                    new_width = new_height * aspect_ratio
                    # Never request a 0-pixel side from cv2.resize
                    cutout = cv2.resize(
                        cutout,
                        (max(1, int(new_width)), max(1, int(new_height))))
                elif rand_val == 1:
                    # Rotate by max +/- 90 degrees around the centre
                    angle = round(np.random.uniform(-1, 1) * 90.0)
                    image_center = tuple(
                        np.array(cutout.shape[1::-1]) / 2)
                    rot_mat = cv2.getRotationMatrix2D(
                        image_center, angle, 1.0)
                    cutout = cv2.warpAffine(cutout, rot_mat,
                                            cutout.shape[1::-1],
                                            flags=cv2.INTER_LINEAR)
                else:
                    # Mirror horizontally (flip code 1 = around the y-axis)
                    cutout = cv2.flip(cutout, 1)

            # OpenCV drops a singleton channel axis; restore it
            if not self.multi_channel:
                cutout = np.reshape(
                    cutout, (cutout.shape[0], cutout.shape[1], 1))

            # Place image region centred on square black background
            side = max(cutout.shape[0], cutout.shape[1])
            window = np.zeros((side, side, self.channels))
            if cutout.shape[1] > cutout.shape[0]:
                margin = int((side - cutout.shape[0]) / 2)
                window[margin:(margin + cutout.shape[0]), :, :] = cutout
            else:
                margin = int((side - cutout.shape[1]) / 2)
                window[:, margin:(margin + cutout.shape[1]), :] = cutout

        window = cv2.resize(window, (self.image_width, self.image_height))
        # np.float was removed from NumPy; float64 is the dtype it aliased
        window = np.array(window, dtype=np.float64)
        if not self.multi_channel:
            window = np.reshape(window,
                                (window.shape[0], window.shape[1], 1))
        window /= 255.0

        x[i] = window
        y[i] = label

    return x, y
def predict(self, image):
    """Score every sliding window of an image at several scales.

    The image is converted to an edge image (``self.hed`` when
    ``self.use_hed`` is set, otherwise ``ed.auto_canny``), zero-padded to a
    square, resized to 1x-5x the network input size and scanned with
    windows of the network input size that overlap by 50 %.

    Args:
        image: Image array; ``image.shape[0]`` is read as the height and
            ``image.shape[1]`` as the width.

    Returns:
        List with one ``[xmin, ymin, xmax, ymax, prediction]`` entry per
        window, where ``prediction`` is ``model.predict(window, 1)[0]`` and
        the coordinates are the window corners scaled by
        ``orig_size / resized_size``.
    """
    orig_width = image.shape[1]
    orig_height = image.shape[0]

    if self.use_hed:
        edge_image = self.hed.get_edge_image(image, orig_width, orig_height,
                                             normalized=False)
        edge_image = np.reshape(
            edge_image, (edge_image.shape[0], edge_image.shape[1], 1))
    else:
        edge_image = ed.auto_canny(image, self.use_multichannel)
        if not self.use_multichannel:
            edge_image = np.reshape(
                edge_image, (edge_image.shape[0], edge_image.shape[1], 1))

    # Zero padding to make square, content centred along the shorter axis
    edge_height, edge_width = edge_image.shape[0], edge_image.shape[1]
    if edge_height != edge_width:
        side = max(edge_height, edge_width)
        top = round((side - edge_height) / 2.0)
        left = round((side - edge_width) / 2.0)
        padded = np.zeros((side, side, edge_image.shape[2]))
        padded[top:top + edge_height, left:left + edge_width, :] = edge_image
        edge_image = padded

    # Run exhaustive sliding window
    result = []
    model = self.__get_model()
    model.load_weights(self.weight_file)

    window_width = self.image_width
    window_height = self.image_height
    scales = range(1, 6)
    overlap = 0.5

    for scale in scales:
        image_width = round(scale * window_width)
        image_height = round(scale * window_height)

        # The resized image only depends on the scale, so build it once per
        # scale instead of once per window.
        # np.float was removed from NumPy; float64 is the dtype it aliased.
        resized_image = np.array(
            cv2.resize(edge_image, (image_width, image_height)),
            dtype=np.float64)
        # cv2.resize drops a singleton channel axis.  Restore it for every
        # single-channel input (HED and single-channel Canny alike) so the
        # windows below are always 3-D.
        if resized_image.ndim == 2:
            resized_image = np.reshape(
                resized_image,
                (resized_image.shape[0], resized_image.shape[1], 1))

        # NOTE(review): window coordinates live in the padded square image
        # while these factors map onto the unpadded original size, so the
        # padding offset is not removed -- confirm this is intended.
        resize_x = float(orig_width) / float(image_width)
        resize_y = float(orig_height) / float(image_height)
        steps = int(((scale / overlap) - 1))

        for x in range(0, steps):
            x_offset = x * (window_width * overlap)
            for y in range(0, steps):
                y_offset = y * (window_height * overlap)

                xmin = int(x_offset)
                xmax = int(x_offset + window_width)
                ymin = int(y_offset)
                ymax = int(y_offset + window_height)

                window = resized_image[ymin:ymax, xmin:xmax]
                # Add the leading batch axis expected by model.predict
                window = np.reshape(
                    window, (1, window.shape[0], window.shape[1],
                             window.shape[2]))
                window = window / 255.0

                prediction = model.predict(window, 1)[0]

                true_xmin = int(x_offset * resize_x)
                true_ymin = int(y_offset * resize_y)
                true_xmax = true_xmin + int(window_width * resize_x)
                true_ymax = true_ymin + int(window_height * resize_y)
                result.append([
                    true_xmin, true_ymin, true_xmax, true_ymax, prediction
                ])

    return result
def predict(self, image, use_refinement=True):
    """Score every sliding window of an image, optionally refining boxes.

    The image is converted to an edge image (``self.hed`` when
    ``self.use_hed`` is set, otherwise ``ed.auto_canny``), zero-padded to a
    square, resized to 1x-5x the network input size and scanned with
    windows of the network input size that overlap by 50 %.

    When ``use_refinement`` is true and a window scores at least 0.75, a
    strip is masked out from each window edge in turn (left, right, top,
    bottom) in quarter-window steps via ``self.__mask_out_region``.  A step
    is kept while the masked window scores strictly higher than the
    unmasked window did; the kept extent shrinks the reported box on that
    side.

    Args:
        image: Image array; ``image.shape[0]`` is read as the height and
            ``image.shape[1]`` as the width.
        use_refinement: Whether to run the masking-based box refinement.

    Returns:
        List with one ``[xmin, ymin, xmax, ymax, prediction]`` entry per
        window, where ``prediction`` is the score of the unmasked window
        (``model.predict(window, 1)[0]``).
    """
    orig_width = image.shape[1]
    orig_height = image.shape[0]

    if self.use_hed:
        edge_image = self.hed.get_edge_image(image, orig_width, orig_height,
                                             normalized=False)
        edge_image = np.reshape(
            edge_image, (edge_image.shape[0], edge_image.shape[1], 1))
    else:
        edge_image = ed.auto_canny(image, self.use_multichannel)
        if not self.use_multichannel:
            edge_image = np.reshape(
                edge_image, (edge_image.shape[0], edge_image.shape[1], 1))

    # Zero padding to make square, content centred along the shorter axis
    edge_height, edge_width = edge_image.shape[0], edge_image.shape[1]
    if edge_height != edge_width:
        side = max(edge_height, edge_width)
        top = round((side - edge_height) / 2.0)
        left = round((side - edge_width) / 2.0)
        padded = np.zeros((side, side, edge_image.shape[2]))
        padded[top:top + edge_height, left:left + edge_width, :] = edge_image
        edge_image = padded

    # Run exhaustive sliding window
    result = []
    model = self.__get_model()
    model.load_weights(self.weight_file)

    window_width = self.image_width
    window_height = self.image_height
    mask_width = int(float(self.image_width) * 0.25)
    mask_steps_x = 4
    mask_height = int(float(self.image_height) * 0.25)
    mask_steps_y = 4
    scales = range(1, 6)
    overlap = 0.5

    # One entry per window edge, in refinement order: left, right, top,
    # bottom.  Each holds (number of steps, step size, function mapping a
    # masked extent to the region arguments of __mask_out_region).
    edge_specs = (
        (mask_steps_x, mask_width,
         lambda extent: (0, 0, extent, self.image_height)),
        (mask_steps_x, mask_width,
         lambda extent: (self.image_width - extent, 0,
                         self.image_width, self.image_height)),
        (mask_steps_y, mask_height,
         lambda extent: (0, 0, self.image_width, extent)),
        (mask_steps_y, mask_height,
         lambda extent: (0, self.image_height - extent,
                         self.image_width, self.image_height)),
    )

    for scale in scales:
        image_width = round(scale * window_width)
        image_height = round(scale * window_height)

        # np.float was removed from NumPy; float64 is the dtype it aliased.
        resized_image = np.array(
            cv2.resize(edge_image, (image_width, image_height)),
            dtype=np.float64)
        # cv2.resize drops a singleton channel axis; restore it so every
        # window below is 3-D regardless of the edge detector used.
        if resized_image.ndim == 2:
            resized_image = np.reshape(
                resized_image,
                (resized_image.shape[0], resized_image.shape[1], 1))

        resize_x = float(orig_width) / float(image_width)
        resize_y = float(orig_height) / float(image_height)
        steps = int(((scale / overlap) - 1))

        for x in range(0, steps):
            x_offset = x * (window_width * overlap)
            for y in range(0, steps):
                y_offset = y * (window_height * overlap)

                xmin = int(x_offset)
                xmax = int(x_offset + window_width)
                ymin = int(y_offset)
                ymax = int(y_offset + window_height)

                window = resized_image[ymin:ymax, xmin:xmax]
                # Add the leading batch axis expected by model.predict
                window = np.reshape(
                    window, (1, window.shape[0], window.shape[1],
                             window.shape[2]))
                window = window / 255.0

                original_prediction = model.predict(window, 1)[0]

                # Extent masked from the left, right, top and bottom edge
                masked = [0, 0, 0, 0]

                # Mask out areas and reevaluate for box refinement
                if use_refinement and original_prediction >= 0.75:
                    # NOTE(review): step 0 passes a zero-area region; if
                    # __mask_out_region leaves the window unchanged for
                    # it, the score cannot improve and each loop stops
                    # immediately -- confirm whether the steps should
                    # start at 1.
                    for edge, spec in enumerate(edge_specs):
                        step_count, step_size, region_for = spec
                        for i in range(step_count):
                            extent = i * step_size
                            new_window = self.__mask_out_region(
                                window, *region_for(extent))
                            new_prediction = model.predict(new_window, 1)[0]
                            if original_prediction < new_prediction:
                                window = new_window
                                masked[edge] = extent
                            else:
                                break

                mask_x_min, mask_x_max, mask_y_min, mask_y_max = masked

                true_xmin = int((x_offset + mask_x_min) * resize_x)
                true_ymin = int((y_offset + mask_y_min) * resize_y)
                # The box starts after the left/top mask, so its size is
                # the window size minus the masks on BOTH sides.
                # Subtracting only the right/bottom mask would push the
                # far edge out by the left/top mask.
                true_xmax = true_xmin + int(
                    (window_width - mask_x_min - mask_x_max) * resize_x)
                true_ymax = true_ymin + int(
                    (window_height - mask_y_min - mask_y_max) * resize_y)

                result.append([
                    true_xmin, true_ymin, true_xmax, true_ymax,
                    original_prediction
                ])

    return result