def get_anchor_data_gt(img_datas, class_count, C, mode="train"):
    """Endlessly yield training samples for the RPN.

    Each element of ``img_datas`` is a dict describing one image, e.g.::

        {'width': 500, 'height': 500,
         'bboxes': [{'x1': 20, 'y1': 27, 'x2': 183, 'y2': 500,
                     'class': 'person', 'difficult': False}, ...],
         'imageset': 'test',
         'filepath': './datasets/VOC2007/JPEGImages/000910.jpg'}

    :param img_datas: list of per-image dicts; shuffled in place at the start
        of every pass when ``mode == "train"``.
    :param class_count: dict mapping class name to sample count. Not used by
        this function; kept for interface compatibility.
    :param C: configuration object. ``im_size`` and ``std_scaling`` are read
        here; the object is also forwarded to the helper functions.
    :param mode: ``"train"`` enables shuffling.
    :return: generator yielding
        ``(x_img, [y_rpn_cls, y_rpn_regr], img_data_aug)`` with array copies.
    """
    while True:
        if mode == "train":
            # Shuffle the dataset before each pass.
            random.shuffle(img_datas)

        for img_data in img_datas:
            # The original called augment() with augment=False in both the
            # train and the non-train branch, so a single call is equivalent.
            img_data_aug, x_img = data_augment.augment(img_data, C, augment=False)

            # The image returned by augment() must match the recorded size.
            (width, height) = (img_data_aug['width'], img_data_aug['height'])
            (rows, cols, _) = x_img.shape
            assert cols == width
            assert rows == height

            # Resize according to C.im_size (the original comment says the
            # short side is scaled to 600).
            (resized_width, resized_height) = get_new_img_size(width, height, C.im_size)
            x_img = cv2.resize(x_img, (resized_width, resized_height),
                               interpolation=cv2.INTER_CUBIC)
            dump_process_img('resized', x_img)
            x_img = handleImg(x_img, C)

            y_rpn_cls, y_rpn_regr = getdata_for_rpn(
                C, img_data_aug, width, height, resized_width, resized_height)

            # Scale the second half of the last axis. The slice is taken on
            # axis 3, so the split point must come from that axis' length;
            # the original derived it from shape[1].
            # NOTE(review): assumes getdata_for_rpn returns a 4-D,
            # channels-last array -- confirm against that helper.
            split = y_rpn_regr.shape[3] // 2
            y_rpn_regr[:, :, :, split:] *= C.std_scaling

            yield np.copy(x_img), [np.copy(y_rpn_cls), np.copy(y_rpn_regr)], img_data_aug
def get_anchor_gt(all_img_data, cfg, img_size_calc_function, backend, mode='train'):
    """Yield one RPN sample per volume in a single pass over ``all_img_data``.

    Unlike an endless training generator, this one stops after the list has
    been traversed once.

    :param all_img_data: list of per-volume records; shuffled in place when
        ``mode == 'train'``.
    :param cfg: configuration object. ``min_bound`` and ``max_bound`` are read
        here for intensity normalisation; the object is also forwarded to
        ``da.augment`` and ``calc_rpn``.
    :param img_size_calc_function: forwarded unchanged to ``calc_rpn``.
    :param backend: ``'tf'`` moves the channel axis of all outputs last.
    :param mode: ``'train'`` enables shuffling and augmentation.
    :return: generator yielding
        ``(x_img, [y_rpn_cls, y_rpn_regr], img_data_aug)`` with array copies.
    """
    is_training = mode == 'train'
    if is_training:
        np.random.shuffle(all_img_data)

    for img_data in all_img_data:
        img_data_aug, x_img = da.augment(img_data, cfg, augment=is_training)

        # The array returned by augment() must match the recorded dimensions.
        (depth, height, width) = (img_data_aug['depth'], img_data_aug['height'],
                                  img_data_aug['width'])
        (deps, rows, cols) = x_img.shape
        assert deps == depth
        assert rows == height
        assert cols == width

        try:
            y_rpn_cls, y_rpn_regr = calc_rpn(cfg, img_data_aug, depth, height, width,
                                             img_size_calc_function)
        except Exception:
            # Best effort: skip samples for which no targets can be built,
            # but let KeyboardInterrupt / SystemExit propagate.
            continue

        # Min-max normalise intensities with the configured bounds. The
        # original divided by (max_bound - max_bound), which is always zero.
        # Values outside the bounds are not clipped.
        x_img = x_img.astype('float32')
        x_img = (x_img - cfg.min_bound) / (cfg.max_bound - cfg.min_bound)

        x_img = np.expand_dims(x_img, axis=0)  # channel axis
        x_img = np.expand_dims(x_img, axis=0)  # batch axis

        if backend == 'tf':
            channels_last = (0, 2, 3, 4, 1)
            x_img = np.transpose(x_img, channels_last)
            y_rpn_cls = np.transpose(y_rpn_cls, channels_last)
            y_rpn_regr = np.transpose(y_rpn_regr, channels_last)

        yield np.copy(x_img), [np.copy(y_rpn_cls), np.copy(y_rpn_regr)], img_data_aug
def __getitem__(self, idx):
    """Return the cropped, augmented input/target pair for ``idx`` as tensors.

    ``idx`` is wrapped into the range ``1..self.nTrain`` before the file
    names are looked up, so any non-negative index is accepted.
    """
    sample_no = idx % self.nTrain + 1
    input_path, target_path = self.getFileName(sample_no)

    pair = (sio.imread(input_path), sio.imread(target_path))
    # Crop both images together, then augment both together.
    pair = data_augment.randomCrop(pair[0], pair[1], self.patchSize)
    source, target = data_augment.augment(pair[0], pair[1])
    return data_augment.np2PytorchTensor(source, target)
def get_anchor_gt(all_img_data, class_count, C, img_length_calc_function, backend, mode='train'):
    """Endlessly yield RPN training samples built from ``all_img_data``.

    :param all_img_data: list of per-image records; shuffled in place at the
        start of every pass when ``mode == 'train'``.
    :param class_count: passed to ``SampleSelector`` for class balancing.
    :param C: configuration object. Reads ``balanced_classes``, ``im_size``,
        ``img_channel_mean``, ``img_scaling_factor`` and ``std_scaling``.
    :param img_length_calc_function: forwarded unchanged to ``calc_rpn``.
    :param backend: ``'tf'`` moves the channel axis of every output last.
    :param mode: ``'train'`` enables shuffling and augmentation.
    :return: generator yielding
        ``(x_img, [y_rpn_cls, y_rpn_regr], img_data_aug)`` with array copies.
    """
    sample_selector = SampleSelector(class_count)
    is_training = mode == 'train'

    while True:
        if is_training:
            np.random.shuffle(all_img_data)

        for img_data in all_img_data:
            try:
                if C.balanced_classes and sample_selector.skip_sample_for_balanced_class(img_data):
                    continue

                # Read the image; augmentation is applied only while training.
                img_data_aug, x_img = data_augment.augment(img_data, C, augment=is_training)

                (width, height) = (img_data_aug['width'], img_data_aug['height'])
                (rows, cols, _) = x_img.shape
                assert cols == width
                assert rows == height

                # Resize according to C.im_size.
                (resized_width, resized_height) = get_new_img_size(width, height, C.im_size)
                x_img = cv2.resize(x_img, (resized_width, resized_height),
                                   interpolation=cv2.INTER_CUBIC)

                try:
                    y_rpn_cls, y_rpn_regr = calc_rpn(C, img_data_aug, width, height,
                                                     resized_width, resized_height,
                                                     img_length_calc_function)
                except Exception:
                    # Skip images without usable targets. A bare ``except``
                    # here would also swallow KeyboardInterrupt / SystemExit
                    # and make this endless loop impossible to interrupt.
                    continue

                # Reverse the channel order (BGR -> RGB), zero-centre each
                # channel with the configured mean, then rescale.
                x_img = x_img[:, :, (2, 1, 0)]
                x_img = x_img.astype(np.float32)
                for channel in range(3):
                    x_img[:, :, channel] -= C.img_channel_mean[channel]
                x_img /= C.img_scaling_factor

                # HWC -> CHW, then add the batch axis.
                x_img = np.transpose(x_img, (2, 0, 1))
                x_img = np.expand_dims(x_img, axis=0)

                # Scale the second half of axis 1 of the regression target.
                y_rpn_regr[:, y_rpn_regr.shape[1] // 2:, :, :] *= C.std_scaling

                if backend == 'tf':
                    x_img = np.transpose(x_img, (0, 2, 3, 1))
                    y_rpn_cls = np.transpose(y_rpn_cls, (0, 2, 3, 1))
                    y_rpn_regr = np.transpose(y_rpn_regr, (0, 2, 3, 1))

                yield np.copy(x_img), [np.copy(y_rpn_cls), np.copy(y_rpn_regr)], img_data_aug

            except Exception as e:
                # Report the problem and move on to the next image.
                print(e)
                continue
def get_anchor_gt(all_img_data, class_count, C, backend, mode='train'):
    """Endlessly yield, per image, the augmented variant with the most anchors.

    ``data_augment.augment`` returns a sequence of image variants. Index 0 is
    labelled 'original' and indices 1-4 'right', 'left', 'top', 'bottom'.
    For every variant the RPN targets of three modules ('M1', 'M2', 'M3') are
    computed by chained ``calc_rpn`` calls. The variant whose final call
    reports the largest ``anchor_count`` is yielded as
    ``[x_img, [cls_M1, regr_M1, cls_M2, regr_M2, cls_M3, regr_M3], img_data_aug]``.

    :param all_img_data: list of per-image dicts; shuffled in place when
        ``mode == 'train'``.
    :param class_count: passed to ``SampleSelector``.
    :param C: configuration object.
    :param backend: ``'tf'`` moves the channel axis of all outputs last.
    :param mode: ``'train'`` enables shuffling.
    """
    # The following line is not useful with Python 3.5, it is kept for the legacy
    # all_img_data = sorted(all_img_data)
    sample_selector = SampleSelector(class_count)
    while True:
        if mode == 'train':
            np.random.shuffle(all_img_data)
        for img_data in all_img_data:
            try:
                if C.balanced_classes and sample_selector.skip_sample_for_balanced_class(
                        img_data):
                    continue
                # Results per stride name, plus the running best by anchor count.
                augmented_data = {}
                max_count = -1
                best_stride = 'original'
                # read in image, and optionally add augmentation
                images = data_augment.augment(img_data, C)
                for image_idx in range(len(images)):
                    # Each variant gets its own copy of the annotation record.
                    # NOTE(review): the original comment says "Augment bboxes",
                    # but the boxes are only copied here -- confirm whether
                    # calc_rpn shifts them based on ``stride``.
                    img_data_aug = copy.deepcopy(img_data)
                    x_img = copy.deepcopy(images[image_idx])
                    stride = 'original'
                    if (image_idx == 1):
                        stride = 'right'
                    elif (image_idx == 2):
                        stride = 'left'
                    elif (image_idx == 3):
                        stride = 'top'
                    elif (image_idx == 4):
                        stride = 'bottom'
                    img_data_aug['stride'] = stride
                    # x_img = cv2.imread(img_data_aug['filepath'])
                    (width, height) = (img_data_aug['width'], img_data_aug['height'])
                    (rows, cols, _) = x_img.shape
                    assert cols == width
                    assert rows == height
                    # get image dimensions for resizing
                    (resized_width, resized_height) = get_new_img_size(
                        width, height, C.im_size)
                    # Per-box bookkeeping that is threaded through the three
                    # calc_rpn calls below (each call returns an updated copy).
                    num_bboxes = len(img_data_aug['bboxes'])
                    best = {}
                    best['anchor'] = -1 * np.ones((num_bboxes, 4)).astype(int)
                    best['iou'] = C.rpn_max_overlap * np.ones(
                        num_bboxes).astype(np.float32)
                    best['x'] = np.zeros((num_bboxes, 4)).astype(int)
                    best['dx'] = np.zeros((num_bboxes, 4)).astype(np.float32)
                    best['num_anchors'] = np.zeros(num_bboxes).astype(int)
                    best['module'] = np.zeros(num_bboxes).astype(int)
                    best['type'] = np.zeros(num_bboxes).astype(int)
                    best['neutral_anchors'] = {'M1': [], 'M2': [], 'M3': []}
                    # resize the image according to C.im_size
                    x_img = cv2.resize(x_img, (resized_width, resized_height),
                                       interpolation=cv2.INTER_CUBIC)
                    # final_image = np.zeros((C.im_size, C.im_size, 3))
                    # final_image[:resized_height, :resized_width, :] = x_img
                    # x_img = final_image
                    # TODO Remove hardcode
                    # print('shape',x_img.shape)
                    try:
                        # The M1 and M2 calls return intermediate targets that
                        # are not used below; only the M3 call returns the
                        # final targets for all three modules and the count.
                        y_rpn_cls, y_rpn_regr, x_img, best = calc_rpn(
                            C, img_data_aug, width, height, resized_width,
                            resized_height, 'M1', np.copy(x_img), best, stride)
                        y_rpn_cls2, y_rpn_regr2, x_img, best = calc_rpn(
                            C, img_data_aug, width, height, resized_width,
                            resized_height, 'M2', np.copy(x_img), best, stride)
                        y_rpn_cls_M1, y_rpn_regr_M1, y_rpn_cls_M2, y_rpn_regr_M2, y_rpn_cls_M3, y_rpn_regr_M3, anchor_count = calc_rpn( \
                            C, img_data_aug, width, height, resized_width, resized_height, 'M3', np.copy(x_img), best, stride \
                        )
                        # y_rpn_cls_M1, y_rpn_regr_M1 = findBest(C, 'M1', best, resized_width, resized_height, img_data)
                        # y_rpn_cls_M2, y_rpn_regr_M2 = findBest(C, 'M2', best, resized_width, resized_height, img_data)
                        # y_rpn_cls_M3, y_rpn_regr_M3 = findBest(C, 'M3', best, resized_width, resized_height, img_data)
                    except Exception as e:
                        # A failing variant is reported and skipped; the other
                        # variants of the same image are still tried.
                        print('Failure', e)
                        print(traceback.format_exc())
                        continue
                    # Zero-center by mean pixel, and preprocess image
                    x_img = x_img[:, :, (2, 1, 0)]  # BGR -> RGB
                    x_img = x_img.astype(np.float32)
                    x_img[:, :, 0] -= C.img_channel_mean[0]
                    x_img[:, :, 1] -= C.img_channel_mean[1]
                    x_img[:, :, 2] -= C.img_channel_mean[2]
                    x_img /= C.img_scaling_factor
                    # HWC -> CHW, then add the batch axis.
                    x_img = np.transpose(x_img, (2, 0, 1))
                    x_img = np.expand_dims(x_img, axis=0)
                    # Scale the second half of axis 1 of each regression target.
                    y_rpn_regr_M1[:, y_rpn_regr_M1.shape[1] // 2:, :, :] *= C.std_scaling
                    y_rpn_regr_M2[:, y_rpn_regr_M2.shape[1] // 2:, :, :] *= C.std_scaling
                    y_rpn_regr_M3[:, y_rpn_regr_M3.shape[1] // 2:, :, :] *= C.std_scaling
                    if backend == 'tf':
                        x_img = np.transpose(x_img, (0, 2, 3, 1))
                        y_rpn_cls_M1 = np.transpose(y_rpn_cls_M1, (0, 2, 3, 1))
                        y_rpn_regr_M1 = np.transpose(y_rpn_regr_M1, (0, 2, 3, 1))
                        y_rpn_cls_M2 = np.transpose(y_rpn_cls_M2, (0, 2, 3, 1))
                        y_rpn_regr_M2 = np.transpose(y_rpn_regr_M2, (0, 2, 3, 1))
                        y_rpn_cls_M3 = np.transpose(y_rpn_cls_M3, (0, 2, 3, 1))
                        y_rpn_regr_M3 = np.transpose(y_rpn_regr_M3, (0, 2, 3, 1))
                    augmented_data[stride] = [np.copy(x_img), \
                        [np.copy(y_rpn_cls_M1), np.copy(y_rpn_regr_M1), \
                        np.copy(y_rpn_cls_M2), np.copy(y_rpn_regr_M2), \
                        np.copy(y_rpn_cls_M3), np.copy(y_rpn_regr_M3)], \
                        img_data_aug]
                    # Remember the variant with the highest anchor count.
                    if (anchor_count > max_count):
                        max_count = anchor_count
                        best_stride = stride
                    # yield np.copy(x_img), \
                    #     [np.copy(y_rpn_cls_M1), np.copy(y_rpn_regr_M1), \
                    #     np.copy(y_rpn_cls_M2), np.copy(y_rpn_regr_M2), \
                    #     np.copy(y_rpn_cls_M3), np.copy(y_rpn_regr_M3)], \
                    #     img_data_aug
                # NOTE(review): the yield is placed after the variant loop so
                # that exactly one (best) variant is emitted per image; the
                # indentation was reconstructed -- confirm. If every variant
                # failed, this lookup raises KeyError, which the handler
                # below prints before moving on.
                yield augmented_data[best_stride]
            except Exception as e:
                print(e)
                print(traceback.format_exc())
                continue
def get_anchor_gt(all_img_data, class_mapping, class_count, C, mode='train'):
    """Endlessly yield ``([x_img, x_rois], [rpn_cls, rpn_regr, cls_num, cls_regr])``.

    :param all_img_data: list of per-image dicts; shuffled in place at the
        start of each pass when ``mode == 'train'``.
    :param class_mapping: forwarded to ``calcY``.
    :param class_count: passed to ``SampleSelector`` for class balancing.
    :param C: configuration object.
    :param mode: ``'train'`` enables shuffling and augmentation.
    """
    # These values are derived from the config but never consumed below.
    downscale = float(C.rpn_stride)
    anchor_sizes = C.anchor_box_scales
    anchor_ratios = C.anchor_box_ratios
    num_anchors = len(anchor_sizes) * len(anchor_ratios)

    selector = SampleSelector(class_count)
    channel_means = (103.939, 116.779, 123.68)

    while True:
        if mode == 'train':
            random.shuffle(all_img_data)

        for record in all_img_data:
            if C.balanced_classes and selector.skip_sample_for_balanced_class(record):
                continue

            # Load the image; augmentation is applied only while training.
            img_data_aug, x_img = data_augment.augment(record, C, augment=(mode == 'train'))

            width = img_data_aug['width']
            height = img_data_aug['height']
            rows, cols, _ = x_img.shape
            assert cols == width
            assert rows == height

            # Resize according to C.im_size.
            resized_width, resized_height = get_new_img_size(width, height, C.im_size)
            x_img = cv2.resize(x_img, (resized_width, resized_height),
                               interpolation=cv2.INTER_CUBIC)

            # Output map size for the resized image (result unused here).
            output_width, output_height = get_img_output_length(resized_width, resized_height)

            targets = calcY(C, class_mapping, img_data_aug, width, height,
                            resized_width, resized_height)
            x_rois, y_rpn_cls, y_rpn_regr, y_class_num, y_class_regr = targets
            if x_rois is None:
                continue

            # HWC -> 1xCxHxW float32, then subtract a fixed mean per channel.
            x_img = np.expand_dims(np.transpose(x_img, (2, 0, 1)), axis=0).astype('float32')
            for channel, mean in enumerate(channel_means):
                x_img[:, channel, :, :] -= mean

            yield [x_img, x_rois], [y_rpn_cls, y_rpn_regr, y_class_num, y_class_regr]
def __getitem__(self, idx):
    '''
    Read one image together with all of its annotation sources and return
    them as torch tensors.

    Also returns a flag tensor (``cond``) where a 0 in:
        pos 0: indicates pupil center exists
        pos 1: indicates mask exists
        pos 2: indicates pupil ellipse exists
        pos 3: indicates iris ellipse exists

    Returns the tuple
    ``(img, label, spatialWeights, distMap, pupil_center, iris_center,
    elNorm, cond, imInfo)``.
    '''
    numClasses = 3
    img, label, elParam, pupil_center, cond, imInfo = self.readImage(idx)
    # NOTE(review): pad2Size and augment are unpacked as
    # (img, label, pupil_center, elParam) while scaleFn is unpacked as
    # (img, label, elParam, pupil_center) -- confirm each helper's return order.
    img, label, pupil_center, elParam = pad2Size(img, label, elParam,
                                                 pupil_center, self.size)
    if self.scale:
        img, label, elParam, pupil_center = self.scaleFn(
            img, label, elParam, pupil_center)
    img, label, pupil_center, elParam = augment(
        img, label, pupil_center,
        elParam) if self.augFlag else (img, label, pupil_center, elParam)

    # Modify labels by removing Sclera class
    label[label == 1] = 0  # If Sclera exists, move it to background.
    label[label == 2] = 1  # Move Iris to 1
    label[label == 3] = 2  # Move Pupil to 2

    # Compute edge weight maps: 1 everywhere, higher on dilated label edges.
    spatialWeights = cv2.Canny(label.astype(np.uint8), 0, 1) / 255
    spatialWeights = 1 + cv2.dilate(spatialWeights,
                                    (3, 3), iterations=1) * 20

    # Calculate distMaps for only Iris and Pupil. Pupil: 2. Iris: 1. Rest: 0.
    distMap = np.zeros((3, *img.shape))

    # Find distance map for each class
    for i in range(0, numClasses):
        distMap[i, ...] = one_hot2dist(label.astype(np.uint8) == i)

    # Convert data to torch primitives
    # Standardise the image to zero mean / unit std before conversion.
    img = (img - img.mean()) / img.std()
    img = torch.from_numpy(img).unsqueeze(0).to(
        self.prec)  # Adds a singleton for channels

    # Groundtruth annotation
    label = MaskToTensor()(label).to(torch.long)

    # Pixels weights based on edges - edge pixels have higher weight
    spatialWeights = torch.from_numpy(spatialWeights).to(self.prec)

    # Distance map for surface loss
    distMap = torch.from_numpy(distMap).to(self.prec)

    # Centers
    pupil_center = torch.from_numpy(pupil_center).to(torch.float32).to(
        self.prec)
    # cond is still the raw array here: when flag 3 is falsy the iris centre
    # comes from the first two entries of elParam[0], otherwise the pupil
    # centre is reused.
    iris_center = torch.from_numpy(elParam[0][:2]).to(
        self.prec) if not cond[3] else pupil_center.clone()

    # From here on cond is a boolean tensor.
    cond = torch.from_numpy(cond).to(self.prec).to(torch.bool)
    imInfo = torch.from_numpy(imInfo).to(torch.long)

    # Generate normalized pupil and iris information
    # H maps pixel coordinates to [-1, 1] using the image width (img.shape[2])
    # and height (img.shape[1]); img is 1xHxW after the unsqueeze above.
    H = np.array([[2 / img.shape[2], 0, -1], [0, 2 / img.shape[1], -1],
                  [0, 0, 1]])
    iris_pts, iris_norm = get_ellipse_info(elParam[0], H, cond[3])
    pupil_pts, pupil_norm = get_ellipse_info(elParam[1], H, cond[2])
    elNorm = np.stack([iris_norm, pupil_norm],
                      axis=0)  # Respect iris first policy
    elNorm = torch.from_numpy(elNorm).to(self.prec)
    return (img, label, spatialWeights, distMap, pupil_center, iris_center,
            elNorm, cond, imInfo)
def get_anchor_gt(all_img_data, class_count, C, backend, mode='train'):
    """Endlessly yield RPN training samples built from ``all_img_data``.

    :param all_img_data: list of per-image dicts. The function works on its
        own copy, so the caller's list is never reordered.
    :param class_count: passed to ``SampleSelector`` for class balancing.
    :param C: configuration object. Reads ``balanced_classes``, ``im_size``,
        ``img_channel_mean``, ``img_scaling_factor`` and ``std_scaling``.
    :param backend: ``'tf'`` moves the channel axis of every output last.
    :param mode: ``'train'`` enables shuffling and augmentation.
    :return: generator yielding
        ``(x_img, [y_rpn_cls, y_rpn_regr], img_data_aug)`` with array copies.
    """
    try:
        all_img_data = sorted(all_img_data)
    except TypeError:
        # Python 3 cannot order dicts; keep the incoming order but still
        # detach from the caller's list, as sorted() would have done.
        all_img_data = list(all_img_data)

    sample_selector = SampleSelector(class_count)
    is_training = mode == 'train'

    while True:
        if is_training:
            random.shuffle(all_img_data)

        for img_data in all_img_data:
            try:
                if C.balanced_classes and sample_selector.skip_sample_for_balanced_class(img_data):
                    continue

                # Read the image; augmentation is applied only while training.
                img_data_aug, x_img = data_augment.augment(img_data, C, augment=is_training)

                (width, height) = (img_data_aug['width'], img_data_aug['height'])
                (rows, cols, _) = x_img.shape
                assert cols == width
                assert rows == height

                # Resize according to C.im_size.
                (resized_width, resized_height) = get_new_img_size(width, height, C.im_size)
                x_img = cv2.resize(x_img, (resized_width, resized_height),
                                   interpolation=cv2.INTER_CUBIC)

                try:
                    y_rpn_cls, y_rpn_regr = calc_rpn(C, img_data_aug, width, height,
                                                     resized_width, resized_height)
                except Exception:
                    # Skip images without usable targets, but do not swallow
                    # KeyboardInterrupt / SystemExit.
                    continue

                # Reverse the channel order (BGR -> RGB), zero-centre each
                # channel with the configured mean, then rescale.
                x_img = x_img[:, :, (2, 1, 0)]
                x_img = x_img.astype(np.float32)
                for channel in range(3):
                    x_img[:, :, channel] -= C.img_channel_mean[channel]
                x_img /= C.img_scaling_factor

                # HWC -> CHW, then add the batch axis.
                x_img = np.transpose(x_img, (2, 0, 1))
                x_img = np.expand_dims(x_img, axis=0)

                # Floor division keeps the slice index an int. With ``/`` the
                # index is a float on Python 3, which raises TypeError for
                # every sample, so the generator would never yield.
                y_rpn_regr[:, y_rpn_regr.shape[1] // 2:, :, :] *= C.std_scaling

                if backend == 'tf':
                    x_img = np.transpose(x_img, (0, 2, 3, 1))
                    y_rpn_cls = np.transpose(y_rpn_cls, (0, 2, 3, 1))
                    y_rpn_regr = np.transpose(y_rpn_regr, (0, 2, 3, 1))

                yield np.copy(x_img), [np.copy(y_rpn_cls), np.copy(y_rpn_regr)], img_data_aug

            except Exception as e:
                # Report the problem and move on to the next image.
                print(e)
                continue
def get_anchor_gt(all_img_data, C, img_length_calc_function, mode='train'):
    """Endlessly yield RPN samples plus debugging extras, in list order.

    Shuffling and class balancing are disabled in this variant, so images are
    visited in the order given.

    :param all_img_data: list of per-image dicts.
    :param C: configuration object. Reads ``im_size``, ``img_channel_mean``,
        ``img_scaling_factor`` and ``std_scaling``.
    :param img_length_calc_function: forwarded unchanged to ``calc_rpn``.
    :param mode: ``'train'`` enables augmentation.
    :return: generator yielding ``(x_img, [y_rpn_cls, y_rpn_regr],
        img_data_aug, debug_img, num_pos)``. ``debug_img`` is the resized
        image before any normalisation and ``num_pos`` is the third value
        returned by ``calc_rpn``. All arrays are channels-last.
    """
    is_training = mode == 'train'

    while True:
        for img_data in all_img_data:
            try:
                # Read the image file; augment only in training mode.
                img_data_aug, x_img = data_augment.augment(img_data, C, augment=is_training)

                # Width and height after augmentation must match the array.
                (width, height) = (img_data_aug['width'], img_data_aug['height'])
                (rows, cols, _) = x_img.shape
                assert cols == width
                assert rows == height

                # Target dimensions, then the actual resize.
                (resized_width, resized_height) = data_augment.get_new_img_size(
                    width, height, C.im_size)
                x_img = cv2.resize(x_img, (resized_width, resized_height),
                                   interpolation=cv2.INTER_CUBIC)
                debug_img = x_img.copy()

                try:
                    # Classification and regression targets for this image.
                    y_rpn_cls, y_rpn_regr, num_pos = calc_rpn(
                        C, img_data_aug, width, height, resized_width,
                        resized_height, img_length_calc_function)
                except Exception:
                    # Skip images without usable targets, but do not swallow
                    # KeyboardInterrupt / SystemExit.
                    continue

                # Reverse the channel order (BGR -> RGB), then zero-centre
                # channel 0 (red), 1 (green) and 2 (blue) with the configured
                # means and rescale.
                x_img = x_img[:, :, (2, 1, 0)]
                x_img = x_img.astype(np.float32)
                for channel in range(3):
                    x_img[:, :, channel] -= C.img_channel_mean[channel]
                x_img /= C.img_scaling_factor

                # HWC -> CHW, then add the batch axis.
                x_img = np.transpose(x_img, (2, 0, 1))
                x_img = np.expand_dims(x_img, axis=0)

                # Scale the second half of axis 1 of the regression target.
                y_rpn_regr[:, y_rpn_regr.shape[1] // 2:, :, :] *= C.std_scaling

                # Always move the channel axis last (TensorFlow layout).
                x_img = np.transpose(x_img, (0, 2, 3, 1))
                y_rpn_cls = np.transpose(y_rpn_cls, (0, 2, 3, 1))
                y_rpn_regr = np.transpose(y_rpn_regr, (0, 2, 3, 1))

                yield np.copy(x_img), [
                    np.copy(y_rpn_cls), np.copy(y_rpn_regr)
                ], img_data_aug, debug_img, num_pos

            except Exception as e:
                # Report the problem and move on to the next image.
                print(e)
                continue
def __getitem__(self, item):
    """Return the network input (and, when training, its targets) for ``item``.

    With ``self.caffemodel`` set, images are read with OpenCV and returned as
    float32 CHW tensors with a fixed per-channel mean subtracted. Otherwise
    images are opened with PIL and passed through ``self.transform`` when one
    is configured.

    In ``'train'`` mode the result is ``(x_img, [y_center, y_height,
    y_offset])``; in any other mode only ``x_img`` is returned.
    """

    def _mean_subtracted_chw(image):
        # float32 copy, subtract the fixed channel means, HWC -> CHW tensor.
        pixels = image.astype(np.float32)
        pixels -= [103.939, 116.779, 123.68]
        return torch.from_numpy(pixels).permute([2, 0, 1])

    record = self.dataset[item]

    if self.caffemodel:
        # Input stays in OpenCV channel order and is not normalised.
        if self.preloaded:
            image = self.img_cache[item]
        else:
            image = cv2.imread(record['filepath'])

        if self.type != 'train':
            return _mean_subtracted_chw(image)

        record, augmented = data_augment.augment(self.dataset[item], self.config, image)
        boxes = record['bboxes'].copy()
        ignored = record['ignoreareas'].copy()
        y_center, y_height, y_offset = self.calc_gt_center(
            boxes, ignored, radius=2, stride=self.config.down)
        return _mean_subtracted_chw(augmented), [y_center, y_height, y_offset]

    # PIL path: any normalisation is left to self.transform.
    if self.preloaded:
        image = self.img_cache[item]
    else:
        image = Image.open(record['filepath'])

    if self.type != 'train':
        if self.transform is None:
            return image
        return self.transform(image)

    boxes = record['bboxes'].copy()
    ignored = record['ignoreareas'].copy()
    x_img, boxes, ignored = self.preprocess(image, boxes, ignored)
    y_center, y_height, y_offset = self.calc_gt_center(
        boxes, ignored, radius=2, stride=self.config.down)
    if self.transform is not None:
        x_img = self.transform(x_img)
    return x_img, [y_center, y_height, y_offset]
# Preprocess the volume and its mask, then add a trailing channel axis.
x = preprocess(x.get_data(), shape, 'coronal')
mask = preprocess(mask.get_data(), shape, 'coronal')
mask = np.expand_dims(mask, -1)
image = np.expand_dims(x, -1)

# These three options are consumed by augment() below, not by Keras.
custom_aug_keys = ('brightness_range', 'noise_var_range', 'bias_var_range')
keras_gen_args = {
    key: value
    for key, value in data_gen_args.items() if key not in custom_aug_keys
}
# The options must be unpacked as keyword arguments. Passed positionally,
# the whole dict lands in the generator's first parameter and none of the
# configured transformations are applied.
mask_datagen = image_datagen = kp.image.ImageDataGenerator(**keras_gen_args)

# The same seed keeps the image and mask streams in step.
image_generator = image_datagen.flow(image, seed=1)
mask_generator = image_datagen.flow(mask, seed=1)

imgs = [next(image_generator) for _ in range(10)]
masks = [
    # Re-binarise the masks after the transformation.
    np.where(next(mask_generator) > 0.5, 1, 0).astype('float32')
    for _ in range(10)
]
imgs = np.concatenate(imgs)
masks = np.concatenate(masks)

# Apply the custom augmentations and save one figure per sample.
for i, (sample_img, sample_mask) in enumerate(zip(imgs, masks)):
    img = np.squeeze(
        augment(sample_img,
                sample_mask,
                brightness_range=data_gen_args['brightness_range'],
                noise_var_range=data_gen_args['noise_var_range'],
                bias_var_range=data_gen_args['bias_var_range']))
    plt.imshow(img, cmap='gray')
    plt.axis('off')
    plt.savefig('vis/{}'.format(i))
    plt.close()
def get_anchor_gt(all_img_data, C, img_length_calc_function, mode='train'):
    """Endlessly yield channels-last RPN samples built from ``all_img_data``.

    :param all_img_data: list of per-image dicts; shuffled in place at the
        start of every pass, regardless of ``mode``.
    :param C: configuration object. Reads ``im_size``, ``img_channel_mean``,
        ``img_scaling_factor`` and ``std_scaling``.
    :param img_length_calc_function: forwarded unchanged to ``calc_rpn``.
    :param mode: ``'train'`` enables augmentation.
    :return: generator yielding
        ``(x_img, [y_rpn_cls, y_rpn_regr], img_data_aug)`` with array copies.
    """
    is_training = mode == 'train'

    while True:
        np.random.shuffle(all_img_data)

        for img_data in all_img_data:
            try:
                img_data_aug, x_img = augment.augment(img_data, C, augment=is_training)

                (width, height) = (img_data_aug['width'], img_data_aug['height'])
                (rows, cols, _) = x_img.shape
                assert cols == width
                assert rows == height

                # Resize according to C.im_size.
                (resized_width, resized_height) = get_new_img_size(width, height, C.im_size)
                x_img = cv2.resize(x_img, (resized_width, resized_height),
                                   interpolation=cv2.INTER_CUBIC)

                try:
                    y_rpn_cls, y_rpn_regr = calc_rpn(C, img_data_aug, width, height,
                                                     resized_width, resized_height,
                                                     img_length_calc_function)
                except Exception:
                    # Skip images without usable targets, but do not swallow
                    # KeyboardInterrupt / SystemExit.
                    continue

                # Reverse the channel order (BGR -> RGB), zero-centre each
                # channel with the configured mean, then rescale.
                x_img = x_img[:, :, (2, 1, 0)]
                x_img = x_img.astype(np.float32)
                for channel in range(3):
                    x_img[:, :, channel] -= C.img_channel_mean[channel]
                x_img /= C.img_scaling_factor

                # HWC -> CHW, then add the batch axis.
                x_img = np.transpose(x_img, (2, 0, 1))
                x_img = np.expand_dims(x_img, axis=0)

                # Scale the second half of axis 1 of the regression target.
                y_rpn_regr[:, y_rpn_regr.shape[1] // 2:, :, :] *= C.std_scaling

                # Always move the channel axis last.
                x_img = np.transpose(x_img, (0, 2, 3, 1))
                y_rpn_cls = np.transpose(y_rpn_cls, (0, 2, 3, 1))
                y_rpn_regr = np.transpose(y_rpn_regr, (0, 2, 3, 1))

                yield np.copy(x_img), [np.copy(y_rpn_cls), np.copy(y_rpn_regr)], img_data_aug

            except Exception as e:
                # Report the problem and move on to the next image.
                print(e)
                continue
def get_anchor_gt(all_img_data, class_count, C, backend, mode='train'):
    """Endlessly yield RPN training samples built from ``all_img_data``.

    :param all_img_data: list of per-image dicts. The function works on a
        sorted copy, so the caller's list is never reordered.
    :param class_count: number of samples per class; passed to
        ``SampleSelector``.
    :param C: configuration object. Reads ``balanced_classes``, ``im_size``,
        ``img_channel_mean``, ``img_scaling_factor`` and ``std_scaling``.
    :param backend: ``'tf'`` moves the channel axis of every output last.
    :param mode: ``'train'`` enables shuffling and augmentation.
    :return: generator yielding
        ``(x_img, [y_rpn_cls, y_rpn_regr], img_data_aug)`` with array copies.
    """
    # Dicts are not orderable on Python 3, so sort by each record's key names
    # (the Python 2 original was a plain ``sorted(all_img_data)``).
    all_img_data = sorted(all_img_data, key=lambda x: sorted(x.keys()))

    sample_selector = SampleSelector(class_count)
    is_training = mode == 'train'

    while True:
        if is_training:
            random.shuffle(all_img_data)

        for img_data in all_img_data:
            try:
                if C.balanced_classes and sample_selector.skip_sample_for_balanced_class(img_data):
                    continue

                # Read the image; augmentation is applied only while training.
                img_data_aug, x_img = data_augment.augment(img_data, C, augment=is_training)

                (width, height) = (img_data_aug['width'], img_data_aug['height'])
                (rows, cols, _) = x_img.shape
                assert cols == width
                assert rows == height

                # Resize according to C.im_size.
                (resized_width, resized_height) = get_new_img_size(width, height, C.im_size)
                x_img = cv2.resize(x_img, (resized_width, resized_height),
                                   interpolation=cv2.INTER_CUBIC)

                try:
                    y_rpn_cls, y_rpn_regr = calc_rpn(C, img_data_aug, width, height,
                                                     resized_width, resized_height)
                except Exception:
                    # Skip images without usable targets, but do not swallow
                    # KeyboardInterrupt / SystemExit.
                    continue

                # Reverse the channel order (BGR -> RGB), zero-centre each
                # channel with the configured mean, then rescale.
                x_img = x_img[:, :, (2, 1, 0)]
                x_img = x_img.astype(np.float32)
                for channel in range(3):
                    x_img[:, :, channel] -= C.img_channel_mean[channel]
                x_img /= C.img_scaling_factor

                # HWC -> CHW, then add the batch axis.
                x_img = np.transpose(x_img, (2, 0, 1))
                x_img = np.expand_dims(x_img, axis=0)

                # Scale the second half of axis 1 of the regression target.
                y_rpn_regr[:, y_rpn_regr.shape[1] // 2:, :, :] *= C.std_scaling

                if backend == 'tf':
                    x_img = np.transpose(x_img, (0, 2, 3, 1))
                    y_rpn_cls = np.transpose(y_rpn_cls, (0, 2, 3, 1))
                    y_rpn_regr = np.transpose(y_rpn_regr, (0, 2, 3, 1))

                yield np.copy(x_img), [np.copy(y_rpn_cls), np.copy(y_rpn_regr)], img_data_aug

            except Exception as e:
                # Report the problem and move on to the next image.
                print(e)
                continue
def get_data(C, augment=False):
    """Convert the JSON label files under ``data/labels`` into image records.

    Every record has the form::

        {'width': ..., 'height': ...,
         'bboxes': [{'x1': ..., 'y1': ..., 'x2': ..., 'y2': ..., 'class': ...}],
         'imageset': 'train' | 'val' | 'test',
         'filepath': <data/raw/NAME.jpg>}

    The label files are split at random: ``ceil(80 %)`` go to 'train', up to
    ``ceil(10 %)`` of the total go to 'val' and whatever is left goes to
    'test'.

    :param C: configuration object; only forwarded to
        ``data_augment.augment`` when ``augment`` is true.
    :param augment: when true, every label file contributes ``AUGMENT_NUM``
        augmented records instead of one plain record. The file is re-parsed
        for each of them, so its boxes are counted once per augmented record.
    :return: ``(img_data, classes_count, classes_mapping)``.
    """
    DATA_PATH = os.path.abspath('data')
    LABEL_PATH = os.path.join(DATA_PATH, 'labels')
    IMG_PATH = os.path.join(DATA_PATH, 'raw')
    TRAIN_SPLIT = 0.8
    VAL_SPLIT = 0.1
    AUGMENT_NUM = 4

    # Number of annotated boxes per class name.
    classes_count = {'wrist': 0, 'near': 0, 'far': 0}
    # Class name -> index. Fixed so it stays stable across training runs.
    classes_mapping = {'wrist': 0, 'near': 1, 'far': 2}

    label_files = os.listdir(LABEL_PATH)
    total = len(label_files)

    train_files = random.sample(label_files, math.ceil(total * TRAIN_SPLIT))
    train_set = set(train_files)
    remaining = [name for name in label_files if name not in train_set]

    # For small datasets fewer files may remain than the validation quota;
    # random.sample raises ValueError when asked for more than it is given.
    val_size = min(len(remaining), math.ceil(total * VAL_SPLIT))
    val_files = random.sample(remaining, val_size)
    val_set = set(val_files)
    test_files = [name for name in remaining if name not in val_set]

    def handle_json(label_path, imageset):
        """Parse one label file into a record and update ``classes_count``."""
        with open(label_path) as handle:
            data = json.load(handle)

        bboxes = []
        for shape in data['shapes']:
            class_name = shape['label']
            first, second = shape['points'][0], shape['points'][1]
            bboxes.append({
                'x1': first[0],
                'y1': first[1],
                'x2': second[0],
                'y2': second[1],
                'class': class_name
            })
            classes_count[class_name] = classes_count.get(class_name, 0) + 1

        # Swap only the extension. A substring replace of 'json' would also
        # rewrite file names that merely contain "json".
        stem = os.path.splitext(os.path.basename(label_path))[0]
        return {
            'width': data['imageWidth'],
            'height': data['imageHeight'],
            'bboxes': bboxes,
            'filepath': os.path.join(IMG_PATH, stem + '.jpg'),
            'imageset': imageset
        }

    img_data = []
    splits = (('train', train_files), ('val', val_files), ('test', test_files))
    for imageset, files in splits:
        for name in files:
            label_path = os.path.join(LABEL_PATH, name)
            if augment:
                for _ in range(AUGMENT_NUM):
                    img_aug, _img = data_augment.augment(
                        handle_json(label_path, imageset), C, augment)
                    img_data.append(img_aug)
            else:
                img_data.append(handle_json(label_path, imageset))

    return img_data, classes_count, classes_mapping
def get_anchor_gt(all_img_data, classes_count, cfg, img_length_calc_function, backend, mode='train'):
    """Endlessly yield channels-last RPN samples, visiting images in list order.

    :param all_img_data: list of image records exposing ``width`` and
        ``height`` attributes.
    :param classes_count: passed to ``SampleSelector`` for class balancing.
    :param cfg: configuration object. Reads ``balanced_classes``,
        ``img_size``, ``img_channel_mean``, ``img_scaling_factor`` and
        ``std_scaling``.
    :param img_length_calc_function: forwarded unchanged to ``calc_rpn``.
    :param backend: accepted for interface compatibility; the outputs are
        always transposed to channels-last.
    :param mode: ``'train'`` enables augmentation.
    :return: generator yielding
        ``(x_img, [y_rpn_cls, y_rpn_regr], img_data_aug)`` with array copies.
    """
    selector = SampleSelector(classes_count)

    while True:
        for record in all_img_data:
            try:
                if cfg.balanced_classes and selector.skip_sample_for_balanced_class(record):
                    continue

                img_data_aug, x_img = data_augment.augment(
                    record, cfg, augment=(mode == 'train'))

                width = img_data_aug.width
                height = img_data_aug.height
                rows, cols, _ = x_img.shape
                assert cols == width
                assert rows == height

                resized_width, resized_height, x_img = get_new_img(
                    width, height, x_img, cfg.img_size)
                # Store the resized dimensions on the record.
                img_data_aug.width = resized_width
                img_data_aug.height = resized_height

                try:
                    y_rpn_cls, y_rpn_regr = calc_rpn(
                        cfg, img_data_aug, width, height, resized_width,
                        resized_height, img_length_calc_function)
                except Exception as eor:
                    print(eor)
                    continue

                # OpenCV reads images as BGR: reverse to RGB, subtract the
                # per-channel means, then divide by the scaling factor.
                x_img = x_img[:, :, (2, 1, 0)].astype(np.float32)
                for channel in range(3):
                    x_img[:, :, channel] -= cfg.img_channel_mean[channel]
                x_img /= cfg.img_scaling_factor

                # Channels first, plus a leading batch axis.
                x_img = np.expand_dims(np.transpose(x_img, (2, 0, 1)), axis=0)

                # Scale the second half of axis 1 of the regression target.
                half = y_rpn_regr.shape[1] // 2
                y_rpn_regr[:, half:, :, :] *= cfg.std_scaling

                # Move the channel axis last for every output.
                channels_last = (0, 2, 3, 1)
                x_img = np.transpose(x_img, channels_last)
                y_rpn_cls = np.transpose(y_rpn_cls, channels_last)
                y_rpn_regr = np.transpose(y_rpn_regr, channels_last)

                yield np.copy(x_img), [np.copy(y_rpn_cls), np.copy(y_rpn_regr)], img_data_aug

            except Exception as e:
                print(e)
                continue
def get_anchor_gt(all_img_data, class_mapping, class_count, C, backend, mode='train'):
    """Endlessly yield joint RPN / classifier training samples.

    :param all_img_data: list of per-image dicts. The function works on its
        own copy, so the caller's list is never reordered.
    :param class_mapping: forwarded to ``calcY``.
    :param class_count: passed to ``SampleSelector`` for class balancing.
    :param C: configuration object. Reads ``balanced_classes``, ``im_size``
        and ``std_scaling``.
    :param backend: ``'tf'`` moves the channel axis of the image and the RPN
        targets last.
    :param mode: ``'train'`` enables shuffling and augmentation.
    :return: generator yielding ``([x_img, x_rois],
        [y_rpn_cls, y_rpn_regr, y_class_num, y_class_regr])`` with array
        copies.
    """
    try:
        all_img_data = sorted(all_img_data)
    except TypeError:
        # Python 3 cannot order dicts; keep the incoming order but still
        # detach from the caller's list, as sorted() would have done.
        all_img_data = list(all_img_data)

    sample_selector = SampleSelector(class_count)
    is_training = mode == 'train'

    while True:
        if is_training:
            random.shuffle(all_img_data)

        for img_data in all_img_data:
            try:
                if C.balanced_classes and sample_selector.skip_sample_for_balanced_class(img_data):
                    continue

                # Read the image; augmentation is applied only while training.
                img_data_aug, x_img = data_augment.augment(img_data, C, augment=is_training)

                (width, height) = (img_data_aug['width'], img_data_aug['height'])
                (rows, cols, _) = x_img.shape
                assert cols == width
                assert rows == height

                # Resize according to C.im_size.
                (resized_width, resized_height) = get_new_img_size(width, height, C.im_size)
                x_img = cv2.resize(x_img, (resized_width, resized_height),
                                   interpolation=cv2.INTER_CUBIC)

                # NOTE(review): the result is unused; the call is kept in
                # case the helper validates the size -- confirm and drop.
                get_img_output_length(resized_width, resized_height)

                try:
                    x_rois, y_rpn_cls, y_rpn_regr, y_class_num, y_class_regr = calcY(
                        C, class_mapping, img_data_aug, width, height,
                        resized_width, resized_height)
                except Exception:
                    # Skip images without usable targets, but do not swallow
                    # KeyboardInterrupt / SystemExit.
                    continue

                # Zero-centre each channel with a fixed mean.
                x_img = x_img.astype(np.float32)
                for channel, mean in enumerate((103.939, 116.779, 123.68)):
                    x_img[:, :, channel] -= mean

                # HWC -> CHW, then add the batch axis.
                x_img = np.transpose(x_img, (2, 0, 1))
                x_img = np.expand_dims(x_img, axis=0)

                # Floor division keeps the slice indices ints. With ``/`` the
                # index is a float on Python 3, which raises TypeError for
                # every sample, so the generator would never yield.
                y_rpn_regr[:, y_rpn_regr.shape[1] // 2:, :, :] *= C.std_scaling
                y_class_regr[:, y_class_regr.shape[1] // 2:, :] *= C.std_scaling

                if backend == 'tf':
                    x_img = np.transpose(x_img, (0, 2, 3, 1))
                    y_rpn_cls = np.transpose(y_rpn_cls, (0, 2, 3, 1))
                    y_rpn_regr = np.transpose(y_rpn_regr, (0, 2, 3, 1))

                yield [np.copy(x_img), np.copy(x_rois)], [
                    np.copy(y_rpn_cls),
                    np.copy(y_rpn_regr),
                    np.copy(y_class_num),
                    np.copy(y_class_regr)
                ]

            except Exception as e:
                # Report the problem and move on to the next image.
                print(e)
                continue
def get_anchor_gt(all_img_data, C, img_length_calc_function, backend, mode='train'):
    """Endlessly yield RPN training samples at the original image resolution.

    Shuffling, augmentation and resizing are all disabled in this variant:
    images are visited in list order and keep their original size.

    :param all_img_data: list of dicts describing the original images.
    :param C: configuration object. Reads ``img_scaling_factor`` and
        ``std_scaling``.
    :param img_length_calc_function: forwarded unchanged to ``calc_rpn``.
    :param backend: ``'tf'`` moves the channel axis of the RPN targets last.
        The image itself is never transposed and stays height x width x
        channel.
    :param mode: accepted for interface compatibility; augmentation is off in
        every mode.
    :return: generator yielding ``(x_img, [y_rpn_cls, y_rpn_regr],
        img_data_aug, x_img_2)``. ``x_img`` is mean-centred per channel and
        rescaled; ``x_img_2`` is only rescaled.
    """
    while True:
        for img_data in all_img_data:
            try:
                # The original called augment() with augment=False in both
                # the train and the non-train branch.
                img_data_aug, x_img = data_augment.augment(img_data, C, augment=False)

                # No resize is performed, so the "resized" size passed to
                # calc_rpn equals the original size.
                width, height = (img_data['width'], img_data['height'])
                resized_width = width
                resized_height = height

                # Anchor labels and regression targets. Per the original
                # author's notes the shapes are (1, 18, m, n) and
                # (1, 72, m, n) for 9 anchors -- TODO confirm in calc_rpn.
                try:
                    y_rpn_cls, y_rpn_regr = calc_rpn(C, img_data_aug, width, height,
                                                     resized_width, resized_height,
                                                     img_length_calc_function)
                except Exception:
                    # Skip images without usable targets, but do not swallow
                    # KeyboardInterrupt / SystemExit.
                    continue

                x_img = x_img.astype(np.float32)
                x_img_2 = np.copy(x_img)

                # Subtract each channel's own mean; the channel order is left
                # as delivered by augment().
                for channel in range(3):
                    x_img[:, :, channel] -= np.mean(x_img[:, :, channel])

                x_img /= C.img_scaling_factor
                x_img_2 /= C.img_scaling_factor

                # Add the batch axis.
                x_img = np.expand_dims(x_img, axis=0)
                x_img_2 = np.expand_dims(x_img_2, axis=0)

                # Scale the second half of axis 1 of the regression target.
                y_rpn_regr[:, y_rpn_regr.shape[1] // 2:, :, :] *= C.std_scaling

                if backend == 'tf':
                    y_rpn_cls = np.transpose(y_rpn_cls, (0, 2, 3, 1))
                    y_rpn_regr = np.transpose(y_rpn_regr, (0, 2, 3, 1))

                yield np.copy(x_img), [
                    np.copy(y_rpn_cls), np.copy(y_rpn_regr)
                ], img_data_aug, np.copy(x_img_2)

            except Exception as e:
                # Report the problem and move on to the next image.
                print(e)
                continue