import os

import cv2 as cv
import geopandas as gp
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from albumentations import (CLAHE, Compose, Flip, OneOf, RandomRotate90,
                            RandomSizedCrop, RGBShift)
from albumentations.pytorch import ToTensor  # ToTensorV2 in newer albumentations
from PIL import Image
from sklearn.model_selection import StratifiedShuffleSplit
from torchvision import transforms
from tqdm import tqdm

# Project-level helpers (get_fullname, get_filepath, read_tensor,
# filter_by_channels, get_model, count_channels, get_area, get_labels,
# get_folders, get_height_bounds, get_instance_info, add_record,
# update_overall_sizes) and the argparse namespace `args` are defined
# elsewhere in the repository.


def get_data(data_info, data_path=args.data_path, image_folder=args.images_folder,
             mask_folder=args.masks_folder, image_type=args.image_type,
             mask_type=args.mask_type):
    # Load every image/mask pair listed in data_info into memory.
    x = []
    y = []
    for _, row in data_info.iterrows():
        filename = get_fullname(row['name'], row['position'])
        image_path = get_filepath(data_path, row['name'], image_folder,
                                  filename, file_type=image_type)
        mask_path = get_filepath(data_path, row['name'], mask_folder,
                                 filename, file_type=mask_type)
        x.append(read_tensor(image_path))
        y.append(read_tensor(mask_path))

    x = np.array(x)
    y = np.array(y)
    # Give masks an explicit single-channel dimension: (N, H, W) -> (N, H, W, 1).
    y = y.reshape([*y.shape, 1])

    return x, y
# Method of the training dataset class: `self` supplies the dataset paths,
# folder names, channel list and crop size.
def get_input_pair(self, data_info_row):
    if len(self.channels) == 0:
        raise Exception('You have to specify at least one channel.')

    instance_name = '_'.join(
        [data_info_row['name'], data_info_row['position']])
    image_path = get_filepath(
        self.dataset_path, data_info_row['dataset_folder'],
        self.images_folder, instance_name, file_type=self.image_type)
    mask_path = get_filepath(
        self.dataset_path, data_info_row['dataset_folder'],
        self.masks_folder, instance_name, file_type=self.mask_type)

    images_array = filter_by_channels(read_tensor(image_path), self.channels)
    if images_array.ndim == 2:
        images_array = np.expand_dims(images_array, -1)
    masks_array = read_tensor(mask_path)

    if self.channels[0] == 'rgb':
        # Colour augmentations are applied to the RGB bands only; any extra
        # bands are re-attached untouched.
        rgb_tensor = images_array[:, :, :3].astype(np.uint8)
        rgb_aug = Compose(
            [OneOf([RGBShift(), CLAHE(clip_limit=2)], p=0.4)], p=0.9)
        augmented_rgb = rgb_aug(image=rgb_tensor, mask=masks_array)
        images_array = np.concatenate(
            [augmented_rgb['image'], images_array[:, :, 3:]], axis=2)
        masks_array = augmented_rgb['mask']

    # Geometric augmentations are applied to the image and mask jointly.
    aug = Compose([
        RandomRotate90(),
        Flip(),
        OneOf([
            RandomSizedCrop(
                min_max_height=(int(self.image_size * 0.7), self.image_size),
                height=self.image_size,
                width=self.image_size)
        ], p=0.4),
        ToTensor()
    ])

    augmented = aug(image=images_array, mask=masks_array)
    augmented_images = augmented['image']
    augmented_masks = augmented['mask']

    return {'features': augmented_images, 'targets': augmented_masks}
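# Usage sketch, not part of the original excerpt: get_input_pair takes `self`,
# so in the repository it presumably lives on a torch Dataset. The wrapper
# below is an illustrative assumption (the class name and constructor
# arguments are invented) showing how a DataLoader would end up yielding
# {'features': ..., 'targets': ...} batches.
from torch.utils.data import Dataset


class TrainDataset(Dataset):
    def __init__(self, data_info, dataset_path, images_folder, masks_folder,
                 image_type, mask_type, channels, image_size):
        self.data_info = data_info
        self.dataset_path = dataset_path
        self.images_folder = images_folder
        self.masks_folder = masks_folder
        self.image_type = image_type
        self.mask_type = mask_type
        self.channels = channels
        self.image_size = image_size

    def __len__(self):
        return len(self.data_info)

    def __getitem__(self, index):
        # Delegate to the function defined above; it returns one augmented
        # sample as a dict of tensors.
        return get_input_pair(self, self.data_info.iloc[index])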
def stratify(data_info, data_path=args.data_path, test_size=0.2, random_state=42,
             instance_type=args.instance_type, instances_folder=args.instances_folder):
    X, _ = get_data(data_info)
    areas = []
    for _, row in data_info.iterrows():
        instance_name = get_fullname(row['name'], row['position'])
        instance_path = get_filepath(data_path, row['name'], instances_folder,
                                     instance_name, instance_name,
                                     file_type=instance_type)
        areas.append(get_area(instance_path))

    labels = get_labels(np.array(areas))

    sss = StratifiedShuffleSplit(n_splits=1, test_size=test_size,
                                 random_state=random_state)

    return sss.split(X, labels)
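# Usage sketch, not part of the original code: StratifiedShuffleSplit.split
# returns a generator, and with n_splits=1 it yields exactly one
# (train_indices, test_indices) pair. The helper name, destination path and
# CSV file names below are illustrative assumptions.
def make_stratified_split(data_info, save_path):
    train_idx, test_idx = next(stratify(data_info))
    save_split(data_info.iloc[train_idx], 'train_df', save_path)
    save_split(data_info.iloc[test_idx], 'test_df', save_path)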
def predict(data_path, model_weights_path, network, test_df_path, save_path,
            size, channels):
    model = get_model(network)
    # Replace the encoder's first convolution so it accepts the configured
    # number of input channels instead of the default three.
    model.encoder.conv1 = nn.Conv2d(
        count_channels(channels), 64, kernel_size=(7, 7),
        stride=(2, 2), padding=(3, 3), bias=False)
    checkpoint = torch.load(model_weights_path, map_location='cpu')
    model.load_state_dict(checkpoint['model_state_dict'])

    test_df = pd.read_csv(test_df_path)

    predictions_path = os.path.join(save_path, "predictions")
    if not os.path.exists(predictions_path):
        os.makedirs(predictions_path, exist_ok=True)
        print("Prediction directory created.")

    for _, image_info in tqdm(test_df.iterrows()):
        filename = '_'.join([image_info['name'], image_info['position']])
        image_path = get_filepath(data_path, image_info['dataset_folder'],
                                  'images', filename, file_type='tiff')

        image_tensor = filter_by_channels(read_tensor(image_path), channels)
        if image_tensor.ndim == 2:
            image_tensor = np.expand_dims(image_tensor, -1)

        image = transforms.ToTensor()(image_tensor)
        prediction = model.predict(
            image.view(1, count_channels(channels), size, size))

        result = prediction.view(size, size).detach().numpy()

        # Save the predicted mask as an 8-bit PNG.
        cv.imwrite(get_filepath(predictions_path, filename, file_type='png'),
                   result * 255)
def save_split(split_info, filename, save_path):
    split_info.to_csv(get_filepath(save_path, filename, file_type='csv'),
                      index=False)
def geo_split(
        data_path=args.data_path, markup_path=args.markup_path,
        mask_type=args.mask_type, masks_folder=args.masks_folder,
        polygons_folder=args.polygons_folder,
        test_threshold=0.2, val_bottom_threshold=0.2, val_threshold=0.3):
    datasets = get_folders(data_path)

    geojson_markup = gp.read_file(markup_path)

    minY, maxY = get_height_bounds(geojson_markup)
    height = maxY - minY

    cols = ['dataset_folder', 'name', 'position']
    train_df = pd.DataFrame(columns=cols)
    val_df = pd.DataFrame(columns=cols)
    test_df = pd.DataFrame(columns=cols)

    overall_sizes = {'test': 0, 'train': 0, 'val': 0, 'deleted': 0}

    for dataset_dir in datasets:
        polys_path = os.path.join(data_path, dataset_dir, polygons_folder)
        print(dataset_dir)

        deleted = 0
        train = 0
        test = 0
        val = 0

        for poly_name in os.listdir(polys_path):
            instance_geojson_path = os.path.join(polys_path, poly_name)
            instance_geojson = gp.read_file(instance_geojson_path)

            # Re-project the markup to the instance CRS and recompute the
            # height bounds whenever the coordinate systems differ.
            if geojson_markup.crs != instance_geojson.crs:
                geojson_markup = geojson_markup.to_crs(instance_geojson.crs)
                minY, maxY = get_height_bounds(geojson_markup)
                height = maxY - minY

            instance_minY, instance_maxY = get_height_bounds(instance_geojson)

            name, position = get_instance_info(poly_name)

            masks_path = os.path.join(data_path, dataset_dir, masks_folder)
            mask_path = get_filepath(masks_path, get_fullname(name, position),
                                     file_type=mask_type)
            mask = Image.open(mask_path)
            mask_array = np.array(mask)
            mask_pixels = np.count_nonzero(mask_array)
            center_pixels = np.count_nonzero(mask_array[10:-10, 10:-10])
            border_pixels = mask_pixels - center_pixels

            # Keep only instances whose mask is not almost empty and whose
            # labelled pixels lie mostly away from the tile border; assign
            # them to test/val/train by their Y position within the markup
            # extent.
            if mask_pixels > mask_array.size * 0.001 and center_pixels > border_pixels:
                if instance_maxY < minY + height * test_threshold:
                    test += 1
                    test_df = add_record(test_df, dataset_folder=name,
                                         name=name, position=position)
                elif instance_maxY < minY + height * val_threshold \
                        and instance_minY > minY + height * val_bottom_threshold:
                    val += 1
                    val_df = add_record(val_df, dataset_folder=name,
                                        name=name, position=position)
                else:
                    train += 1
                    train_df = add_record(train_df, dataset_folder=name,
                                          name=name, position=position)
            else:
                deleted += 1

        print("Train size", train, "Validation size", val, "Test size", test)
        print(f"{deleted} images were deleted")

        overall_sizes = update_overall_sizes(overall_sizes,
                                             test, train, val, deleted)

    print("Overall sizes", overall_sizes)

    return train_df, val_df, test_df
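# Usage sketch (assumption, not in the original code): the three DataFrames
# returned by geo_split are typically persisted with save_split so that the
# training and prediction steps can read them back as CSVs. The output
# directory below is illustrative.
if __name__ == '__main__':
    train_df, val_df, test_df = geo_split()
    save_split(train_df, 'train_df', 'data/splits')
    save_split(val_df, 'val_df', 'data/splits')
    save_split(test_df, 'test_df', 'data/splits')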