# Imports inferred from usage. `args` (parsed CLI options) and the project
# helpers (get_fullname, get_filepath, read_tensor, filter_by_channels,
# get_area, get_labels, get_folders, get_height_bounds, get_instance_info,
# add_record, update_overall_sizes, count_channels, get_model) are assumed
# to come from the repository's own modules.
import os

import cv2 as cv
import geopandas as gp
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from albumentations import (CLAHE, Compose, Flip, OneOf, RandomRotate90,
                            RandomSizedCrop, RGBShift)
from albumentations.pytorch import ToTensor  # pre-1.0 albumentations API (assumed)
from PIL import Image
from sklearn.model_selection import StratifiedShuffleSplit
from torchvision import transforms
from tqdm import tqdm


def get_data(data_info,
             data_path=args.data_path,
             image_folder=args.images_folder,
             mask_folder=args.masks_folder,
             image_type=args.image_type,
             mask_type=args.mask_type):

    x = []
    y = []
    for _, row in data_info.iterrows():
        filename = get_fullname(row['name'], row['position'])

        image_path = get_filepath(data_path,
                                  row['name'],
                                  image_folder,
                                  filename,
                                  file_type=image_type)
        mask_path = get_filepath(data_path,
                                 row['name'],
                                 mask_folder,
                                 filename,
                                 file_type=mask_type)

        x.append(read_tensor(image_path))
        y.append(read_tensor(mask_path))

    x = np.array(x)
    y = np.array(y)
    y = y.reshape([*y.shape, 1])

    return x, y
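
# Hedged usage sketch (not from the source): `data_info` is a pandas
# DataFrame with 'name' and 'position' columns; the CSV path below is a
# hypothetical placeholder.
def _demo_get_data(csv_path='data/train_df.csv'):
    data_info = pd.read_csv(csv_path)
    x, y = get_data(data_info)
    print(x.shape, y.shape)  # (N, H, W, C) images, (N, H, W, 1) masks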
    def get_input_pair(self, data_info_row):
        if len(self.channels) == 0:
            raise ValueError('You have to specify at least one channel.')

        instance_name = '_'.join(
            [data_info_row['name'], data_info_row['position']])
        image_path = get_filepath(self.dataset_path,
                                  data_info_row['dataset_folder'],
                                  self.images_folder,
                                  instance_name,
                                  file_type=self.image_type)
        mask_path = get_filepath(self.dataset_path,
                                 data_info_row['dataset_folder'],
                                 self.masks_folder,
                                 instance_name,
                                 file_type=self.mask_type)

        images_array = filter_by_channels(read_tensor(image_path),
                                          self.channels)

        if images_array.ndim == 2:
            images_array = np.expand_dims(images_array, -1)

        masks_array = read_tensor(mask_path)

        if self.channels[0] == 'rgb':
            rgb_tensor = images_array[:, :, :3].astype(np.uint8)

            rgb_aug = Compose(
                [OneOf([RGBShift(), CLAHE(clip_limit=2)], p=0.4)], p=0.9)

            augmented_rgb = rgb_aug(image=rgb_tensor, mask=masks_array)
            images_array = np.concatenate(
                [augmented_rgb['image'], images_array[:, :, 3:]], axis=2)
            masks_array = augmented_rgb['mask']

        aug = Compose([
            RandomRotate90(),
            Flip(),
            OneOf([
                RandomSizedCrop(
                    min_max_height=(int(self.image_size * 0.7),
                                    self.image_size),
                    height=self.image_size,
                    width=self.image_size)
            ], p=0.4),
            ToTensor()
        ])

        augmented = aug(image=images_array, mask=masks_array)
        augmented_images = augmented['image']
        augmented_masks = augmented['mask']

        return {'features': augmented_images, 'targets': augmented_masks}
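
# Hedged sketch: get_input_pair reads like a method of a Dataset-style
# loader class whose attributes (dataset_path, channels, image_size,
# folder and file-type settings) are set in __init__. The class below is
# an assumed reconstruction for illustration, not the project's actual
# class; in the real code, get_input_pair (above) would be defined on it.
from torch.utils.data import Dataset

class _LoaderSketch(Dataset):
    def __init__(self, data_info, dataset_path, channels, image_size,
                 images_folder='images', masks_folder='masks',
                 image_type='tiff', mask_type='png'):
        self.data_info = data_info.reset_index(drop=True)
        self.dataset_path = dataset_path
        self.channels = channels
        self.image_size = image_size
        self.images_folder = images_folder
        self.masks_folder = masks_folder
        self.image_type = image_type
        self.mask_type = mask_type

    # get_input_pair (above) would live here in the real class.

    def __len__(self):
        return len(self.data_info)

    def __getitem__(self, idx):
        return self.get_input_pair(self.data_info.iloc[idx])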
def stratify(data_info,
             data_path=args.data_path,
             test_size=0.2,
             random_state=42,
             instance_type=args.instance_type,
             instances_folder=args.instances_folder):

    X, _ = get_data(data_info)
    areas = []
    for _, row in data_info.iterrows():
        instance_name = get_fullname(row['name'], row['position'])
        # Note: instance_name is passed twice, once as the per-instance
        # subfolder and once as the file name inside it.
        instance_path = get_filepath(data_path,
                                     row['name'],
                                     instances_folder,
                                     instance_name,
                                     instance_name,
                                     file_type=instance_type)
        areas.append(get_area(instance_path))

    labels = get_labels(np.array(areas))

    sss = StratifiedShuffleSplit(n_splits=1,
                                 test_size=test_size,
                                 random_state=random_state)

    return sss.split(X, labels)
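
# Hedged usage sketch: stratify returns the generator produced by
# StratifiedShuffleSplit.split, so the single train/test split is drawn
# with next(); the CSV path is a hypothetical placeholder.
def _demo_stratify(csv_path='data/data_info.csv'):
    data_info = pd.read_csv(csv_path)
    train_index, test_index = next(stratify(data_info))
    return data_info.iloc[train_index], data_info.iloc[test_index]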
def predict(data_path, model_weights_path, network, test_df_path, save_path,
            size, channels):
    model = get_model(network)
    # Replace the encoder's first convolution so it accepts the selected
    # number of input channels instead of the default three. Uses the
    # `channels` parameter rather than the global `args.channels`.
    model.encoder.conv1 = nn.Conv2d(count_channels(channels),
                                    64,
                                    kernel_size=(7, 7),
                                    stride=(2, 2),
                                    padding=(3, 3),
                                    bias=False)

    checkpoint = torch.load(model_weights_path, map_location='cpu')
    model.load_state_dict(checkpoint['model_state_dict'])

    test_df = pd.read_csv(test_df_path)

    predictions_path = os.path.join(save_path, "predictions")

    if not os.path.exists(predictions_path):
        os.makedirs(predictions_path, exist_ok=True)
        print("Prediction directory created.")

    for _, image_info in tqdm(test_df.iterrows(), total=len(test_df)):
        filename = '_'.join([image_info['name'], image_info['position']])
        image_path = get_filepath(data_path,
                                  image_info['dataset_folder'],
                                  'images',
                                  filename,
                                  file_type='tiff')

        image_tensor = filter_by_channels(read_tensor(image_path), channels)
        if image_tensor.ndim == 2:
            image_tensor = np.expand_dims(image_tensor, -1)

        image = transforms.ToTensor()(image_tensor)

        prediction = model.predict(
            image.view(1, count_channels(channels), size, size))

        result = prediction.view(size, size).detach().numpy()

        cv.imwrite(get_filepath(predictions_path, filename, file_type='png'),
                   (result * 255).astype(np.uint8))
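
# Hedged usage sketch: wiring predict up to the parsed CLI options; every
# literal below (checkpoint path, network key, CSV path, tile size) is a
# hypothetical placeholder, not a value from the source.
def _demo_predict():
    predict(data_path=args.data_path,
            model_weights_path='weights/best.pth',
            network='unet50',
            test_df_path='data/test_df.csv',
            save_path='predictions_out',
            size=224,
            channels=args.channels)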
def save_split(split_info, filename, save_path):
    split_info.to_csv(get_filepath(save_path, filename, file_type='csv'),
                      index=False)
def geo_split(
    data_path=args.data_path,
    markup_path=args.markup_path,
    mask_type=args.mask_type,
    masks_folder=args.masks_folder,
    polygons_folder=args.polygons_folder,
    test_threshold=0.2,
    val_bottom_threshold=0.2,
    val_threshold=0.3,
):
    datasets = get_folders(data_path)
    geojson_markup = gp.read_file(markup_path)

    minY, maxY = get_height_bounds(geojson_markup)

    height = maxY - minY

    cols = ['dataset_folder', 'name', 'position']
    train_df = pd.DataFrame(columns=cols)
    val_df = pd.DataFrame(columns=cols)
    test_df = pd.DataFrame(columns=cols)

    overall_sizes = {'test': 0, 'train': 0, 'val': 0, 'deleted': 0}

    for dataset_dir in datasets:
        polys_path = os.path.join(data_path, dataset_dir, polygons_folder)
        print(dataset_dir)

        deleted = 0
        train = 0
        test = 0
        val = 0

        for poly_name in os.listdir(polys_path):
            instance_geojson_path = os.path.join(polys_path, poly_name)
            instance_geojson = gp.read_file(instance_geojson_path)

            if geojson_markup.crs != instance_geojson.crs:
                geojson_markup = geojson_markup.to_crs(instance_geojson.crs)
                minY, maxY = get_height_bounds(geojson_markup)
                height = maxY - minY

            instance_minY, instance_maxY = get_height_bounds(instance_geojson)

            name, position = get_instance_info(poly_name)

            masks_path = os.path.join(data_path, dataset_dir, masks_folder)
            mask_path = get_filepath(masks_path,
                                     get_fullname(name, position),
                                     file_type=mask_type)
            mask = Image.open(mask_path)
            mask_array = np.array(mask)

            # Keep the tile only if the mask is non-trivial (more than
            # 0.1 % of pixels) and most of it lies away from a 10-pixel
            # border.
            mask_pixels = np.count_nonzero(mask_array)
            center_pixels = np.count_nonzero(mask_array[10:-10, 10:-10])
            border_pixels = mask_pixels - center_pixels

            if (mask_pixels > mask_array.size * 0.001
                    and center_pixels > border_pixels):
                # Assign the tile to a split by its latitude band within
                # the markup extent: the bottom test_threshold fraction
                # goes to test, a middle band to validation, the rest to
                # train.
                if instance_maxY < minY + height * test_threshold:
                    test += 1
                    test_df = add_record(test_df,
                                         dataset_folder=name,
                                         name=name,
                                         position=position)
                elif instance_maxY < minY + height * val_threshold \
                        and instance_minY > minY + height * val_bottom_threshold:
                    val += 1
                    val_df = add_record(val_df,
                                        dataset_folder=name,
                                        name=name,
                                        position=position)
                else:
                    train += 1
                    train_df = add_record(train_df,
                                          dataset_folder=name,
                                          name=name,
                                          position=position)
            else:
                deleted += 1

        print("Train size", train, "Validation size", val, "Test size", test)
        print(f"{deleted} images were deleted")
        overall_sizes = update_overall_sizes(overall_sizes, test, train, val,
                                             deleted)

    print("Overall sizes", overall_sizes)

    return train_df, val_df, test_df
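
# Hedged usage sketch: persisting the geographic split with save_split;
# the output folder and file names are hypothetical placeholders.
def _demo_geo_split(save_path='data/splits'):
    train_df, val_df, test_df = geo_split()
    os.makedirs(save_path, exist_ok=True)
    save_split(train_df, 'train_df', save_path)
    save_split(val_df, 'val_df', save_path)
    save_split(test_df, 'test_df', save_path)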