Example #1
0
def test_downsize_imagelist(tiny_ic_data_path, tmp):
    """Downsizing an image list keeps the item count and caps each image's size."""
    max_dim = 50
    src_list = ImageList.from_folder(tiny_ic_data_path)
    downsize_imagelist(src_list, tmp, max_dim)
    resized_list = ImageList.from_folder(tmp)
    # No image may be dropped by the downsizing step.
    assert len(src_list) == len(resized_list)
    # Each written image must have its smaller side within the limit.
    for path in resized_list.items:
        assert min(Image.open(path).size) <= max_dim
def testing_databunch(tmp_session):
    """Build a databunch from the Fridge Objects dataset and return its
    validation component, which is used to test comparative_set_builder."""
    im_paths = unzip_url(
        ic_urls.fridge_objects_tiny_path,
        fpath=tmp_session,
        dest=tmp_session,
        exist_ok=True,
    )
    # List the first five "can" images (kept for parity with the original
    # fixture, even though the paths are not used below).
    can_dir = os.path.join(im_paths, "can")
    can_im_paths = [
        os.path.join(can_dir, im_name) for im_name in os.listdir(can_dir)
    ][:5]
    random.seed(642)
    bunch = (
        ImageList.from_folder(im_paths)
        .split_by_rand_pct(valid_pct=0.2, seed=20)
        .label_from_folder()
        .transform(size=300)
        .databunch(bs=16)
        .normalize(imagenet_stats)
    )
    return bunch.valid_ds
Example #3
0
def tiny_ic_databunch(tmp_session):
    """Return a databunch object for the tiny fridge objects dataset."""
    data_path = unzip_url(Urls.fridge_objects_tiny_path,
                          tmp_session,
                          exist_ok=True)
    # 90/10 train/validation split, 50px images, batches of 16,
    # normalized with ImageNet statistics.
    bunch = (
        ImageList.from_folder(data_path)
        .split_by_rand_pct(valid_pct=0.1, seed=20)
        .label_from_folder()
        .transform(size=50)
        .databunch(bs=16)
        .normalize(imagenet_stats)
    )
    return bunch
Example #4
0
    def predict(self, paths):
        """Run inference on the images at `paths`.

        Returns a tuple of (probabilities for the target class as an array,
        list of predicted class-name labels), one entry per input path.
        """
        self.learn.data.add_test(ImageList(paths))

        probs_tensor, _ = self.learn.get_preds(DatasetType.Test)
        probs = probs_tensor.numpy()
        # Map each row's argmax index to its human-readable class name.
        class_names = self.learn.data.classes
        labels = [class_names[idx] for idx in probs.argmax(axis=1)]

        return probs[:, self.target_class_idx], labels
Example #5
0
def create_dataset(path_fullRes: Path, path_list, downsize=True):
    """For each (path, size, quality-factor) triple in `path_list`, build a
    derived copy of every full-resolution image in parallel, skipping any
    target directory that already exists."""
    source_items = ImageList.from_folder(path_fullRes).items

    for target, target_size, quality in path_list:
        if target.exists():
            # Already generated on a previous run — nothing to do.
            continue
        print(f"Creating {target}")
        print(f"Size: {target_size} with {quality} quality factor")
        worker = partial(create_training_images,
                         p_hr=path_fullRes,
                         p_lr=target,
                         size=target_size,
                         qualityFactor=quality,
                         downsize=downsize)
        parallel(worker, source_items)
Example #6
0
def run_minigooglenet(input_path, output_path, batch_size, epochs,
                      learning_rate):
    """Train MiniGoogLeNet on the image folder at `input_path`, save the
    trained weights, print a classification report, and write loss/metric
    plots into `output_path`."""
    root = Path(input_path)

    # Databunch: train/test subfolders, labels from folder names, 32x32 chips.
    data = (
        ImageList.from_folder(root)
        .split_by_folder(train="train", valid="test")
        .label_from_folder()
        .transform(tfms=None, size=32)
        .databunch(bs=batch_size)
        .normalize()
    )

    # Learner wrapping the MiniGoogLeNet model with cross-entropy loss.
    learner = Learner(
        data=data,
        model=MiniGoogLeNet(n_class=data.c, size=32, depth=3),
        loss_func=nn.CrossEntropyLoss(),
        metrics=accuracy,
    )

    learner.fit_one_cycle(epochs, learning_rate)

    # Final validation accuracy, rounded and scaled to an int so it can be
    # embedded in the checkpoint filename.
    final_metric = learner.recorder.metrics[-1][0].numpy().tolist()
    val_acc = int(np.round(final_metric, 3) * 1000)

    learner.save("minigooglenet_cifar10_stg_1_" + str(val_acc))

    # Evaluation on the validation split.
    print("Evaluating Network..")
    interp = ClassificationInterpretation.from_learner(learner)
    print(classification_report(interp.y_true, interp.pred_class))

    # Persist the training curves as images.
    learner.recorder.plot_losses()
    plt.savefig(output_path + "/loss.png")

    learner.recorder.plot_metrics()
    plt.savefig(output_path + "/metric.png")
Example #7
0
def inference_dict(path_to_model, progress_output=True):
    """
    Makes inference on `test` images from `path_to_model/test` subfolder.
    """
    data = (ImageList.from_folder(path_to_model)
            .split_by_folder()
            .label_from_folder()
            .add_test_folder('test')
            .transform(get_transforms(), size=224)
            .databunch()
            .normalize(imagenet_stats))
    # Restore the model from the `export.pkl` file inside `path_to_model`.
    learn = load_learner(path_to_model)

    # One row per test image: predicted label, class probabilities, timing.
    results = {}
    for idx in range(len(data.test_ds)):
        img = data.test_ds[idx][0]
        start = time.time()
        label, _, probs = learn.predict(img)
        elapsed = time.time() - start
        label = str(label)
        fname = data.test_dl.dataset.items[idx].stem
        results[fname] = [label] + [float(p) for p in probs] + [elapsed]
        if progress_output:
            print("'{}' --> '{:>17}' class with probabilities [{:04.2f}, {:04.2f}, {:04.2f}] inference time: {:04.3} seconds".
                  format(fname, label, probs[0], probs[1], probs[2], elapsed))
    # Column names: label, one probability column per class, elapsed time.
    columns = ['label'] + ["p_" + cls for cls in data.classes] + ['time']
    return pd.DataFrame.from_dict(results, orient='index', columns=columns)
def predict_pga_entity(path_exported_learner:Union[pathlib.Path, str], wsi_path:Union[pathlib.Path, str])->Dict:
    """
    Predict pituitary-adenoma entity probabilities for one whole-slide image.

    Arguments:
        path_exported_learner: path to file that has been exported via fastai.basic_train.Learner.export()
        wsi_path: path to a whole-slide image
    Returns:
        returns a dictionary with probabilities for the classes: ['acth', 'silent', 'lh', 'fsh']
    """
    # calculate tiles 
    #tilesummaries = tiles.WsiOrROIToTilesMultithreaded(wsi_paths=[wsi_path], 
    #                                                tiles_folder_path=None, 
    #                                                tile_height=512, 
    #                                                tile_width=512, 
    #                                                tile_naming_func=tiles.get_wsi_name_from_path_pituitary_adenoma_entities,
    #                                                tile_score_thresh=0.7,
    #                                                tile_scoring_function=tiles.scoring_function_1,
    #                                                is_wsi=True,
    #                                                level=0,
    #                                                save_tiles=False,
    #                                                return_as_tilesummary_object=True)
    #ts = tilesummaries[0]

    # Tile the slide in memory only (tiles_folder_path=None, save_tiles=False);
    # the returned summary object carries tile coordinates, not pixel data.
    ts = tiles.WsiOrROIToTiles(wsi_path=wsi_path, 
                                                        tiles_folder_path=None, 
                                                        tile_height=512, 
                                                        tile_width=512, 
                                                        tile_naming_func=tiles.get_wsi_name_from_path_pituitary_adenoma_entities,
                                                        tile_score_thresh=0.1,
                                                        tile_scoring_function=tiles.scoring_function_1,
                                                        is_wsi=True,
                                                        level=0,
                                                        save_tiles=False,
                                                        return_as_tilesummary_object=True)
    
    
    
    
    #overwrite fastai's function for opening images !!!TODO: change this by using a custom DataLoader Implementation
    def open_custom(self, fn):
        "Open image in `fn`."
        return open_image_custom(fn, convert_mode=self.convert_mode, after_open=self.after_open)

    def open_image_custom(fn:typing.Union[pathlib.Path, str], 
                      div:bool=True, 
                      convert_mode:str='RGB', 
                      cls:type=fastai.vision.Image, 
                      after_open:Callable=None)->fastai.vision.Image:
        "Open image in `fn`."
        # Instead of reading `fn` from disk, look up the tile by its name and
        # extract its pixels directly from the whole-slide image on demand.
        # NOTE: `tile_name_to_tile_object` is bound by closure and is only
        # defined further below — this works because the function is not
        # called until prediction time (late binding).
        fn = Path(fn)
        tile_name = fn.name
        t = tile_name_to_tile_object[tile_name]
        tile = tiles.ExtractTileFromWSI(path=t.wsi_path, 
                                        x=t.get_x(), 
                                        y=t.get_y(), 
                                        width=t.get_width(), 
                                        height=t.get_height(), 
                                        level=t.level)
        tile = tile.convert(convert_mode)
        if after_open: 
            tile = after_open(tile)
        # Convert PIL -> float tensor; divide by 255 to get the 0-1 range
        # fastai expects.
        tile = pil2tensor(tile,np.float32)
        if div: 
            tile.div_(255)
        return cls(tile)
        
    # NOTE(review): this monkey-patches fastai *globally*, affecting every
    # ImageList in the process for the remainder of its lifetime — not just
    # this call. See the TODO above about using a custom DataLoader instead.
    fastai.vision.data.ImageList.open = open_custom
    fastai.vision.image.open_image = open_image_custom
    
    #create fastai.vision.data.ImageList
    # The "items" are tile names only; the patched opener resolves them to
    # pixels via `tile_name_to_tile_object`.
    tiles_df = pd.DataFrame([t.get_name() for t in ts.top_tiles()], columns=['name'])

    tile_name_to_tile_object = {}
    for t in ts.top_tiles():
        tile_name_to_tile_object[t.get_name()] = t
    
    img_list = ImageList.from_df(df=tiles_df, path='')
    
    #init learner
    learner = load_learner(path=Path(path_exported_learner).parent, file=Path(path_exported_learner).name, test=img_list)
    learner.data.batch_size = 6
    
    #make predictions on tiles
    #preds, y = learner.get_preds(ds_type=fastai.basic_data.DatasetType.Test)
    # NOTE(review): only the first 8 tiles are predicted — presumably a
    # speed/memory trade-off (or leftover debugging cap); confirm intent.
    preds = torch.stack([learner.predict(img)[2] for img in img_list[:8]])
    
    #calculate probabilities for the whole-slide image
    # Per class: fraction of tiles whose probability exceeds 0.5 — i.e. a
    # tile-level vote aggregated over the slide.
    thresh = 0.5
    preds_bool = (preds > thresh).float()
    probs_wsi = preds_bool.sum(0)/len(preds_bool)
    classes = ['acth', 'silent', 'lh', 'fsh']
    result = {}
    for n, c in enumerate(classes):
        result[c] = probs_wsi[n].item()
    return result
Example #9
0
def prepare_data(path,
                 class_mapping=None,
                 chip_size=224,
                 val_split_pct=0.1,
                 batch_size=64,
                 transforms=None,
                 collate_fn=_bb_pad_collate,
                 seed=42,
                 dataset_type=None,
                 resize_to=None,
                 **kwargs):
    """
    Prepares a data object from training sample exported by the 
    Export Training Data tool in ArcGIS Pro or Image Server, or training 
    samples in the supported dataset formats. This data object consists of 
    training and validation data sets with the specified transformations, 
    chip size, batch size, split percentage, etc. 
    -For object detection, use Pascal_VOC_rectangles format.
    -For feature categorization use Labelled Tiles or ImageNet format.
    -For pixel classification, use Classified Tiles format.
    -For entity extraction from text, use IOB, BILUO or ner_json formats. 

    =====================   ===========================================
    **Argument**            **Description**
    ---------------------   -------------------------------------------
    path                    Required string. Path to data directory.
    ---------------------   -------------------------------------------
    class_mapping           Optional dictionary. Mapping from id to
                            its string label.
                            For dataset_type=IOB, BILUO or ner_json:
                                Provide address field as class mapping
                                in below format:
                                class_mapping={'address_tag':'address_field'}
    ---------------------   -------------------------------------------
    chip_size               Optional integer. Size of the image to train the
                            model.
    ---------------------   -------------------------------------------
    val_split_pct           Optional float. Percentage of training data to keep
                            as validation.
    ---------------------   -------------------------------------------
    batch_size              Optional integer. Batch size for mini batch gradient
                            descent (Reduce it if getting CUDA Out of Memory
                            Errors).
    ---------------------   -------------------------------------------
    transforms              Optional tuple. Fast.ai transforms for data
                            augmentation of training and validation datasets
                            respectively (We have set good defaults which work
                            for satellite imagery well). If transforms is set
                            to `False` no transformation will take place and 
                            `chip_size` parameter will also not take effect.
    ---------------------   -------------------------------------------
    collate_fn              Optional function. Passed to PyTorch to collate data
                            into batches(usually default works).
    ---------------------   -------------------------------------------
    seed                    Optional integer. Random seed for reproducible
                            train-validation split.
    ---------------------   -------------------------------------------
    dataset_type            Optional string. `prepare_data` function will infer 
                            the `dataset_type` on its own if it contains a 
                            map.txt file. If the path does not contain the 
                            map.txt file pass either of 'PASCAL_VOC_rectangles', 
                            'RCNN_Masks' and 'Classified_Tiles'                    
    ---------------------   -------------------------------------------
    resize_to               Optional integer. Resize the image to given size.
    =====================   ===========================================

    :returns: data object
    """
    """kwargs documentation
    imagery_type='RGB' # Change to known imagery_type or anything else to trigger multispectral
    bands=None # sepcify bands type for unknow imagery ['r', 'g', 'b', 'nir']
    rgb_bands=[0, 1, 2] # specify rgb bands indices for unknown imagery
    norm_pct=0.3 # sample of images to calculate normalization stats on 
    do_normalize=True # Normalize data 
    """

    height_width = []

    if not HAS_FASTAI:
        _raise_fastai_import_error()

    if isinstance(path, str) and not os.path.exists(path):
        raise Exception("Invalid input path.")

    if type(path) is str:
        path = Path(path)

    databunch_kwargs = {'num_workers': 0} if sys.platform == 'win32' else {}
    databunch_kwargs['bs'] = batch_size

    kwargs_transforms = {}
    if resize_to:
        kwargs_transforms['size'] = resize_to

    has_esri_files = _check_esri_files(path)
    alter_class_mapping = False
    color_mapping = None

    # Multispectral Kwargs init
    _bands = None
    _imagery_type = None
    _is_multispectral = False
    _show_batch_multispectral = None

    if dataset_type is None and not has_esri_files:
        raise Exception("Could not infer dataset type.")

    if dataset_type != "Imagenet" and has_esri_files:
        stats_file = path / 'esri_accumulated_stats.json'
        with open(stats_file) as f:
            stats = json.load(f)
            dataset_type = stats['MetaDataMode']

        with open(path / 'map.txt') as f:
            line = f.readline()

        right = line.split()[1].split('.')[-1].lower()

        json_file = path / 'esri_model_definition.emd'
        with open(json_file) as f:
            emd = json.load(f)

        # Create Class Mapping from EMD if not specified by user
        ## Validate user defined class_mapping keys with emd (issue #3064)
        # Get classmapping from emd file.
        try:
            emd_class_mapping = {i['Value']: i['Name'] for i in emd['Classes']}
        except KeyError:
            emd_class_mapping = {
                i['ClassValue']: i['ClassName']
                for i in emd['Classes']
            }

        ## Change all keys to int.
        if class_mapping is not None:
            class_mapping = {
                int(key): value
                for key, value in class_mapping.items()
            }
        else:
            class_mapping = {}

        ## Map values from user defined classmapping to emd classmapping.
        for key, _ in emd_class_mapping.items():
            if class_mapping.get(key) is not None:
                emd_class_mapping[key] = class_mapping[key]

        class_mapping = emd_class_mapping

        color_mapping = {(i.get('Value', 0) or i.get('ClassValue', 0)):
                         i['Color']
                         for i in emd.get('Classes', [])}

        if color_mapping.get(None):
            del color_mapping[None]

        if class_mapping.get(None):
            del class_mapping[None]

        # Multispectral support from EMD
        # Not Implemented Yet
        if emd.get('bands', None) is not None:
            _bands = emd.get['bands']  # Not Implemented

        if emd.get('imagery_type', None) is not None:
            _imagery_type = emd.get['imagery_type']  # Not Implemented

    elif dataset_type == 'PASCAL_VOC_rectangles' and not has_esri_files:
        if class_mapping is None:
            class_mapping = _get_class_mapping(path / 'labels')
            alter_class_mapping = True

    # Multispectral check
    imagery_type = 'RGB'
    if kwargs.get('imagery_type', None) is not None:
        imagery_type = kwargs.get('imagery_type')
    elif _imagery_type is not None:
        imagery_type = _imagery_type

    bands = None
    if kwargs.get('bands', None) is not None:
        bands = kwargs.get('bands')
        for i, b in enumerate(bands):
            if type(b) == str:
                bands[i] = b.lower()
    elif imagery_type_lib.get(imagery_type, None) is not None:
        bands = imagery_type_lib.get(imagery_type)['bands']
    elif _bands is not None:
        bands = _bands

    rgb_bands = None
    if kwargs.get('rgb_bands', None) is not None:
        rgb_bands = kwargs.get('rgb_bands')
    elif bands is not None:
        rgb_bands = [bands.index(b) for b in ['r', 'g', 'b'] if b in bands]

    if (bands is not None) or (rgb_bands
                               is not None) or (not imagery_type == 'RGB'):
        if imagery_type == 'RGB':
            imagery_type = 'multispectral'
        _is_multispectral = True

    if kwargs.get('norm_pct', None) is not None:
        norm_pct = kwargs.get('norm_pct')
        norm_pct = min(max(0, norm_pct), 1)
    else:
        norm_pct = .3

    lighting_transforms = kwargs.get('lighting_transforms', True)

    if dataset_type == 'RCNN_Masks':

        def get_labels(x, label_dirs, ext=right):
            label_path = []
            for lbl in label_dirs:
                if os.path.exists(Path(lbl) / (x.stem + '.{}'.format(ext))):
                    label_path.append(Path(lbl) / (x.stem + '.{}'.format(ext)))
            return label_path

        if class_mapping.get(0):
            del class_mapping[0]

        if color_mapping.get(0):
            del color_mapping[0]

        # Handle Multispectral
        if _is_multispectral:
            src = (ArcGISInstanceSegmentationMSItemList.from_folder(
                path / 'images').split_by_rand_pct(val_split_pct, seed=seed))
            _show_batch_multispectral = show_batch_rcnn_masks
        else:
            src = (ArcGISInstanceSegmentationItemList.from_folder(
                path / 'images').split_by_rand_pct(val_split_pct, seed=seed))

        label_dirs = []
        index_dir = {}  #for handling calss value with any number
        for i, k in enumerate(sorted(class_mapping.keys())):
            label_dirs.append(class_mapping[k])
            index_dir[k] = i + 1
        label_dir = [
            os.path.join(path / 'labels', lbl) for lbl in label_dirs
            if os.path.isdir(os.path.join(path / 'labels', lbl))
        ]
        get_y_func = partial(get_labels, label_dirs=label_dir)
        src = src.label_from_func(get_y_func,
                                  chip_size=chip_size,
                                  classes=['NoData'] +
                                  list(class_mapping.values()),
                                  class_mapping=class_mapping,
                                  color_mapping=color_mapping,
                                  index_dir=index_dir)

    elif dataset_type == 'Classified_Tiles':

        def get_y_func(x, ext=right):
            return x.parents[1] / 'labels' / (x.stem + '.{}'.format(ext))

        if class_mapping.get(0):
            del class_mapping[0]

        if color_mapping.get(0):
            del color_mapping[0]

        if is_no_color(color_mapping):
            color_mapping = {
                j: [random.choice(range(256)) for i in range(3)]
                for j in class_mapping.keys()
            }

        # TODO : Handle NoData case

        # Handle Multispectral
        if _is_multispectral:
            data = ArcGISSegmentationMSItemList.from_folder(path/'images')\
                .split_by_rand_pct(val_split_pct, seed=seed)\
                .label_from_func(
                    get_y_func, classes=(['NoData'] + list(class_mapping.values())),
                    class_mapping=class_mapping,
                    color_mapping=color_mapping
                )
            _show_batch_multispectral = _show_batch_unet_multispectral

            def classified_tiles_collate_fn(
                samples
            ):  # The default fastai collate_fn was causing memory leak on tensors
                r = (torch.stack([x[0].data for x in samples]),
                     torch.stack([x[1].data for x in samples]))
                return r

            databunch_kwargs['collate_fn'] = classified_tiles_collate_fn

        else:
            data = ArcGISSegmentationItemList.from_folder(path/'images')\
                .split_by_rand_pct(val_split_pct, seed=seed)\
                .label_from_func(
                    get_y_func, classes=(['NoData'] + list(class_mapping.values())),
                    class_mapping=class_mapping,
                    color_mapping=color_mapping
                )

        if transforms is None:
            transforms = get_transforms(flip_vert=True,
                                        max_rotate=90.,
                                        max_zoom=3.0,
                                        max_lighting=0.5)

        kwargs_transforms['tfm_y'] = True
        kwargs_transforms['size'] = chip_size
    elif dataset_type == 'PASCAL_VOC_rectangles':
        not_label_count = [0]
        get_y_func = partial(_get_bbox_lbls,
                             class_mapping=class_mapping,
                             not_label_count=not_label_count,
                             height_width=height_width)

        if _is_multispectral:
            data = SSDObjectMSItemList.from_folder(path/'images')\
            .split_by_rand_pct(val_split_pct, seed=seed)\
            .label_from_func(get_y_func)
            _show_batch_multispectral = show_batch_pascal_voc_rectangles
        else:
            data = SSDObjectItemList.from_folder(path/'images')\
                .split_by_rand_pct(val_split_pct, seed=seed)\
                .label_from_func(get_y_func)

        if not_label_count[0]:
            logger = logging.getLogger()
            logger.warning("Please check your dataset. " +
                           str(not_label_count[0]) +
                           " images dont have the corresponding label files.")

        if transforms is None:
            ranges = (0, 1)
            train_tfms = [
                crop(size=chip_size, p=1., row_pct=ranges, col_pct=ranges),
                dihedral_affine() if has_esri_files else flip_lr(),
                brightness(change=(0.4, 0.6)),
                contrast(scale=(0.75, 1.5)),
                rand_zoom(scale=(1.0, 1.5))
            ]
            val_tfms = [crop(size=chip_size, p=1., row_pct=0.5, col_pct=0.5)]
            transforms = (train_tfms, val_tfms)

        kwargs_transforms['tfm_y'] = True
        databunch_kwargs['collate_fn'] = collate_fn
    elif dataset_type in ['Labeled_Tiles', 'Imagenet']:
        if dataset_type == 'Labeled_Tiles':
            get_y_func = partial(_get_lbls, class_mapping=class_mapping)
        else:

            def get_y_func(x):
                return x.parent.stem

        if _is_multispectral:
            data = ArcGISMSImageList.from_folder(path/'images')\
                .split_by_rand_pct(val_split_pct, seed=42)\
                .label_from_func(get_y_func)
            _show_batch_multispectral = show_batch_labeled_tiles
        else:
            data = ImageList.from_folder(path/'images')\
                .split_by_rand_pct(val_split_pct, seed=42)\
                .label_from_func(get_y_func)

        if dataset_type == 'Imagenet':
            class_mapping = {}
            index = 1
            for class_name in data.classes:
                class_mapping[index] = class_name
                index = index + 1

        if transforms is None:
            ranges = (0, 1)
            train_tfms = [
                rotate(degrees=30, p=0.5),
                crop(size=chip_size, p=1., row_pct=ranges, col_pct=ranges),
                dihedral_affine(),
                brightness(change=(0.4, 0.6)),
                contrast(scale=(0.75, 1.5))
            ]
            val_tfms = [crop(size=chip_size, p=1.0, row_pct=0.5, col_pct=0.5)]
            transforms = (train_tfms, val_tfms)
    elif dataset_type in ['ner_json', 'BIO', 'IOB', 'LBIOU', 'BILUO']:
        return ner_prepare_data(dataset_type=dataset_type,
                                path=path,
                                class_mapping=class_mapping,
                                val_split_pct=val_split_pct)
    else:
        raise NotImplementedError(
            'Unknown dataset_type="{}".'.format(dataset_type))

    if _is_multispectral:
        if dataset_type == 'RCNN_Masks':
            kwargs['do_normalize'] = False
            if transforms == None:
                data = (src.transform(
                    size=chip_size, tfm_y=True).databunch(**databunch_kwargs))
            else:
                data = (src.transform(
                    transforms, size=chip_size,
                    tfm_y=True).databunch(**databunch_kwargs))
        else:
            data = (data.transform(
                transforms, **kwargs_transforms).databunch(**databunch_kwargs))

        if len(data.x) < 300:
            norm_pct = 1

        # Statistics
        dummy_stats = {
            "batch_stats_for_norm_pct_0": {
                "band_min_values": None,
                "band_max_values": None,
                "band_mean_values": None,
                "band_std_values": None,
                "scaled_min_values": None,
                "scaled_max_values": None,
                "scaled_mean_values": None,
                "scaled_std_values": None
            }
        }
        normstats_json_path = os.path.abspath(data.path / '..' /
                                              'esri_normalization_stats.json')
        if not os.path.exists(normstats_json_path):
            normstats = dummy_stats
            with open(normstats_json_path, 'w', encoding='utf-8') as f:
                json.dump(normstats, f, ensure_ascii=False, indent=4)
        else:
            with open(normstats_json_path) as f:
                normstats = json.load(f)

        norm_pct_search = f"batch_stats_for_norm_pct_{round(norm_pct*100)}"
        if norm_pct_search in normstats:
            batch_stats = normstats[norm_pct_search]
            for s in batch_stats:
                if batch_stats[s] is not None:
                    batch_stats[s] = torch.tensor(batch_stats[s])
        else:
            batch_stats = _get_batch_stats(data.x, norm_pct)
            normstats[norm_pct_search] = dict(batch_stats)
            for s in normstats[norm_pct_search]:
                if normstats[norm_pct_search][s] is not None:
                    normstats[norm_pct_search][s] = normstats[norm_pct_search][
                        s].tolist()
            with open(normstats_json_path, 'w', encoding='utf-8') as f:
                json.dump(normstats, f, ensure_ascii=False, indent=4)

        # batch_stats -> [band_min_values, band_max_values, band_mean_values, band_std_values, scaled_min_values, scaled_max_values, scaled_mean_values, scaled_std_values]
        data._band_min_values = batch_stats['band_min_values']
        data._band_max_values = batch_stats['band_max_values']
        data._band_mean_values = batch_stats['band_mean_values']
        data._band_std_values = batch_stats['band_std_values']
        data._scaled_min_values = batch_stats['scaled_min_values']
        data._scaled_max_values = batch_stats['scaled_max_values']
        data._scaled_mean_values = batch_stats['scaled_mean_values']
        data._scaled_std_values = batch_stats['scaled_std_values']

        # Prevent Divide by zeros
        data._band_max_values[data._band_min_values ==
                              data._band_max_values] += 1
        data._scaled_std_values[data._scaled_std_values == 0] += 1e-02

        # Scaling
        data._min_max_scaler = partial(_tensor_scaler,
                                       min_values=data._band_min_values,
                                       max_values=data._band_max_values,
                                       mode='minmax')
        data._min_max_scaler_tfm = partial(_tensor_scaler_tfm,
                                           min_values=data._band_min_values,
                                           max_values=data._band_max_values,
                                           mode='minmax')

        #data.add_tfm(data._min_max_scaler_tfm)

        # Transforms
        def _scaling_tfm(x):
            ## Scales Fastai Image Scaling | MS Image Values -> 0 - 1 range
            return x.__class__(data._min_max_scaler_tfm((x.data, None))[0][0])

        ## Fastai need tfm, order and resolve.
        class dummy():
            pass

        _scaling_tfm.tfm = dummy()
        _scaling_tfm.tfm.order = 0
        _scaling_tfm.resolve = dummy

        ## Scaling the images before applying any  other transform
        if getattr(data.train_ds, 'tfms') is not None:
            data.train_ds.tfms = [_scaling_tfm] + data.train_ds.tfms
        else:
            data.train_ds.tfms = [_scaling_tfm]
        if getattr(data.valid_ds, 'tfms') is not None:
            data.valid_ds.tfms = [_scaling_tfm] + data.valid_ds.tfms
        else:
            data.valid_ds.tfms = [_scaling_tfm]

        # Normalize
        data._do_normalize = True
        if kwargs.get('do_normalize', None) is not None:
            data._do_normalize = kwargs.get('do_normalize', True)
        if data._do_normalize:
            data = data.normalize(stats=(data._scaled_mean_values,
                                         data._scaled_std_values),
                                  do_x=True,
                                  do_y=False)

    elif dataset_type == 'RCNN_Masks':
        if transforms == None:
            data = (src.transform(size=chip_size,
                                  tfm_y=True).databunch(**databunch_kwargs))
        else:
            data = (src.transform(transforms, size=chip_size,
                                  tfm_y=True).databunch(**databunch_kwargs))
        data.show_batch = types.MethodType(show_batch_rcnn_masks, data)
    else:
        #
        data = (data.transform(transforms, **kwargs_transforms).databunch(
            **databunch_kwargs).normalize(imagenet_stats))

    data.chip_size = data.x[0].shape[-1] if transforms is False else chip_size

    if alter_class_mapping:
        new_mapping = {}
        for i, class_name in enumerate(class_mapping.keys()):
            new_mapping[i + 1] = class_name
        class_mapping = new_mapping

    data.class_mapping = class_mapping
    data.color_mapping = color_mapping
    data.show_batch = partial(data.show_batch,
                              rows=min(int(math.sqrt(batch_size)), 5))
    data.orig_path = path
    data.resize_to = kwargs_transforms.get('size', None)
    data.height_width = height_width

    data._is_multispectral = _is_multispectral
    if data._is_multispectral:
        data._imagery_type = imagery_type
        data._bands = bands
        data._norm_pct = norm_pct
        data._rgb_bands = rgb_bands
        data._symbology_rgb_bands = rgb_bands

        # Handle invalid color mapping
        data._multispectral_color_mapping = color_mapping
        if any(-1 in x for x in data._multispectral_color_mapping.values()):
            random_color_list = np.random.randint(
                low=0,
                high=255,
                size=(len(data._multispectral_color_mapping), 3)).tolist()
            for i, c in enumerate(data._multispectral_color_mapping):
                if -1 in data._multispectral_color_mapping[c]:
                    data._multispectral_color_mapping[c] = random_color_list[i]

        # prepare color array
        alpha = kwargs.get('alpha', 0.7)
        color_array = torch.tensor(list(
            data.color_mapping.values())).float() / 255
        alpha_tensor = torch.tensor([alpha] * len(color_array)).view(
            -1, 1).float()
        color_array = torch.cat([color_array, alpha_tensor], dim=-1)
        background_color = torch.tensor([[0, 0, 0, 0]]).float()
        data._multispectral_color_array = torch.cat(
            [background_color, color_array])

        # Prepare unknown bands list if bands data is missing
        if data._bands is None:
            n_bands = data.x[0].data.shape[0]
            if n_bands == 1:  # Handle Pancromatic case
                data._bands = ['p']
                data._symbology_rgb_bands = [0]
            else:
                data._bands = ['u' for i in range(n_bands)]
                if n_bands == 2:  # Handle Data with two channels
                    data._symbology_rgb_bands = [0]

        #
        if data._rgb_bands is None:
            data._rgb_bands = []

        #
        if data._symbology_rgb_bands is None:
            data._symbology_rgb_bands = [0, 1, 2][:min(n_bands, 3)]

        # Complete symbology rgb bands
        if len(data._bands) > 2 and len(data._symbology_rgb_bands) < 3:
            data._symbology_rgb_bands += [
                min(max(data._symbology_rgb_bands) + 1,
                    len(data._bands) - 1)
                for i in range(3 - len(data._symbology_rgb_bands))
            ]

        # Overwrite band values at r g b indexes with 'r' 'g' 'b'
        for i, band_idx in enumerate(data._rgb_bands):
            if band_idx is not None:
                if data._bands[band_idx] == 'u':
                    data._bands[band_idx] = ['r', 'g', 'b'][i]

        # Attach custom show batch
        if _show_batch_multispectral is not None:
            data.show_batch = types.MethodType(_show_batch_multispectral, data)

        # Apply filter band transformation if user has specified extract_bands otherwise add a generic extract_bands
        """
        extract_bands : List containing band indices of the bands from imagery on which the model would be trained. 
                        Useful for benchmarking and applied training, for reference see examples below.
                        
                        4 band naip ['r, 'g', 'b', 'nir'] + extract_bands=[0, 1, 2] -> 3 band naip with bands ['r', 'g', 'b'] 

        """
        data._extract_bands = kwargs.get('extract_bands', None)
        if data._extract_bands is None:
            data._extract_bands = list(range(len(data._bands)))
        else:
            data._extract_bands_tfm = partial(_extract_bands_tfm,
                                              band_indices=data._extract_bands)
            data.add_tfm(data._extract_bands_tfm)

        # Tail Training Override
        _train_tail = True
        if [data._bands[i] for i in data._extract_bands] == ['r', 'g', 'b']:
            _train_tail = False
        data._train_tail = kwargs.get('train_tail', _train_tail)

    if has_esri_files:
        data._image_space_used = emd.get('ImageSpaceUsed', 'MAP_SPACE')
    else:
        data._image_space_used = 'PIXEL_SPACE'

    return data
Beispiel #10
0
    def _categorize_feature_class(
        self,
        feature_class,
        raster,
        class_value_field,
        class_name_field,
        confidence_field,
        cell_size,
        coordinate_system,
        predict_function,
        batch_size,
        overwrite
    ):
        """Classify every polygon of a local Polygon FeatureClass with this model.

        Two data paths are supported:
        * ``raster is not None`` — image chips are exported per polygon via
          ``arcpy.ia.ExportTrainingDataForDeepLearning`` and classified in batches.
        * ``raster is None`` — geodatabase attachments (``<fc>__ATTACH`` table)
          are read as images; per-feature predictions over multiple attachments
          are reduced by ``predict_function``.

        Results are written back into ``class_value_field`` (LONG),
        ``class_name_field`` (TEXT) and, optionally, ``confidence_field`` (DOUBLE).
        Existing fields with those names are only replaced when ``overwrite`` is True.

        Requires ``arcpy`` (imported lazily below). Returns True on completion.
        """
        import arcpy
        # Let arcpy overwrite intermediate outputs according to caller's choice.
        arcpy.env.overwriteOutput = overwrite

        if batch_size is None:
            # Fall back to the batch size the model was trained with.
            batch_size  = self._data.batch_size

        if predict_function is None:
            # Default reducer that picks the winning class over multiple chips/attachments.
            predict_function = _prediction_function
        
        # ImageNet normalization statistics applied to every image batch below.
        norm_mean = torch.tensor(imagenet_stats[0])
        norm_std = torch.tensor(imagenet_stats[1])
        
        fcdesc = arcpy.Describe(feature_class)
        oid_field = fcdesc.OIDFieldName
        # Only Polygon FeatureClasses are supported by this workflow.
        if not (fcdesc.dataType == 'FeatureClass' and fcdesc.shapeType == 'Polygon'):
            e = Exception(f"The specified FeatureClass at '{feature_class}' is not valid, it should be Polygon FeatureClass")
            raise(e)
        fields = arcpy.ListFields(feature_class)
        field_names = [f.name for f in fields]
        # Recreate the output fields, refusing to clobber existing ones unless overwrite=True.
        if class_value_field in field_names:
            if not overwrite:
                e = Exception(f"The specified class_value_field '{class_value_field}' already exists in the target FeatureClass, please specify a different name or set `overwrite=True`")
                raise(e)
        arcpy.DeleteField_management(feature_class, 
                                [ class_value_field ])
        arcpy.AddField_management(feature_class, class_value_field, "LONG")
            
        if class_name_field in field_names:
            if not overwrite:
                e = Exception(f"The specified class_name_field '{class_name_field}' already exists in the target FeatureClass, please specify a different name or set `overwrite=True`")
                raise(e)
        arcpy.DeleteField_management(feature_class, 
                                [ class_name_field ])
        arcpy.AddField_management(feature_class, class_name_field, "TEXT")

        if confidence_field is not None:
            if confidence_field in field_names:
                if not overwrite:
                    e = Exception(f"The specified confidence_field '{confidence_field}' already exists in the target FeatureClass, please specify a different name or set `overwrite=True`")
                    raise(e)
            arcpy.DeleteField_management(feature_class, 
                                    [ confidence_field ])
            arcpy.AddField_management(feature_class, confidence_field, "DOUBLE")

        if raster is not None:
            # --- Raster path: export one chip per polygon, then classify chips ---
            # Arcpy environment settings used by the export tool.
            arcpy.env.cellSize = cell_size
            arcpy.env.outputCoordinateSystem = coordinate_system
            arcpy.env.cartographicCoordinateSystem = coordinate_system

            # Create a temporary id field that mirrors the OID, choosing a name
            # that does not collide with any existing field.
            tempid_field = _tempid_field = 'f_fcuid'
            i = 1
            while tempid_field in field_names:
                tempid_field = _tempid_field + str(i)
                i+=1
            arcpy.AddField_management(feature_class, tempid_field, "LONG")
            arcpy.CalculateField_management(feature_class, tempid_field, f"!{oid_field}!")

            # Export labeled chips into a scratch folder; the label of each chip
            # is the temp id (== OID) of its source polygon.
            temp_folder = arcpy.env.scratchFolder
            temp_datafldr = os.path.join(temp_folder, 'categorize_features_'+str(int(time.time())))
            result = arcpy.ia.ExportTrainingDataForDeepLearning(
                in_raster=raster,
                out_folder=temp_datafldr,
                in_class_data=feature_class,
                image_chip_format="TIFF",
                tile_size_x=self._data.chip_size,
                tile_size_y=self._data.chip_size,
                stride_x=0,
                stride_y=0,
                output_nofeature_tiles="ALL_TILES",
                metadata_format="Labeled_Tiles",
                start_index=0,
                class_value_field=tempid_field,
                buffer_radius=0,
                in_mask_polygons=None,
                rotation_angle=0
            )
            # cleanup: the temp id field is no longer needed on the FeatureClass.
            arcpy.DeleteField_management(feature_class, [ tempid_field ])
            image_list = ImageList.from_folder(os.path.join(temp_datafldr, 'images'))
            def get_id(imagepath):
                # Recover the source polygon's temp id (== OID) from the exported
                # Labeled_Tiles XML sidecar (<name> element) matching the chip.
                with open(os.path.join(temp_datafldr, 'labels', os.path.basename(imagepath)[:-3]+'xml')) as f:
                    return(int(f.read().split('<name>')[1].split('<')[0]))

            for i in range(0, len(image_list), batch_size):
                # Get Temporary Ids (source OIDs) for this batch of chips.
                tempids =[ get_id(f) for f in image_list.items[i:i+batch_size] ]
                
                # Get Image batch as a normalized tensor stack.
                image_batch = torch.stack([ im.data for im in image_list[i:i+batch_size] ])
                image_batch = normalize(image_batch, mean=norm_mean, std=norm_std)
                
                # Get Predictions (class indices and confidences).
                predicted_classes, predictions_conf = self._predict_batch(image_batch)
                
                # Update the FeatureClass rows whose OIDs are in this batch.
                where_clause = f"{oid_field} IN ({','.join(str(e) for e in tempids)})"
                update_cursor = arcpy.UpdateCursor(
                    feature_class,
                    where_clause=where_clause,
                    sort_fields=f"{oid_field} A"
                )
                for row in update_cursor:
                    row_tempid = row.getValue(oid_field)
                    # Map the row back to its position in the prediction batch.
                    ui = tempids.index(row_tempid)
                    classvalue = self._data.classes[predicted_classes[ui]]
                    row.setValue(class_value_field, classvalue)
                    row.setValue(class_name_field, self._data.class_mapping[classvalue])
                    if confidence_field is not None:
                        row.setValue(confidence_field, predictions_conf[ui])
                    update_cursor.updateRow(row)

                # Remove Locks held by the cursor/row objects.
                # NOTE(review): if the cursor yields no rows, `row` is unbound and
                # this `del` would raise NameError — confirm batches are never empty.
                del row
                del update_cursor

            # Cleanup the scratch export folder.
            arcpy.Delete_management(temp_datafldr)
            shutil.rmtree(temp_datafldr, ignore_errors=True)

        else:
            # --- Attachment path: classify geodatabase attachments per feature ---
            feature_class_attach = feature_class+'__ATTACH'
            nrows = arcpy.GetCount_management(feature_class_attach)[0]
            # store maps REL_OBJECTID -> list of [predicted_class, confidence]
            # (one entry per attachment of that feature).
            store={}
            for i in range(0, int(nrows), batch_size):
                attachment_ids = []
                rel_objectids = []
                image_batch = []
                
                # Get Image Batch: scan the attachment table and keep only rows
                # in the current window [i, i+batch_size).
                # NOTE(review): this re-scans the whole table per batch (O(n^2)
                # cursor reads overall) — presumably acceptable for small tables.
                with arcpy.da.SearchCursor(feature_class_attach, [ 'ATTACHMENTID', 'REL_OBJECTID', 'DATA' ]) as search_cursor:
                    for c, item in enumerate(search_cursor):
                        if c >= i and c < i+batch_size :
                            attachment_ids.append(item[0])
                            rel_objectids.append(item[1])
                            attachment = item[-1]
                            im = open_image(io.BytesIO(attachment.tobytes())) # Read Bytes
                            im = im.resize(self._data.chip_size) # Resize
                            image_batch.append(im.data) # Convert to tensor
                            del item
                            del attachment
                            #del im
                image_batch = torch.stack(image_batch)
                image_batch = normalize(image_batch, mean=norm_mean, std=norm_std)
                
                # Get Predictions and save to store, keyed by parent feature OID.
                predicted_classes, predictions_conf = self._predict_batch(image_batch)
                for ai in range(len(attachment_ids)):
                    if store.get(rel_objectids[ai]) is None:
                        store[rel_objectids[ai]] = []
                    store[rel_objectids[ai]].append([predicted_classes[ai], predictions_conf[ai]])
                
            # Update Feature Class: reduce per-attachment predictions with
            # predict_function and write the winning class per feature.
            update_cursor = arcpy.UpdateCursor(feature_class)
            for row in update_cursor:
                row_oid = row.getValue(oid_field)
                max_prediction_class, max_prediction_value = predict_function(store[row_oid])
                if max_prediction_class is not None:
                    classvalue = self._data.classes[max_prediction_class]
                    classname = self._data.class_mapping[classvalue]
                else:
                    # predict_function may abstain; write NULLs in that case.
                    classvalue = None
                    classname = None
                row.setValue(class_value_field, classvalue)
                row.setValue(class_name_field, classname)
                if confidence_field is not None:
                    row.setValue(confidence_field, max_prediction_value)
                update_cursor.updateRow(row)

            # Remove Locks held by the cursor/row objects.
            del row
            del update_cursor
        return True
Beispiel #11
0
    def _categorize_feature_layer(
        self,
        feature_layer,
        raster,
        class_value_field,
        class_name_field,
        confidence_field,
        cell_size,
        coordinate_system,
        predict_function,
        batch_size,
        overwrite
    ):  
        """Classify every feature of a (hosted) FeatureLayer with this model.

        Mirrors ``_categorize_feature_class`` but works against the ArcGIS
        REST API instead of a local geodatabase:
        * fields are added/removed through ``feature_layer.manager``
          ``add_to_definition`` / ``delete_from_definition``;
        * with ``raster`` set, chips are exported with arcpy using the layer's
          URL (token-authenticated if available);
        * without ``raster``, the layer's attachments are exported to a temp
          folder and classified, reducing multiple attachments per feature
          with ``predict_function``;
        * results are pushed back in batches of 100 via
          ``feature_layer.edit_features``.
        """
        # ImageNet normalization statistics applied to every image batch below.
        norm_mean = torch.tensor(imagenet_stats[0])
        norm_std = torch.tensor(imagenet_stats[1])

        # Check and create Fields: REST field definitions for the outputs.
        class_name_field_template = {
            "name": class_name_field.lower(),
            "type": "esriFieldTypeString",
            "alias": class_name_field,
            "sqlType": "sqlTypeOther",
            "length": 256,
            "nullable": True,
            "editable": True,
            "visible": True,
            "domain": None,
            "defaultValue": ''
        }

        class_value_field_template = {
            "name": class_value_field.lower(),
            "type": "esriFieldTypeInteger",
            "alias": class_value_field,
            "sqlType": "sqlTypeOther",
            "nullable": True,
            "editable": True,
            "visible": True,
            "domain": None,
            "defaultValue": -999
        }

        to_delete = []
        to_create = []

        # Existing layer fields keyed by lower-cased name for collision checks.
        feature_layer_fields = { f['name'].lower():f for f in feature_layer.properties["fields"] }
        oid_field = feature_layer.properties['objectIdField']

        # Refuse to clobber existing output fields unless overwrite=True.
        if class_value_field_template['name'] in feature_layer_fields:
            if overwrite:
                to_delete.append(feature_layer_fields[class_value_field_template['name']])
            else:
                e = Exception(f"The specified class_value_field '{class_value_field}' already exists, please specify a different name or set `overwrite=True`")
                raise(e)
        to_create.append(class_value_field_template)

        if class_name_field_template['name'] in feature_layer_fields:
            if overwrite:
                to_delete.append(feature_layer_fields[class_name_field_template['name']])
            else:
                e = Exception(f"The specified class_name_field '{class_name_field}' already exists, please specify a different name or set `overwrite=True`")
                raise(e)
        to_create.append(class_name_field_template)
        
        if confidence_field is not None:
            confidence_field_template = {
                "name": confidence_field.lower(),
                "type": "esriFieldTypeDouble",
                "alias": confidence_field,
                "sqlType": "sqlTypeDouble",
                "nullable": True,
                "editable": True,
                "visible": True,
                "domain": None,
                "defaultValue": -999
            }
            if confidence_field_template['name'] in feature_layer_fields:
                if overwrite:
                    to_delete.append(feature_layer_fields[confidence_field_template['name']])
                else:
                    e = Exception(f"The specified confidence_field '{confidence_field}' already exists, please specify a different name or set `overwrite=True`")
                    raise(e)
            to_create.append(confidence_field_template)

        # Apply the schema changes in two REST calls.
        feature_layer.manager.delete_from_definition({'fields': to_delete})
        feature_layer.manager.add_to_definition({'fields': to_create})

        # Get features for updation (attributes only; geometry not needed).
        fields_to_update = [oid_field, class_value_field, class_name_field]
        if confidence_field is not None:
            fields_to_update.append(confidence_field)
        feature_layer_features = feature_layer.query(out_fields=",".join(fields_to_update), return_geometry=False).features
        # update_store maps OID -> attribute dict to push back to the layer.
        update_store = {}

        if raster is not None:
            # --- Raster path: export one chip per feature, then classify chips ---
            import arcpy

            # Arcpy environment settings used by the export tool.
            arcpy.env.cellSize = cell_size
            arcpy.env.outputCoordinateSystem = coordinate_system
            arcpy.env.cartographicCoordinateSystem = coordinate_system

            feature_layer_url = feature_layer.url

            # Append the token so arcpy can access a secured service.
            if feature_layer._token is not None:
                feature_layer_url = feature_layer_url + f"?token={feature_layer._token}"

            
            
            # Create Temporary ID field mirroring the OID, choosing a name that
            # does not collide with any existing field.
            tempid_field = _tempid_field = 'f_fcuid'
            i = 1
            while tempid_field in feature_layer_fields:
                tempid_field = _tempid_field + str(i)
                i+=1
            arcpy.AddField_management(feature_layer_url, tempid_field, "LONG")
            #feature_layer.manager.add_to_definition({'fields': [tempid_field_template]})
            arcpy.CalculateField_management(feature_layer_url, tempid_field, f"{oid_field}", "SQL")

            # Export labeled chips into a scratch folder; each chip's label is
            # the temp id (== OID) of its source feature.
            temp_folder = arcpy.env.scratchFolder
            temp_datafldr = os.path.join(temp_folder, 'categorize_features_'+str(int(time.time())))
            result = arcpy.ia.ExportTrainingDataForDeepLearning(
                in_raster=raster,
                out_folder=temp_datafldr,
                in_class_data=feature_layer_url,
                image_chip_format="TIFF",
                tile_size_x=self._data.chip_size,
                tile_size_y=self._data.chip_size,
                stride_x=0,
                stride_y=0,
                output_nofeature_tiles="ALL_TILES",
                metadata_format="Labeled_Tiles",
                start_index=0,
                class_value_field=tempid_field,
                buffer_radius=0,
                in_mask_polygons=None,
                rotation_angle=0
            )
            # cleanup: the temp id field is no longer needed on the layer.
            arcpy.DeleteField_management(feature_layer_url, [ tempid_field ])

            image_list = ImageList.from_folder(os.path.join(temp_datafldr, 'images'))
            def get_id(imagepath):
                # Recover the source feature's temp id (== OID) from the exported
                # Labeled_Tiles XML sidecar (<name> element) matching the chip.
                with open(os.path.join(temp_datafldr, 'labels', os.path.basename(imagepath)[:-3]+'xml')) as f:
                    return(int(f.read().split('<name>')[1].split('<')[0]))

            for i in range(0, len(image_list), batch_size):
                # Get Temporary Ids (source OIDs) for this batch of chips.
                tempids = [ get_id(f) for f in image_list.items[i:i+batch_size] ]
                
                # Get Image batch as a normalized tensor stack.
                image_batch = torch.stack([ im.data for im in image_list[i:i+batch_size] ])
                image_batch = normalize(image_batch, mean=norm_mean, std=norm_std)
                
                # Get Predictions (class indices and confidences).
                predicted_classes, predictions_conf = self._predict_batch(image_batch)
                
                # push prediction to store, keyed by feature OID.
                for ui, oid in enumerate(tempids):
                    classvalue = self._data.classes[predicted_classes[ui]]
                    update_store[oid] = {
                        oid_field: oid,
                        class_value_field: classvalue,
                        class_name_field:  self._data.class_mapping[classvalue]
                    } 
                    if confidence_field is not None:
                        update_store[oid][confidence_field] = predictions_conf[ui]

            # Cleanup the scratch export folder.
            arcpy.Delete_management(temp_datafldr)
            shutil.rmtree(temp_datafldr, ignore_errors=True)

        else:
            # --- Attachment path: download attachments and classify them ---
            out_folder = tempfile.TemporaryDirectory().name
            os.mkdir(out_folder)
            feature_layer.export_attachments(out_folder)
            # mapping.txt maps each feature OID to its attachment file paths.
            with open(os.path.join(out_folder, 'mapping.txt')) as file:
                feature_attachments_mapping = json.load(file)
                images_store = []
                for oid in feature_attachments_mapping:
                    for im in feature_attachments_mapping[oid]:
                        images_store.append({
                            'oid': oid,
                            'im': os.path.join(out_folder,im)
                        })
            # update_store_scratch maps OID -> list of [class, confidence],
            # one entry per attachment of that feature.
            update_store_scratch = {}
            for i in range(0, len(images_store), batch_size):
                rel_objectids = []
                image_batch = []
                for r in images_store[i:i+batch_size]:
                    im = open_image(r['im']) # Read Bytes
                    im = im.resize(self._data.chip_size) # Resize
                    image_batch.append(im.data) # Convert to tensor
                    rel_objectids.append(int(r['oid']))
                image_batch = torch.stack(image_batch)
                image_batch = normalize(image_batch, mean=norm_mean, std=norm_std)
                # Get Predictions and save to scratch
                predicted_classes, predictions_conf = self._predict_batch(image_batch)
                for ai, oid in enumerate(rel_objectids):
                    if update_store_scratch.get(oid) is None:
                        update_store_scratch[oid] = []
                    update_store_scratch[oid].append([predicted_classes[ai], predictions_conf[ai]])
            # Prepare final updated features: reduce per-attachment predictions
            # with predict_function (it may abstain, yielding NULLs).
            for oid in update_store_scratch:
                max_prediction_class, max_prediction_value = predict_function(update_store_scratch[oid])
                if max_prediction_class is not None:
                    classvalue = self._data.classes[max_prediction_class]
                    classname = self._data.class_mapping[classvalue]
                else:
                    classvalue = None
                    classname = None                
                update_store[oid] = {
                    oid_field: oid,
                    class_value_field: classvalue,
                    class_name_field:  classname
                } 
                if confidence_field is not None:
                    update_store[oid][confidence_field] = max_prediction_value

        # Update Features: copy stored attributes onto the queried features
        # and push edits back in chunks of 100, pausing between requests.
        features_to_update = []
        for feat in feature_layer_features:
            if update_store.get(feat.attributes[oid_field]) is not None:
                updated_attributes = update_store[feat.attributes[oid_field]]
                for f in fields_to_update:
                    feat.attributes[f] = updated_attributes[f]
                features_to_update.append(feat)
        step = 100
        for si in range(0, len(features_to_update), step):
            feature_batch = features_to_update[si:si+step]
            response = feature_layer.edit_features(updates=feature_batch)
            for resp in response.get('updateResults', []):
                if resp.get('success', False):
                    continue
                warnings.warn(f"Something went wrong for data {resp}")
            # Throttle REST edit requests.
            time.sleep(2)
Beispiel #12
0
'''
File: /Users/origami/Desktop/dl-projects/dl-playground/src/imagesClassify/img_classify.py
Project: /Users/origami/Desktop/dl-projects/dl-playground/src/imagesClassify
Created Date: Monday May 20th 2019
Author: Rick yang tongxue(🍔🍔) ([email protected])
-----
Last Modified: Wednesday May 22nd 2019 9:18:55 am
Modified By: Rick yang tongxue(🍔🍔) ([email protected])
-----
'''
from fastai.vision.transform import get_transforms
from numpy import random
from fastai.vision.data import ImageList
import pandas as pd
import os, sys
# Root of the CelebA-style image-classification dataset, relative to this file.
path = os.path.abspath('../../data/imageClassify')
# NOTE(review): df is loaded from 'list_attr_celeba_fixed.csv' but the
# ImageList below reads 'list_attr_celeba.csv' — confirm whether the fixed
# CSV should be used in from_csv too (df is unused in this snippet).
df = pd.read_csv(path + '/list_attr_celeba_fixed.csv')
# Standard fastai augmentations; vertical flips allowed, warping disabled.
tfms = get_transforms(flip_vert=True,
                      max_lighting=0.1,
                      max_zoom=1.05,
                      max_warp=0.)
# Multi-label item list: labels come from a space-delimited column in the CSV,
# images live under img_align_celeba/; 20% random validation split.
src = (ImageList.from_csv(
    path, 'list_attr_celeba.csv',
    folder='img_align_celeba').split_by_rand_pct(0.2).label_from_df(
        label_delim=' '))
Beispiel #13
0
from fastai.train import ClassificationInterpretation
from fastai.vision.data import ImageList
from fastai.vision.learner import cnn_learner
from fastai.vision.models import resnet18
from fastai.vision.transform import ResizeMethod, get_transforms


# %%
# Custom optimizers (imported for use later in the script).
from radam import RAdam
from ranger import Ranger

# path to the data
# NOTE(review): `Path` is not imported in this cell — presumably
# `from pathlib import Path` (or fastai's re-export) appears earlier; verify.
path = Path('data')

# create an image list from all images under data/
il = ImageList.from_folder(path)

# cut off bottom of image

# split the data into a train, validation and test set
# be cogniscant of time: the 'test' folder is used as the validation set here
sd = il.split_by_folder(valid='test')

# add labels to images (label = parent folder name)
ll = sd.label_from_folder()

# specify data augmentaion (default fastai transforms, resized to 256px)
tfms = get_transforms()
ll = ll.transform(tfms, size=256)#, resize_method=ResizeMethod.SQUISH)

# create databunch to pass to model and optimiser