def main(params):
    """
    Training and validation datasets preparation.
    :param params: (dict) Parameters found in the yaml config file.

    """
    gpkg_file = []
    bucket_name = params['global']['bucket_name']
    data_path = params['global']['data_path']
    csv_file = params['sample']['prep_csv_file']

    if bucket_name:
        s3 = boto3.resource('s3')
        bucket = s3.Bucket(bucket_name)
        bucket.download_file(csv_file, 'samples_prep.csv')
        list_data_prep = read_csv('samples_prep.csv')
        if data_path:
            final_samples_folder = os.path.join(data_path, "samples")
        else:
            final_samples_folder = "samples"
        samples_folder = "samples"
        out_label_folder = "label"

    else:
        list_data_prep = read_csv(csv_file)
        samples_folder = os.path.join(data_path, "samples")
        out_label_folder = os.path.join(data_path, "label")

    create_or_empty_folder(samples_folder)
    create_or_empty_folder(out_label_folder)

    number_samples = {'trn': 0, 'val': 0, 'tst': 0}
    number_classes = 0

    trn_hdf5, val_hdf5, tst_hdf5 = create_files_and_datasets(params, samples_folder)

    with tqdm(list_data_prep) as _tqdm:
        for info in _tqdm:

            if bucket_name:
                bucket.download_file(info['tif'], "Images/" + info['tif'].split('/')[-1])
                info['tif'] = "Images/" + info['tif'].split('/')[-1]
                if info['gpkg'] not in gpkg_file:
                    gpkg_file.append(info['gpkg'])
                    bucket.download_file(info['gpkg'], info['gpkg'].split('/')[-1])
                info['gpkg'] = info['gpkg'].split('/')[-1]

            assert_band_number(info['tif'], params['global']['number_of_bands'])

            _tqdm.set_postfix(OrderedDict(file=f'{info["tif"]}', sample_size=params['global']['samples_size']))

            # Read the input raster image
            np_input_image = image_reader_as_array(info['tif'])

            # Validate the number of classes in the vector file
            validate_num_classes(info['gpkg'], params['global']['num_classes'], info['attribute_name'])

            # Burn the vector file into a raster
            np_label_raster = vector_to_raster(info['gpkg'], info['tif'], info['attribute_name'])

            # Guidelines for pre-processing: http://cs231n.github.io/neural-networks-2/#datapre
            # Scale arrays to the configured range, e.g. [0,1]. Useful if dealing with 8 bit *and* 16 bit images.
            scale = params['global']['scale_data']
            if scale:
                sc_min, sc_max = scale
                np_input_image = minmax_scale(np_input_image,
                                              orig_range=(np.min(np_input_image), np.max(np_input_image)),
                                              scale_range=(sc_min, sc_max))

            # Mask the zeros from input image into label raster.
            if params['sample']['mask_reference']:
                np_label_raster = mask_image(np_input_image, np_label_raster)

            if info['dataset'] == 'trn':
                out_file = trn_hdf5
            elif info['dataset'] == 'val':
                out_file = val_hdf5
            elif info['dataset'] == 'tst':
                out_file = tst_hdf5
            else:
                raise ValueError(f"Dataset value must be trn or val or tst. Provided value is {info['dataset']}")

            np_label_raster = np.reshape(np_label_raster, (np_label_raster.shape[0], np_label_raster.shape[1], 1))
            number_samples, number_classes = samples_preparation(np_input_image,
                                                                 np_label_raster,
                                                                 params['global']['samples_size'],
                                                                 params['sample']['samples_dist'],
                                                                 number_samples,
                                                                 number_classes,
                                                                 out_file,
                                                                 info['dataset'],
                                                                 params['sample']['min_annotated_percent'])

            _tqdm.set_postfix(OrderedDict(number_samples=number_samples))
            out_file.flush()

    trn_hdf5.close()
    val_hdf5.close()
    tst_hdf5.close()

    print("Number of samples created: ", number_samples)

    if bucket_name:
        print('Transferring samples to the bucket')
        bucket.upload_file(samples_folder + "/trn_samples.hdf5", final_samples_folder + '/trn_samples.hdf5')
        bucket.upload_file(samples_folder + "/val_samples.hdf5", final_samples_folder + '/val_samples.hdf5')
        bucket.upload_file(samples_folder + "/tst_samples.hdf5", final_samples_folder + '/tst_samples.hdf5')

    print("End of process")
def main(params):
    """
    Identify the class to which each image belongs.
    :param params: (dict) Parameters found in the yaml config file.

    """
    since = time.time()
    img_dir_or_csv = params['inference']['img_dir_or_csv_file']
    working_folder = Path(params['inference']['working_folder'])
    working_folder.mkdir(exist_ok=True)
    print(f'Inferences will be saved to: {working_folder}')

    bucket = None
    bucket_name = params['global']['bucket_name']

    model, state_dict_path, model_name = net(params, inference=True)

    num_devices = params['global']['num_gpus'] if params['global']['num_gpus'] else 0
    # list of GPU devices that are available and unused. If no GPUs, returns an empty list
    lst_device_ids = get_device_ids(num_devices) if torch.cuda.is_available() else []
    device = torch.device(f'cuda:{lst_device_ids[0]}' if torch.cuda.is_available() and lst_device_ids else 'cpu')

    if lst_device_ids:
        print(f"Using Cuda device {lst_device_ids[0]}")
    else:
        warnings.warn("No Cuda device available. This process will only run on CPU")

    model.to(device)

    if bucket_name:
        s3 = boto3.resource('s3')
        bucket = s3.Bucket(bucket_name)
        if img_dir_or_csv.endswith('.csv'):
            bucket.download_file(img_dir_or_csv, 'img_csv_file.csv')
            list_img = read_csv('img_csv_file.csv', inference=True)
        else:
            raise NotImplementedError('Specify a csv file containing images for inference. Directory input is not implemented yet')
    else:
        if img_dir_or_csv.endswith('.csv'):
            list_img = read_csv(img_dir_or_csv, inference=True)
        else:
            img_dir = Path(img_dir_or_csv)
            assert img_dir.exists(), f'Could not find directory "{img_dir_or_csv}"'
            list_img_paths = sorted(img_dir.glob('*.tif'))
            list_img = [{'tif': img_path} for img_path in list_img_paths]
            assert len(list_img) > 0, f'No .tif files found in {img_dir_or_csv}'

    if params['global']['task'] == 'classification':
        classifier(params, list_img, model)

    elif params['global']['task'] == 'segmentation':
        if bucket:
            bucket.download_file(state_dict_path, "saved_model.pth.tar")
            model, _ = load_from_checkpoint("saved_model.pth.tar", model)
        else:
            model, _ = load_from_checkpoint(state_dict_path, model)

        chunk_size, nbr_pix_overlap = calc_overlap(params)
        num_classes = params['global']['num_classes']
        with tqdm(list_img, desc='image list', position=0) as _tqdm:
            for img in _tqdm:
                img_name = os.path.basename(img['tif'])
                if bucket:
                    local_img = f"Images/{img_name}"
                    bucket.download_file(img['tif'], local_img)
                    inference_image = f"Classified_Images/{img_name.split('.')[0]}_inference.tif"
                else:
                    local_img = img['tif']
                    inference_image = os.path.join(
                        params['inference']['working_folder'],
                        f"{img_name.split('.')[0]}_inference.tif")

                assert_band_number(local_img,
                                   params['global']['number_of_bands'])

                nd_array_tif = image_reader_as_array(local_img)
                assert len(np.unique(nd_array_tif)) > 1, (
                    f'Image "{img_name}" contains only the value(s) {np.unique(nd_array_tif)}.')

                # See: http://cs231n.github.io/neural-networks-2/#datapre. e.g. Scale arrays from [0,255] to [0,1]
                scale = params['global']['scale_data']
                if scale:
                    sc_min, sc_max = params['global']['scale_data']
                    nd_array_tif = minmax_scale(
                        nd_array_tif,
                        orig_range=(np.min(nd_array_tif),
                                    np.max(nd_array_tif)),
                        scale_range=(sc_min, sc_max))
                if debug:
                    _tqdm.set_postfix(
                        OrderedDict(image_name=img_name,
                                    image_shape=nd_array_tif.shape,
                                    scale=scale))

                sem_seg_results = sem_seg_inference(model, nd_array_tif,
                                                    nbr_pix_overlap,
                                                    chunk_size, num_classes,
                                                    device)
                if debug and len(np.unique(sem_seg_results)) == 1:
                    print(
                        f'Something is wrong: inference contains only the value {np.unique(sem_seg_results)}. Make sure '
                        f'the "scale_data" parameter is coherent with the parameters used to train the model used for inference.'
                    )
                create_new_raster_from_base(local_img, inference_image,
                                            sem_seg_results)
                tqdm.write(
                    f"Semantic segmentation of image {img_name} completed")
                if bucket:
                    bucket.upload_file(
                        inference_image,
                        os.path.join(
                            params['inference']['working_folder'],
                            f"{img_name.split('.')[0]}_inference.tif"))
    else:
        raise ValueError(
            f"The task should be either classification or segmentation. The provided value is {params['global']['task']}"
        )

    time_elapsed = time.time() - since
    print('Inference completed in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
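

# --- Reference sketch (an assumption, not the project's actual implementation) ---
# The minmax_scale() calls above rely on a plain linear rescaling from
# orig_range to scale_range; this minimal version shows the arithmetic they
# assume (orig_range must not be degenerate, i.e. o_max != o_min).
import numpy as np

def minmax_scale_sketch(img, orig_range=(0, 255), scale_range=(0, 1)):
    """Linearly map img values from orig_range to scale_range."""
    o_min, o_max = orig_range
    s_min, s_max = scale_range
    return (img.astype(np.float32) - o_min) / (o_max - o_min) * (s_max - s_min) + s_min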
def main(params):
    """
    Identify the class to which each image belongs.
    :param params: (dict) Parameters found in the yaml config file.

    """
    since = time.time()
    csv_file = params['inference']['img_csv_file']

    bucket = None
    bucket_name = params['global']['bucket_name']

    model, state_dict_path, model_name = net(params, inference=True)

    num_devices = params['global']['num_gpus'] if params['global']['num_gpus'] else 0
    # list of GPU devices that are available and unused. If no GPUs, returns an empty list
    lst_device_ids = get_device_ids(num_devices) if torch.cuda.is_available() else []
    device = torch.device(f'cuda:{lst_device_ids[0]}' if torch.cuda.is_available() and lst_device_ids else 'cpu')

    if lst_device_ids:
        print(f"Using Cuda device {lst_device_ids[0]}")
    else:
        warnings.warn("No Cuda device available. This process will only run on CPU")

    model.to(device)

    if bucket_name:
        s3 = boto3.resource('s3')
        bucket = s3.Bucket(bucket_name)
        bucket.download_file(csv_file, 'img_csv_file.csv')
        list_img = read_csv('img_csv_file.csv', inference=True)
    else:
        list_img = read_csv(csv_file, inference=True)

    if params['global']['task'] == 'classification':
        classifier(params, list_img, model)

    elif params['global']['task'] == 'segmentation':
        if bucket:
            bucket.download_file(state_dict_path, "saved_model.pth.tar")
            model = load_from_checkpoint("saved_model.pth.tar", model)
        else:
            model = load_from_checkpoint(state_dict_path, model)

        chunk_size, nbr_pix_overlap = calc_overlap(params)
        num_classes = params['global']['num_classes']
        for img in tqdm(list_img, desc='image list', position=0):
            img_name = os.path.basename(img['tif'])
            if bucket:
                local_img = f"Images/{img_name}"
                bucket.download_file(img['tif'], local_img)
                inference_image = f"Classified_Images/{img_name.split('.')[0]}_inference.tif"
            else:
                local_img = img['tif']
                inference_image = os.path.join(
                    params['inference']['working_folder'],
                    f"{img_name.split('.')[0]}_inference.tif")

            assert_band_number(local_img, params['global']['number_of_bands'])

            nd_array_tif = image_reader_as_array(local_img)
            # See: http://cs231n.github.io/neural-networks-2/#datapre
            # e.g. Scale arrays from [0,255] to [0,1]
            scale = params['global']['scale_data']
            if scale:
                sc_min, sc_max = scale
                nd_array_tif = minmax_scale(nd_array_tif,
                                            orig_range=(np.min(nd_array_tif),
                                                        np.max(nd_array_tif)),
                                            scale_range=(sc_min, sc_max))

            sem_seg_results = sem_seg_inference(model, nd_array_tif,
                                                nbr_pix_overlap, chunk_size,
                                                num_classes, device)
            create_new_raster_from_base(local_img, inference_image,
                                        sem_seg_results)
            tqdm.write(f"Semantic segmentation of image {img_name} completed")
            if bucket:
                bucket.upload_file(
                    inference_image,
                    os.path.join(params['inference']['working_folder'],
                                 f"{img_name.split('.')[0]}_inference.tif"))
    else:
        raise ValueError(
            f"The task should be either classification or segmentation. The provided value is {params['global']['task']}"
        )

    time_elapsed = time.time() - since
    print('Inference completed in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
def image_reader_as_array(input_image,
                          scale=None,
                          aux_vector_file=None,
                          aux_vector_attrib=None,
                          aux_vector_ids=None,
                          aux_vector_dist_maps=False,
                          aux_vector_dist_log=True,
                          aux_vector_scale=None):
    """Read an image from a file and return a 3d array (h,w,c)
    Args:
        input_image: Rasterio file handle holding the (already opened) input raster
        scale: optional scaling factor for the raw data
        aux_vector_file: optional vector file from which to extract auxiliary shapes
        aux_vector_attrib: optional vector file attribute name to parse in order to fetch ids
        aux_vector_ids: optional vector ids to target in the vector file above
        aux_vector_dist_maps: flag indicating whether aux vector bands should be distance maps or binary maps
        aux_vector_dist_log: flag indicating whether log distances should be used in distance maps or not
        aux_vector_scale: optional floating point scale factor to multiply to rasterized vector maps

    Return:
        numpy array of the image (possibly concatenated with auxiliary vector channels)
    """
    np_array = np.empty(
        [input_image.height, input_image.width, input_image.count],
        dtype=np.float32)
    for i in range(input_image.count):
        np_array[:, :, i] = input_image.read(i + 1)  # bands start at 1 in rasterio, not 0

    # Guidelines for pre-processing: http://cs231n.github.io/neural-networks-2/#datapre
    # Optionally scale arrays to values [0,1]. Useful if dealing with 8 bit *and* 16 bit images.
    if scale:
        sc_min, sc_max = scale
        np_array = minmax_scale(img=np_array,
                                orig_range=(np.min(np_array),
                                            np.max(np_array)),
                                scale_range=(sc_min, sc_max))

    # if requested, load vectors from external file, rasterize, and append distance maps to array
    if aux_vector_file is not None:
        vec_tensor = vector_to_raster(vector_file=aux_vector_file,
                                      input_image=input_image,
                                      attribute_name=aux_vector_attrib,
                                      fill=0,
                                      target_ids=aux_vector_ids,
                                      merge_all=False)
        if aux_vector_dist_maps:
            import cv2 as cv  # opencv becomes a project dependency only if we need to compute distance maps here
            vec_tensor = vec_tensor.astype(np.float32)
            for vec_band_idx in range(vec_tensor.shape[2]):
                mask = vec_tensor[:, :, vec_band_idx]
                # dilate with a 3x3 kernel to make points and linestrings easier to work with
                mask = cv.dilate(mask, np.ones((3, 3), dtype=np.uint8))
                #display_resize = cv.resize(np.where(mask, np.uint8(0), np.uint8(255)), (1000, 1000))
                #cv.imshow("mask", display_resize)
                dmap = cv.distanceTransform(
                    np.where(mask, np.uint8(0), np.uint8(255)), cv.DIST_L2,
                    cv.DIST_MASK_PRECISE)
                if aux_vector_dist_log:
                    dmap = np.log(dmap + 1)
                #display_resize = cv.resize(cv.normalize(dmap, None, 0, 1, cv.NORM_MINMAX, dtype=cv.CV_32F), (1000, 1000))
                #cv.imshow("dmap1", display_resize)
                dmap_inv = cv.distanceTransform(
                    np.where(mask, np.uint8(255), np.uint8(0)), cv.DIST_L2,
                    cv.DIST_MASK_PRECISE)
                if aux_vector_dist_log:
                    dmap_inv = np.log(dmap_inv + 1)
                #display_resize = cv.resize(cv.normalize(dmap_inv, None, 0, 1, cv.NORM_MINMAX, dtype=cv.CV_32F), (1000, 1000))
                #cv.imshow("dmap2", display_resize)
                vec_tensor[:, :,
                           vec_band_idx] = np.where(mask, -dmap_inv, dmap)
                #display = cv.normalize(vec_tensor[:, :, vec_band_idx], None, 0, 1, cv.NORM_MINMAX, dtype=cv.CV_32F)
                #display_resize = cv.resize(display, (1000, 1000))
                #cv.imshow("distmap", display_resize)
                #cv.waitKey(0)
        if aux_vector_scale:
            for vec_band_idx in range(vec_tensor.shape[2]):
                vec_tensor[:, :, vec_band_idx] *= aux_vector_scale
        np_array = np.concatenate([np_array, vec_tensor], axis=2)
    return np_array
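

# --- Usage sketch (hypothetical file path) ---
# image_reader_as_array expects an already-opened rasterio dataset, as the
# docstring notes, so it is typically called inside a rasterio context manager.
import rasterio

with rasterio.open('my_image.tif') as src:  # hypothetical path
    arr = image_reader_as_array(src, scale=(0, 1))
print(arr.shape)  # (height, width, bands), float32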