Example #1
def sem_seg_inference(bucket, model, image, overlay):
    """Inference on images using semantic segmentation
    Args:
        bucket: bucket in which data is stored if using AWS S3
        model: model to use for inference
        image: full path of the image to infer on
        overlay: amount of overlay to apply

    Returns:
        A numpy array of the same size (h, w) as the input image, where each value is the predicted output.
    """
    # Chunk size. Should not be modified often. We want to process the largest chunk possible at a time,
    # but too large an image chunk will exhaust GPU memory during processing.
    chunk_size = 512

    # switch to evaluate mode
    model.eval()

    if bucket:
        input_image = image_reader_as_array(
            f"Images/{os.path.basename(image)}")
    else:
        input_image = image_reader_as_array(image)

    if len(input_image.shape) == 3:
        h, w, nb = input_image.shape
        padded_array = np.pad(input_image, ((overlay, chunk_size),
                                            (overlay, chunk_size), (0, 0)),
                              mode='constant')
    elif len(input_image.shape) == 2:
        h, w = input_image.shape
        padded_array = np.expand_dims(np.pad(input_image,
                                             ((overlay, chunk_size),
                                              (overlay, chunk_size)),
                                             mode='constant'),
                                      axis=0)
    else:
        h = 0
        w = 0
        padded_array = None

    output_np = np.empty(
        [h + overlay + chunk_size, w + overlay + chunk_size, 1],
        dtype=np.uint8)

    if padded_array is not None and padded_array.any():
        with torch.no_grad():
            for row in range(0, h, chunk_size - (2 * overlay)):
                for col in range(0, w, chunk_size - (2 * overlay)):

                    chunk_input = padded_array[row:row + chunk_size,
                                               col:col + chunk_size, :]
                    inputs = torch.from_numpy(
                        np.float32(np.transpose(chunk_input, (2, 0, 1))))

                    inputs.unsqueeze_(0)

                    if torch.cuda.is_available():
                        inputs = inputs.cuda()
                    # forward
                    outputs = model(inputs)

                    a, pred = torch.max(outputs, dim=1)
                    segmentation = torch.squeeze(pred)

                    row_from = row + overlay
                    row_to = row + chunk_size - overlay
                    col_from = col + overlay
                    col_to = col + chunk_size - overlay

                    useful_sem_seg = segmentation[overlay:chunk_size - overlay,
                                                  overlay:chunk_size - overlay]
                    output_np[row_from:row_to, col_from:col_to,
                              0] = useful_sem_seg.cpu()

            # Crop the output array back to the size of the input image and return it
            output_np = output_np[overlay:h + overlay, overlay:w + overlay]
            return output_np
    else:
        print(f"Error classifying image: an image with "
              f"{len(input_image.shape)} dimensions is not recognized")
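
The tiling arithmetic above can be checked in isolation. The sketch below reproduces the same window bookkeeping on a dummy numpy array; all sizes are illustrative assumptions and a simple threshold stands in for the model, so this is not a call into the project's code.

import numpy as np

# Illustrative sizes, not project defaults.
chunk_size = 8
overlay = 2
h, w = 20, 21

image = np.random.rand(h, w)
padded = np.pad(image, ((overlay, chunk_size), (overlay, chunk_size)), mode='constant')
output = np.empty((h + overlay + chunk_size, w + overlay + chunk_size), dtype=np.uint8)

for row in range(0, h, chunk_size - (2 * overlay)):
    for col in range(0, w, chunk_size - (2 * overlay)):
        chunk = padded[row:row + chunk_size, col:col + chunk_size]
        # Stand-in for the model: threshold the chunk instead of running inference.
        pred = (chunk > 0.5).astype(np.uint8)
        # Keep only the interior of the prediction, discarding the overlapping border.
        output[row + overlay:row + chunk_size - overlay,
               col + overlay:col + chunk_size - overlay] = pred[overlay:-overlay, overlay:-overlay]

# Crop back to the original image size, as the function above does before returning.
output = output[overlay:h + overlay, overlay:w + overlay]
assert output.shape == image.shape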
Example #2
def main(params):
    """
    Identify the class to which each image belongs.
    :param params: (dict) Parameters found in the yaml config file.

    """
    since = time.time()
    csv_file = params['inference']['img_csv_file']

    bucket = None
    bucket_name = params['global']['bucket_name']

    model, state_dict_path, model_name = net(params, inference=True)
    if torch.cuda.is_available():
        model = model.cuda()

    if bucket_name:
        s3 = boto3.resource('s3')
        bucket = s3.Bucket(bucket_name)
        bucket.download_file(csv_file, 'img_csv_file.csv')
        list_img = read_csv('img_csv_file.csv', inference=True)
    else:
        list_img = read_csv(csv_file, inference=True)

    if params['global']['task'] == 'classification':
        classifier(params, list_img, model)

    elif params['global']['task'] == 'segmentation':
        if bucket:
            bucket.download_file(state_dict_path, "saved_model.pth.tar")
            model = load_from_checkpoint("saved_model.pth.tar", model)
        else:
            model = load_from_checkpoint(state_dict_path, model)

        chunk_size, nbr_pix_overlap = calc_overlap(params)
        num_classes = params['global']['num_classes']
        for img in list_img:
            img_name = os.path.basename(img['tif'])
            if bucket:
                local_img = f"Images/{img_name}"
                bucket.download_file(img['tif'], local_img)
                inference_image = f"Classified_Images/{img_name.split('.')[0]}_inference.tif"
            else:
                local_img = img['tif']
                inference_image = os.path.join(
                    params['inference']['working_folder'],
                    f"{img_name.split('.')[0]}_inference.tif")

            assert_band_number(local_img, params['global']['number_of_bands'])

            nd_array_tif = image_reader_as_array(local_img)
            sem_seg_results = sem_seg_inference(model, nd_array_tif,
                                                nbr_pix_overlap, chunk_size,
                                                num_classes)
            create_new_raster_from_base(local_img, inference_image,
                                        sem_seg_results)
            print(f"Semantic segmentation of image {img_name} completed")
            if bucket:
                bucket.upload_file(
                    inference_image,
                    os.path.join(params['inference']['working_folder'],
                                 f"{img_name.split('.')[0]}_inference.tif"))
    else:
        raise ValueError(
            f"The task should be either classification or segmentation. The provided value is {params['global']['task']}"
        )

    time_elapsed = time.time() - since
    print('Inference completed in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
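
main() above reads only a handful of keys from the yaml configuration. The dictionary below is a hypothetical minimal params structure: the key names are taken from the code, the values are placeholders.

# Hypothetical minimal `params` dict matching the keys read by main() above.
params = {
    'global': {
        'task': 'segmentation',   # or 'classification'
        'bucket_name': None,      # set to an S3 bucket name to enable downloads/uploads
        'num_classes': 5,
        'number_of_bands': 3,
    },
    'inference': {
        'img_csv_file': 'images_to_infer.csv',
        'working_folder': 'inference_output',
    },
}

# main(params)  # would run segmentation inference over every image listed in the csv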
Example #3
def main(params):
    """
    Training and validation datasets preparation.
    :param params: (dict) Parameters found in the yaml config file.

    """
    gpkg_file = []
    bucket_name = params['global']['bucket_name']
    data_path = params['global']['data_path']
    metadata_file = params['global']['metadata_file']
    csv_file = params['sample']['prep_csv_file']

    if metadata_file:
        image_metadata = read_parameters(metadata_file)
    else:
        image_metadata = None

    final_samples_folder = None
    if bucket_name:
        s3 = boto3.resource('s3')
        bucket = s3.Bucket(bucket_name)
        bucket.download_file(csv_file, 'samples_prep.csv')
        list_data_prep = read_csv('samples_prep.csv')
        if data_path:
            final_samples_folder = os.path.join(data_path, "samples")
        else:
            final_samples_folder = "samples"
        samples_folder = "samples"
        out_label_folder = "label"

    else:
        list_data_prep = read_csv(csv_file)
        samples_folder = os.path.join(data_path, "samples")
        out_label_folder = os.path.join(data_path, "label")

    create_or_empty_folder(samples_folder)
    create_or_empty_folder(out_label_folder)

    number_samples = {'trn': 0, 'val': 0, 'tst': 0}
    number_classes = 0

    trn_hdf5, val_hdf5, tst_hdf5 = create_files_and_datasets(
        params, samples_folder)

    with tqdm(list_data_prep) as _tqdm:
        for info in _tqdm:

            if bucket_name:
                bucket.download_file(info['tif'],
                                     "Images/" + info['tif'].split('/')[-1])
                info['tif'] = "Images/" + info['tif'].split('/')[-1]
                if info['gpkg'] not in gpkg_file:
                    gpkg_file.append(info['gpkg'])
                    bucket.download_file(info['gpkg'],
                                         info['gpkg'].split('/')[-1])
                info['gpkg'] = info['gpkg'].split('/')[-1]

            assert_band_number(info['tif'],
                               params['global']['number_of_bands'])

            _tqdm.set_postfix(
                OrderedDict(file=f'{info["tif"]}',
                            sample_size=params['global']['samples_size']))

            # Read the input raster image
            np_input_image = image_reader_as_array(info['tif'])

            # Validate the number of classes in the vector file
            validate_num_classes(info['gpkg'], params['global']['num_classes'],
                                 info['attribute_name'])

            # Burn vector file in a raster file
            np_label_raster = vector_to_raster(info['gpkg'], info['tif'],
                                               info['attribute_name'])

            # Guidelines for pre-processing: http://cs231n.github.io/neural-networks-2/#datapre
            # Scale arrays to the range given by scale_data (defaults to [0, 1]).
            # Useful if dealing with 8 bit *and* 16 bit images.
            sc_min, sc_max = params['global']['scale_data'] if params['global'][
                'scale_data'] else (0, 1)
            np_input_image = minmax_scale(
                np_input_image,
                orig_range=(np.min(np_input_image),
                            np.max(np_input_image)),
                scale_range=(sc_min, sc_max))

            # Mask the zeros from input image into label raster.
            if params['sample']['mask_reference']:
                np_label_raster = mask_image(np_input_image, np_label_raster)

            if info['dataset'] == 'trn':
                out_file = trn_hdf5
            elif info['dataset'] == 'val':
                out_file = val_hdf5
            elif info['dataset'] == 'tst':
                out_file = tst_hdf5
            else:
                raise ValueError(
                    f"Dataset value must be trn or val or tst. Provided value is {info['dataset']}"
                )

            np_label_raster = np.reshape(
                np_label_raster,
                (np_label_raster.shape[0], np_label_raster.shape[1], 1))
            number_samples, number_classes = samples_preparation(
                np_input_image, np_label_raster,
                params['global']['samples_size'],
                params['sample']['samples_dist'], number_samples,
                number_classes, out_file, info['dataset'],
                params['sample']['min_annotated_percent'], image_metadata)

            _tqdm.set_postfix(OrderedDict(number_samples=number_samples))
            out_file.flush()

    trn_hdf5.close()
    val_hdf5.close()
    tst_hdf5.close()

    print("Number of samples created: ", number_samples)

    if bucket_name and final_samples_folder:
        print('Transferring samples to the bucket')
        bucket.upload_file(samples_folder + "/trn_samples.hdf5",
                           final_samples_folder + '/trn_samples.hdf5')
        bucket.upload_file(samples_folder + "/val_samples.hdf5",
                           final_samples_folder + '/val_samples.hdf5')
        bucket.upload_file(samples_folder + "/tst_samples.hdf5",
                           final_samples_folder + '/tst_samples.hdf5')

    print("End of process")
Example #4
def samples_preparation(sat_img, ref_img, sample_size, dist_samples,
                        samples_count, num_classes, samples_file, dataset,
                        background_switch):
    """Extract and write samples from an input image and a reference image
    Args:
        sat_img: path and name of the input image
        ref_img: path and name of the reference image
        sample_size: size (in pixels) of the samples to create
        dist_samples: distance (in pixels) between samples in both images
        samples_count: current number of samples created (will be updated and returned)
        num_classes: number of classes in the reference data (will be updated and returned)
        samples_file: hdf5 file where samples will be written
        dataset: type of dataset the samples are written to; must be either 'trn' or 'val'
        background_switch: if True, samples containing only background pixels are discarded
    """

    # read input and reference images as array
    in_img_array = image_reader_as_array(sat_img)
    label_array = image_reader_as_array(ref_img)

    h, w, num_bands = in_img_array.shape

    if dataset == 'trn':
        idx_samples = samples_count['trn']
    elif dataset == 'val':
        idx_samples = samples_count['val']
    else:
        raise ValueError(
            f"Dataset value must be trn or val. Provided value is {dataset}")

    # half tile padding
    half_tile = int(sample_size / 2)
    pad_in_img_array = np.pad(in_img_array, ((half_tile, half_tile),
                                             (half_tile, half_tile), (0, 0)),
                              mode='constant')
    pad_label_array = np.pad(label_array, ((half_tile, half_tile),
                                           (half_tile, half_tile), (0, 0)),
                             mode='constant')

    for row in range(0, h, dist_samples):
        for column in range(0, w, dist_samples):
            data = (pad_in_img_array[row:row + sample_size,
                                     column:column + sample_size, :])
            target = np.squeeze(
                pad_label_array[row:row + sample_size,
                                column:column + sample_size, :],
                axis=2)

            target_class_num = max(target.ravel())

            if (background_switch
                    and target_class_num != 0) or (not background_switch):
                resize_datasets(samples_file)
                samples_file["sat_img"][idx_samples, ...] = data
                samples_file["map_img"][idx_samples, ...] = target
                idx_samples += 1

            if num_classes < target_class_num:
                num_classes = target_class_num

    if dataset == 'trn':
        samples_count['trn'] = idx_samples
    elif dataset == 'val':
        samples_count['val'] = idx_samples

    # return the updated samples count and number of classes.
    return samples_count, num_classes
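
samples_preparation pads each image by half a tile and then walks a grid with a stride of dist_samples pixels. The sketch below, with purely illustrative sizes, shows that every window on that grid keeps the full sample_size thanks to the padding, and how many samples the grid yields.

import numpy as np

# Illustrative sizes, not project defaults.
sample_size = 4
dist_samples = 2
h, w, bands = 10, 10, 3

img = np.random.rand(h, w, bands)
half_tile = sample_size // 2
padded = np.pad(img, ((half_tile, half_tile), (half_tile, half_tile), (0, 0)), mode='constant')

count = 0
for row in range(0, h, dist_samples):
    for col in range(0, w, dist_samples):
        window = padded[row:row + sample_size, col:col + sample_size, :]
        # The half-tile padding guarantees a full-size window,
        # even at the right and bottom edges of the image.
        assert window.shape == (sample_size, sample_size, bands)
        count += 1

print(count)  # 25 windows for a 10x10 image with a stride of 2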
Example #5
def main(bucket_name, data_path, samples_size, num_classes, number_of_bands,
         csv_file, samples_dist, remove_background, mask_input_image,
         mask_reference):
    gpkg_file = []
    if bucket_name:
        s3 = boto3.resource('s3')
        bucket = s3.Bucket(bucket_name)
        bucket.download_file(csv_file, 'samples_prep.csv')
        list_data_prep = read_csv('samples_prep.csv')
        if data_path:
            final_samples_folder = os.path.join(data_path, "samples")
            final_out_label_folder = os.path.join(data_path, "label")
        else:
            final_samples_folder = "samples"
            final_out_label_folder = "label"
        samples_folder = "samples"
        out_label_folder = "label"

    else:
        list_data_prep = read_csv(csv_file)
        samples_folder = os.path.join(data_path, "samples")
        out_label_folder = os.path.join(data_path, "label")

    create_or_empty_folder(samples_folder)
    create_or_empty_folder(out_label_folder)

    number_samples = {'trn': 0, 'val': 0}
    number_classes = 0

    trn_hdf5 = h5py.File(os.path.join(samples_folder, "trn_samples.hdf5"), "w")
    val_hdf5 = h5py.File(os.path.join(samples_folder, "val_samples.hdf5"), "w")

    trn_hdf5.create_dataset("sat_img",
                            (0, samples_size, samples_size, number_of_bands),
                            np.float32,
                            maxshape=(None, samples_size, samples_size,
                                      number_of_bands))
    trn_hdf5.create_dataset("map_img", (0, samples_size, samples_size),
                            np.uint8,
                            maxshape=(None, samples_size, samples_size))
    val_hdf5.create_dataset("sat_img",
                            (0, samples_size, samples_size, number_of_bands),
                            np.float32,
                            maxshape=(None, samples_size, samples_size,
                                      number_of_bands))
    val_hdf5.create_dataset("map_img", (0, samples_size, samples_size),
                            np.uint8,
                            maxshape=(None, samples_size, samples_size))
    for info in list_data_prep:
        img_name = os.path.basename(info['tif']).split('.')[0]
        tmp_label_name = os.path.join(out_label_folder,
                                      img_name + "_label_tmp.tif")
        label_name = os.path.join(out_label_folder, img_name + "_label.tif")

        if bucket_name:
            bucket.download_file(info['tif'],
                                 "Images/" + info['tif'].split('/')[-1])
            info['tif'] = "Images/" + info['tif'].split('/')[-1]
            if info['gpkg'] not in gpkg_file:
                gpkg_file.append(info['gpkg'])
                bucket.download_file(info['gpkg'], info['gpkg'].split('/')[-1])
            info['gpkg'] = info['gpkg'].split('/')[-1]
        assert_band_number(info['tif'], number_of_bands)

        value_field = info['attribute_name']
        validate_num_classes(info['gpkg'], num_classes, value_field)

        # Mask zeros from input image into label raster.
        if mask_reference:
            tmp_label_raster = create_new_raster_from_base(
                info['tif'], tmp_label_name, 1)
            vector_to_raster(info['gpkg'], info['attribute_name'],
                             tmp_label_raster)
            tmp_label_raster = None

            masked_array = mask_image(image_reader_as_array(info['tif']),
                                      image_reader_as_array(tmp_label_name))
            create_new_raster_from_base(info['tif'], label_name, 1,
                                        masked_array)

            os.remove(tmp_label_name)

        else:
            label_raster = create_new_raster_from_base(info['tif'], label_name,
                                                       1)
            vector_to_raster(info['gpkg'], info['attribute_name'],
                             label_raster)
            label_raster = None

        # Mask zeros from label raster into input image.
        if mask_input_image:
            masked_img = mask_image(image_reader_as_array(label_name),
                                    image_reader_as_array(info['tif']))
            create_new_raster_from_base(label_name, info['tif'],
                                        number_of_bands, masked_img)

        if info['dataset'] == 'trn':
            out_file = trn_hdf5
        elif info['dataset'] == 'val':
            out_file = val_hdf5

        number_samples, number_classes = samples_preparation(
            info['tif'], label_name, samples_size, samples_dist,
            number_samples, number_classes, out_file, info['dataset'],
            remove_background)
        print(info['tif'])
        print(number_samples)
        out_file.flush()

    trn_hdf5.close()
    val_hdf5.close()

    print("Number of samples created: ", number_samples)

    if bucket_name:
        print('Transferring samples to the bucket')
        bucket.upload_file(samples_folder + "/trn_samples.hdf5",
                           final_samples_folder + '/trn_samples.hdf5')
        bucket.upload_file(samples_folder + "/val_samples.hdf5",
                           final_samples_folder + '/val_samples.hdf5')

    print("End of process")
Example #6
def main(params):
    """
    Training and validation datasets preparation.
    :param params: (dict) Parameters found in the yaml config file.

    """
    gpkg_file = []
    bucket_name = params['global']['bucket_name']
    data_path = params['global']['data_path']
    csv_file = params['sample']['prep_csv_file']

    if bucket_name:
        s3 = boto3.resource('s3')
        bucket = s3.Bucket(bucket_name)
        bucket.download_file(csv_file, 'samples_prep.csv')
        list_data_prep = read_csv('samples_prep.csv')
        if data_path:
            final_samples_folder = os.path.join(data_path, "samples")
        else:
            final_samples_folder = "samples"
        samples_folder = "samples"
        out_label_folder = "label"

    else:
        list_data_prep = read_csv(csv_file)
        samples_folder = os.path.join(data_path, "samples")
        out_label_folder = os.path.join(data_path, "label")

    create_or_empty_folder(samples_folder)
    create_or_empty_folder(out_label_folder)

    number_samples = {'trn': 0, 'val': 0, 'tst': 0}
    number_classes = 0

    trn_hdf5, val_hdf5, tst_hdf5 = create_files_and_datasets(
        params, samples_folder)

    for info in list_data_prep:

        if bucket_name:
            bucket.download_file(info['tif'],
                                 "Images/" + info['tif'].split('/')[-1])
            info['tif'] = "Images/" + info['tif'].split('/')[-1]
            if info['gpkg'] not in gpkg_file:
                gpkg_file.append(info['gpkg'])
                bucket.download_file(info['gpkg'], info['gpkg'].split('/')[-1])
            info['gpkg'] = info['gpkg'].split('/')[-1]

        assert_band_number(info['tif'], params['global']['number_of_bands'])

        # Read the input raster image
        np_input_image = image_reader_as_array(info['tif'])

        # Validate the number of classes in the vector file
        validate_num_classes(info['gpkg'], params['global']['num_classes'],
                             info['attribute_name'])

        # Burn vector file in a raster file
        np_label_raster = vector_to_raster(info['gpkg'], info['tif'],
                                           info['attribute_name'])

        # Mask the zeros from input image into label raster.
        if params['sample']['mask_reference']:
            np_label_raster = mask_image(np_input_image, np_label_raster)

        if info['dataset'] == 'trn':
            out_file = trn_hdf5
        elif info['dataset'] == 'val':
            out_file = val_hdf5
        elif info['dataset'] == 'tst':
            out_file = tst_hdf5
        else:
            raise ValueError(
                f"Dataset value must be trn or val or tst. Provided value is {info['dataset']}"
            )

        np_label_raster = np.reshape(
            np_label_raster,
            (np_label_raster.shape[0], np_label_raster.shape[1], 1))
        number_samples, number_classes = samples_preparation(
            np_input_image, np_label_raster, params['global']['samples_size'],
            params['sample']['samples_dist'], number_samples, number_classes,
            out_file, info['dataset'],
            params['sample']['min_annotated_percent'])

        print(info['tif'])
        print(number_samples)
        out_file.flush()

    trn_hdf5.close()
    val_hdf5.close()
    tst_hdf5.close()

    print("Number of samples created: ", number_samples)

    if bucket_name:
        print('Transferring samples to the bucket')
        bucket.upload_file(samples_folder + "/trn_samples.hdf5",
                           final_samples_folder + '/trn_samples.hdf5')
        bucket.upload_file(samples_folder + "/val_samples.hdf5",
                           final_samples_folder + '/val_samples.hdf5')
        bucket.upload_file(samples_folder + "/tst_samples.hdf5",
                           final_samples_folder + '/tst_samples.hdf5')

    print("End of process")
Example #7
def main(bucket_name, data_path, samples_size, num_classes, number_of_bands,
         csv_file, samples_dist, remove_background, mask_input_image,
         mask_reference):
    gpkg_file = []
    if bucket_name:
        s3 = boto3.resource('s3')
        bucket = s3.Bucket(bucket_name)
        bucket.download_file(csv_file, 'samples_prep.csv')
        list_data_prep = read_csv('samples_prep.csv')
        if data_path:
            final_samples_folder = os.path.join(data_path, "samples")
        else:
            final_samples_folder = "samples"
        samples_folder = "samples"
        out_label_folder = "label"

    else:
        list_data_prep = read_csv(csv_file)
        samples_folder = os.path.join(data_path, "samples")
        out_label_folder = os.path.join(data_path, "label")

    create_or_empty_folder(samples_folder)
    create_or_empty_folder(out_label_folder)

    number_samples = {'trn': 0, 'val': 0}
    number_classes = 0

    trn_hdf5 = h5py.File(os.path.join(samples_folder, "trn_samples.hdf5"), "w")
    val_hdf5 = h5py.File(os.path.join(samples_folder, "val_samples.hdf5"), "w")

    trn_hdf5.create_dataset("sat_img",
                            (0, samples_size, samples_size, number_of_bands),
                            np.float32,
                            maxshape=(None, samples_size, samples_size,
                                      number_of_bands))
    trn_hdf5.create_dataset("map_img", (0, samples_size, samples_size),
                            np.uint8,
                            maxshape=(None, samples_size, samples_size))
    val_hdf5.create_dataset("sat_img",
                            (0, samples_size, samples_size, number_of_bands),
                            np.float32,
                            maxshape=(None, samples_size, samples_size,
                                      number_of_bands))
    val_hdf5.create_dataset("map_img", (0, samples_size, samples_size),
                            np.uint8,
                            maxshape=(None, samples_size, samples_size))
    for info in list_data_prep:

        if bucket_name:
            bucket.download_file(info['tif'],
                                 "Images/" + info['tif'].split('/')[-1])
            info['tif'] = "Images/" + info['tif'].split('/')[-1]
            if info['gpkg'] not in gpkg_file:
                gpkg_file.append(info['gpkg'])
                bucket.download_file(info['gpkg'], info['gpkg'].split('/')[-1])
            info['gpkg'] = info['gpkg'].split('/')[-1]
        assert_band_number(info['tif'], number_of_bands)

        # Read the input raster image
        np_input_image = image_reader_as_array(info['tif'])

        # Validate the number of classes in the vector file
        validate_num_classes(info['gpkg'], num_classes, info['attribute_name'])

        # Burn vector file in a raster file
        np_label_raster = vector_to_raster(info['gpkg'], info['tif'],
                                           info['attribute_name'])

        # Mask the zeros from input image into label raster.
        if mask_reference:
            np_label_raster = mask_image(np_input_image, np_label_raster)

        # Mask zeros from label raster into input image; otherwise use the original image
        if mask_input_image:
            np_input_image = mask_image(np_label_raster, np_input_image)

        if info['dataset'] == 'trn':
            out_file = trn_hdf5
        elif info['dataset'] == 'val':
            out_file = val_hdf5

        np_label_raster = np.reshape(
            np_label_raster,
            (np_label_raster.shape[0], np_label_raster.shape[1], 1))
        number_samples, number_classes = samples_preparation(
            np_input_image, np_label_raster, samples_size, samples_dist,
            number_samples, number_classes, out_file, info['dataset'],
            remove_background)

        print(info['tif'])
        print(number_samples)
        out_file.flush()

    trn_hdf5.close()
    val_hdf5.close()

    print("Number of samples created: ", number_samples)

    if bucket_name:
        print('Transferring samples to the bucket')
        bucket.upload_file(samples_folder + "/trn_samples.hdf5",
                           final_samples_folder + '/trn_samples.hdf5')
        bucket.upload_file(samples_folder + "/val_samples.hdf5",
                           final_samples_folder + '/val_samples.hdf5')

    print("End of process")