def main(params):
    """
    Run inference on every image listed in the inference csv file.

    Depending on params['global']['task'], the images are either handed to
    the classifier or processed with semantic segmentation, in which case one
    "<image>_inference.tif" raster is written per image. When a bucket name is
    configured, the csv file, the checkpoint and the images are downloaded
    from that S3 bucket and the segmentation rasters are uploaded back to it.

    :param params: (dict) Parameters found in the yaml config file.
    :raises ValueError: if the task is neither 'classification' nor 'segmentation'.
    """
    since = time.time()
    csv_file = params['inference']['img_csv_file']

    bucket = None
    bucket_name = params['global']['bucket_name']

    model, state_dict_path, model_name = net(params, inference=True)
    if torch.cuda.is_available():
        model = model.cuda()

    if bucket_name:
        s3 = boto3.resource('s3')
        bucket = s3.Bucket(bucket_name)
        # Work on a local copy of the csv file stored in the bucket.
        bucket.download_file(csv_file, 'img_csv_file.csv')
        csv_file = 'img_csv_file.csv'
    list_img = read_csv(csv_file, inference=True)

    task = params['global']['task']
    if task == 'classification':
        classifier(params, list_img, model)

    elif task == 'segmentation':
        if bucket:
            bucket.download_file(state_dict_path, "saved_model.pth.tar")
            state_dict_path = "saved_model.pth.tar"
            # Local staging folders used below for the downloaded images and
            # the produced rasters; created up front in case they are missing.
            os.makedirs("Images", exist_ok=True)
            os.makedirs("Classified_Images", exist_ok=True)
        model = load_from_checkpoint(state_dict_path, model)

        chunk_size, nbr_pix_overlap = calc_overlap(params)
        num_classes = params['global']['num_classes']
        for img in list_img:
            img_name = os.path.basename(img['tif'])
            # NOTE: everything after the first dot of the file name is dropped.
            output_name = f"{img_name.split('.')[0]}_inference.tif"
            if bucket:
                local_img = f"Images/{img_name}"
                bucket.download_file(img['tif'], local_img)
                inference_image = f"Classified_Images/{output_name}"
            else:
                local_img = img['tif']
                inference_image = os.path.join(
                    params['inference']['working_folder'], output_name)

            assert_band_number(local_img, params['global']['number_of_bands'])

            nd_array_tif = image_reader_as_array(local_img)
            sem_seg_results = sem_seg_inference(model, nd_array_tif,
                                                nbr_pix_overlap, chunk_size,
                                                num_classes)
            create_new_raster_from_base(local_img, inference_image,
                                        sem_seg_results)
            print(f"Semantic segmentation of image {img_name} completed")
            if bucket:
                # The raster is stored in the bucket under the working folder.
                bucket.upload_file(
                    inference_image,
                    os.path.join(params['inference']['working_folder'],
                                 output_name))
    else:
        raise ValueError(
            f"The task should be either classification or segmentation. The provided value is {task}"
        )

    time_elapsed = time.time() - since
    print('Inference completed in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
Beispiel #2
0
def main(bucket, work_folder, img_list, weights_file_name, model,
         number_of_bands, overlay, classify, num_classes):
    """Identify the class to which each image belongs.

    When ``classify`` is true, each image is classified, the predicted class
    and the best-scoring classes are printed, and all scores are saved to
    ``classification_results.csv``. Otherwise semantic segmentation is run and
    one ``<image>_inference.tif`` raster is written per image.

    Args:
        bucket: bucket in which data is stored if using AWS S3
        work_folder: full file path of the folder containing images
        img_list: list containing images to classify
        weights_file_name: full file path of the file containing weights
        model: loaded model with which inference should be done
        number_of_bands: number of bands in the input rasters
        overlay: amount of overlay to apply
        classify: True if doing a classification task, False if doing semantic segmentation
        num_classes: number of classes; sizes the score columns of the
            classification results table
    """
    if torch.cuda.is_available():
        model = model.cuda()

    if bucket:
        bucket.download_file(weights_file_name, "saved_model.pth.tar")
        model = load_from_checkpoint("saved_model.pth.tar", model)
    else:
        model = load_from_checkpoint(weights_file_name, model)

    if classify:
        # classes.csv is expected next to the weights file.
        weights_folders = weights_file_name.split('/')[:-1]
        if bucket:
            bucket.download_file(
                os.path.join('', *weights_folders, 'classes.csv'),
                'classes.csv')
            classes_csv = 'classes.csv'
        else:
            classes_csv = ''.join(
                f"{folder}/" for folder in weights_folders) + 'classes.csv'
        with open(classes_csv, 'rt') as file:
            classes = list(csv.reader(file))

    if bucket:
        # Local staging folders used below; created up front in case they
        # are missing.
        os.makedirs("Images", exist_ok=True)
        if not classify:
            os.makedirs("Classified_Images", exist_ok=True)

    since = time.time()
    classified_results = np.empty((0, 2 + num_classes))

    for img in img_list:
        img_name = os.path.basename(img['tif'])
        # NOTE: everything after the first dot of the file name is dropped.
        output_name = f"{img_name.split('.')[0]}_inference.tif"
        if bucket:
            local_img = f"Images/{img_name}"
            bucket.download_file(img['tif'], local_img)
            inference_image = f"Classified_Images/{output_name}"
        else:
            local_img = img['tif']
            inference_image = os.path.join(work_folder, output_name)

        assert_band_number(local_img, number_of_bands)
        if classify:
            outputs, predicted = classifier(bucket, model, img['tif'])
            # Convert the scores once instead of once per use.
            scores = outputs.cpu().numpy()[0]
            predicted_class = classes[0][predicted]
            # Rank class indices by score. Working on indices (rather than
            # looking values up again) keeps tied scores on distinct classes,
            # and nlargest simply returns fewer entries when the model has
            # fewer than 5 classes.
            best_loc = heapq.nlargest(5, range(len(scores)),
                                      key=scores.__getitem__)
            print(f"Image {img_name} classified as {predicted_class}")
            print('Top 5 classes:')
            for loc in best_loc:
                print(f"\t{classes[0][loc]} : {scores[loc]}")
            # One row per image: path, predicted class, then every score.
            row = np.append([img['tif'], predicted_class], scores)
            classified_results = np.append(classified_results, [row], axis=0)
            print()
        else:
            sem_seg_results = sem_seg_inference(bucket, model, img['tif'],
                                                overlay)
            create_new_raster_from_base(local_img, inference_image,
                                        sem_seg_results)
            print(f"Semantic segmentation of image {img_name} completed")

        if bucket and not classify:
            bucket.upload_file(inference_image,
                               os.path.join(work_folder, output_name))

    if classify:
        csv_results = 'classification_results.csv'
        if bucket:
            np.savetxt(csv_results,
                       classified_results,
                       fmt='%s',
                       delimiter=',')
            bucket.upload_file(csv_results,
                               os.path.join(work_folder, csv_results))
        else:
            np.savetxt(os.path.join(work_folder, csv_results),
                       classified_results,
                       fmt='%s',
                       delimiter=',')

    time_elapsed = time.time() - since
    print('Inference completed in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
Beispiel #3
0
def main(params):
    """
    Training, validation and test datasets preparation.

    For every row of the preparation csv file, the input raster is read, the
    vector file is burned into a label raster, and samples are written to the
    trn/val/tst HDF5 file matching the row's dataset. When a bucket name is
    configured, inputs are downloaded from that S3 bucket and the resulting
    HDF5 files are uploaded back to it.

    :param params: (dict) Parameters found in the yaml config file.
    :raises ValueError: if a row's dataset is not 'trn', 'val' or 'tst'.
    """
    downloaded_gpkg = set()
    bucket_name = params['global']['bucket_name']
    data_path = params['global']['data_path']
    metadata_file = params['global']['metadata_file']
    csv_file = params['sample']['prep_csv_file']

    image_metadata = read_parameters(metadata_file) if metadata_file else None

    final_samples_folder = None
    if bucket_name:
        s3 = boto3.resource('s3')
        bucket = s3.Bucket(bucket_name)
        bucket.download_file(csv_file, 'samples_prep.csv')
        list_data_prep = read_csv('samples_prep.csv')
        final_samples_folder = (os.path.join(data_path, "samples")
                                if data_path else "samples")
        samples_folder = "samples"
        out_label_folder = "label"
        # Local staging folder for downloaded rasters; created up front in
        # case it is missing.
        os.makedirs("Images", exist_ok=True)
    else:
        list_data_prep = read_csv(csv_file)
        samples_folder = os.path.join(data_path, "samples")
        out_label_folder = os.path.join(data_path, "label")

    create_or_empty_folder(samples_folder)
    create_or_empty_folder(out_label_folder)

    number_samples = {'trn': 0, 'val': 0, 'tst': 0}
    number_classes = 0

    trn_hdf5, val_hdf5, tst_hdf5 = create_files_and_datasets(
        params, samples_folder)
    hdf5_files = {'trn': trn_hdf5, 'val': val_hdf5, 'tst': tst_hdf5}

    # try/finally so the HDF5 files are closed even when a row fails.
    try:
        with tqdm(list_data_prep) as _tqdm:
            for info in _tqdm:

                if bucket_name:
                    # Replace bucket keys by the local copies of the files.
                    local_tif = "Images/" + info['tif'].split('/')[-1]
                    bucket.download_file(info['tif'], local_tif)
                    info['tif'] = local_tif
                    # A vector file shared by several rows is fetched once.
                    if info['gpkg'] not in downloaded_gpkg:
                        downloaded_gpkg.add(info['gpkg'])
                        bucket.download_file(info['gpkg'],
                                             info['gpkg'].split('/')[-1])
                    info['gpkg'] = info['gpkg'].split('/')[-1]

                assert_band_number(info['tif'],
                                   params['global']['number_of_bands'])

                _tqdm.set_postfix(
                    OrderedDict(file=f'{info["tif"]}',
                                sample_size=params['global']['samples_size']))

                # Read the input raster image
                np_input_image = image_reader_as_array(info['tif'])

                # Validate the number of class in the vector file
                validate_num_classes(info['gpkg'],
                                     params['global']['num_classes'],
                                     info['attribute_name'])

                # Burn vector file in a raster file
                np_label_raster = vector_to_raster(info['gpkg'], info['tif'],
                                                   info['attribute_name'])

                # Guidelines for pre-processing: http://cs231n.github.io/neural-networks-2/#datapre
                # Images are always scaled, which is useful when dealing with
                # 8 bit *and* 16 bit images. The target range comes from
                # 'scale_data' and falls back to [0, 1] when that setting is
                # empty (previously an empty setting crashed on unpacking).
                sc_min, sc_max = params['global']['scale_data'] or (0, 1)
                np_input_image = minmax_scale(
                    np_input_image,
                    orig_range=(np.min(np_input_image),
                                np.max(np_input_image)),
                    scale_range=(sc_min, sc_max))

                # Mask the zeros from input image into label raster.
                if params['sample']['mask_reference']:
                    np_label_raster = mask_image(np_input_image,
                                                 np_label_raster)

                if info['dataset'] not in hdf5_files:
                    raise ValueError(
                        f"Dataset value must be trn or val or tst. Provided value is {info['dataset']}"
                    )
                out_file = hdf5_files[info['dataset']]

                # Add a trailing axis of length 1 to the label raster.
                np_label_raster = np.reshape(
                    np_label_raster,
                    (np_label_raster.shape[0], np_label_raster.shape[1], 1))
                number_samples, number_classes = samples_preparation(
                    np_input_image, np_label_raster,
                    params['global']['samples_size'],
                    params['sample']['samples_dist'], number_samples,
                    number_classes, out_file, info['dataset'],
                    params['sample']['min_annotated_percent'], image_metadata)

                _tqdm.set_postfix(OrderedDict(number_samples=number_samples))
                out_file.flush()
    finally:
        for hdf5_file in hdf5_files.values():
            hdf5_file.close()

    print("Number of samples created: ", number_samples)

    if bucket_name and final_samples_folder:
        print('Transfering Samples to the bucket')
        for file_name in ("trn_samples.hdf5", "val_samples.hdf5",
                          "tst_samples.hdf5"):
            bucket.upload_file(samples_folder + "/" + file_name,
                               final_samples_folder + '/' + file_name)

    print("End of process")
def main(bucket_name, data_path, samples_size, num_classes, number_of_bands,
         csv_file, samples_dist, remove_background, mask_input_image,
         mask_reference):
    """Prepare the training and validation samples from a csv list of images.

    For every row of the csv file a label raster is created from the vector
    file, optionally masked, and samples are written to the trn or val HDF5
    file matching the row's dataset.

    Args:
        bucket_name: name of the S3 bucket holding the data; when empty, all
            files are read from and written to the local file system
        data_path: folder under which the "samples" and "label" folders live
        samples_size: height and width of the stored samples
        num_classes: number of classes checked against the vector file
        number_of_bands: number of bands expected in the input rasters
        csv_file: csv file listing the images to prepare
        samples_dist: forwarded to samples_preparation
        remove_background: forwarded to samples_preparation
        mask_input_image: if true, mask the input image with the label raster
        mask_reference: if true, mask the label raster with the input image

    Raises:
        ValueError: if a row's dataset is neither 'trn' nor 'val'.
    """
    downloaded_gpkg = set()
    if bucket_name:
        s3 = boto3.resource('s3')
        bucket = s3.Bucket(bucket_name)
        bucket.download_file(csv_file, 'samples_prep.csv')
        list_data_prep = read_csv('samples_prep.csv')
        final_samples_folder = (os.path.join(data_path, "samples")
                                if data_path else "samples")
        samples_folder = "samples"
        out_label_folder = "label"
        # Local staging folder for downloaded rasters; created up front in
        # case it is missing.
        os.makedirs("Images", exist_ok=True)
    else:
        list_data_prep = read_csv(csv_file)
        samples_folder = os.path.join(data_path, "samples")
        out_label_folder = os.path.join(data_path, "label")

    create_or_empty_folder(samples_folder)
    create_or_empty_folder(out_label_folder)

    number_samples = {'trn': 0, 'val': 0}
    number_classes = 0

    trn_hdf5 = h5py.File(os.path.join(samples_folder, "trn_samples.hdf5"), "w")
    val_hdf5 = h5py.File(os.path.join(samples_folder, "val_samples.hdf5"), "w")
    hdf5_files = {'trn': trn_hdf5, 'val': val_hdf5}

    # try/finally so the HDF5 files are closed even when a row fails.
    try:
        # Both files get the same two growable datasets (first axis unlimited).
        for hdf5_file in (trn_hdf5, val_hdf5):
            hdf5_file.create_dataset(
                "sat_img", (0, samples_size, samples_size, number_of_bands),
                np.float32,
                maxshape=(None, samples_size, samples_size, number_of_bands))
            hdf5_file.create_dataset(
                "map_img", (0, samples_size, samples_size),
                np.uint8,
                maxshape=(None, samples_size, samples_size))

        for info in list_data_prep:
            # Reject unknown datasets before any file is written; without
            # this check the samples would go to the previous row's file.
            if info['dataset'] not in hdf5_files:
                raise ValueError(
                    f"Dataset value must be trn or val. Provided value is {info['dataset']}"
                )
            out_file = hdf5_files[info['dataset']]

            img_name = os.path.basename(info['tif']).split('.')[0]
            tmp_label_name = os.path.join(out_label_folder,
                                          img_name + "_label_tmp.tif")
            label_name = os.path.join(out_label_folder,
                                      img_name + "_label.tif")

            if bucket_name:
                # Replace bucket keys by the local copies of the files.
                local_tif = "Images/" + info['tif'].split('/')[-1]
                bucket.download_file(info['tif'], local_tif)
                info['tif'] = local_tif
                # A vector file shared by several rows is fetched once.
                if info['gpkg'] not in downloaded_gpkg:
                    downloaded_gpkg.add(info['gpkg'])
                    bucket.download_file(info['gpkg'],
                                         info['gpkg'].split('/')[-1])
                info['gpkg'] = info['gpkg'].split('/')[-1]
            assert_band_number(info['tif'], number_of_bands)

            value_field = info['attribute_name']
            validate_num_classes(info['gpkg'], num_classes, value_field)

            # Mask zeros from input image into label raster.
            if mask_reference:
                tmp_label_raster = create_new_raster_from_base(
                    info['tif'], tmp_label_name, 1)
                vector_to_raster(info['gpkg'], info['attribute_name'],
                                 tmp_label_raster)
                # Drop the handle before the file is read back below
                # (presumably this closes the raster - TODO confirm).
                tmp_label_raster = None

                masked_array = mask_image(
                    image_reader_as_array(info['tif']),
                    image_reader_as_array(tmp_label_name))
                create_new_raster_from_base(info['tif'], label_name, 1,
                                            masked_array)

                os.remove(tmp_label_name)

            else:
                label_raster = create_new_raster_from_base(
                    info['tif'], label_name, 1)
                vector_to_raster(info['gpkg'], info['attribute_name'],
                                 label_raster)
                label_raster = None

            # Mask zeros from label raster into input image.
            # NOTE: the masked image is written to the path in info['tif'].
            if mask_input_image:
                masked_img = mask_image(image_reader_as_array(label_name),
                                        image_reader_as_array(info['tif']))
                create_new_raster_from_base(label_name, info['tif'],
                                            number_of_bands, masked_img)

            number_samples, number_classes = samples_preparation(
                info['tif'], label_name, samples_size, samples_dist,
                number_samples, number_classes, out_file, info['dataset'],
                remove_background)
            print(info['tif'])
            print(number_samples)
            out_file.flush()
    finally:
        trn_hdf5.close()
        val_hdf5.close()

    print("Number of samples created: ", number_samples)

    if bucket_name:
        print('Transfering Samples to the bucket')
        bucket.upload_file(samples_folder + "/trn_samples.hdf5",
                           final_samples_folder + '/trn_samples.hdf5')
        bucket.upload_file(samples_folder + "/val_samples.hdf5",
                           final_samples_folder + '/val_samples.hdf5')

    print("End of process")
Beispiel #5
0
def main(params):
    """
    Training, validation and test datasets preparation.

    For every row of the preparation csv file, the input raster is read, the
    vector file is burned into a label raster, and samples are written to the
    trn/val/tst HDF5 file matching the row's dataset. When a bucket name is
    configured, inputs are downloaded from that S3 bucket and the resulting
    HDF5 files are uploaded back to it.

    :param params: (dict) Parameters found in the yaml config file.
    :raises ValueError: if a row's dataset is not 'trn', 'val' or 'tst'.
    """
    downloaded_gpkg = set()
    bucket_name = params['global']['bucket_name']
    data_path = params['global']['data_path']
    csv_file = params['sample']['prep_csv_file']

    if bucket_name:
        s3 = boto3.resource('s3')
        bucket = s3.Bucket(bucket_name)
        bucket.download_file(csv_file, 'samples_prep.csv')
        list_data_prep = read_csv('samples_prep.csv')
        final_samples_folder = (os.path.join(data_path, "samples")
                                if data_path else "samples")
        samples_folder = "samples"
        out_label_folder = "label"
        # Local staging folder for downloaded rasters; created up front in
        # case it is missing.
        os.makedirs("Images", exist_ok=True)
    else:
        list_data_prep = read_csv(csv_file)
        samples_folder = os.path.join(data_path, "samples")
        out_label_folder = os.path.join(data_path, "label")

    create_or_empty_folder(samples_folder)
    create_or_empty_folder(out_label_folder)

    number_samples = {'trn': 0, 'val': 0, 'tst': 0}
    number_classes = 0

    trn_hdf5, val_hdf5, tst_hdf5 = create_files_and_datasets(
        params, samples_folder)
    hdf5_files = {'trn': trn_hdf5, 'val': val_hdf5, 'tst': tst_hdf5}

    # try/finally so the HDF5 files are closed even when a row fails.
    try:
        for info in list_data_prep:

            if bucket_name:
                # Replace bucket keys by the local copies of the files.
                local_tif = "Images/" + info['tif'].split('/')[-1]
                bucket.download_file(info['tif'], local_tif)
                info['tif'] = local_tif
                # A vector file shared by several rows is fetched once.
                if info['gpkg'] not in downloaded_gpkg:
                    downloaded_gpkg.add(info['gpkg'])
                    bucket.download_file(info['gpkg'],
                                         info['gpkg'].split('/')[-1])
                info['gpkg'] = info['gpkg'].split('/')[-1]

            assert_band_number(info['tif'],
                               params['global']['number_of_bands'])

            # Read the input raster image
            np_input_image = image_reader_as_array(info['tif'])

            # Validate the number of class in the vector file
            validate_num_classes(info['gpkg'], params['global']['num_classes'],
                                 info['attribute_name'])

            # Burn vector file in a raster file
            np_label_raster = vector_to_raster(info['gpkg'], info['tif'],
                                               info['attribute_name'])

            # Mask the zeros from input image into label raster.
            if params['sample']['mask_reference']:
                np_label_raster = mask_image(np_input_image, np_label_raster)

            if info['dataset'] not in hdf5_files:
                raise ValueError(
                    f"Dataset value must be trn or val or tst. Provided value is {info['dataset']}"
                )
            out_file = hdf5_files[info['dataset']]

            # Add a trailing axis of length 1 to the label raster.
            np_label_raster = np.reshape(
                np_label_raster,
                (np_label_raster.shape[0], np_label_raster.shape[1], 1))
            number_samples, number_classes = samples_preparation(
                np_input_image, np_label_raster,
                params['global']['samples_size'],
                params['sample']['samples_dist'], number_samples,
                number_classes, out_file, info['dataset'],
                params['sample']['min_annotated_percent'])

            print(info['tif'])
            print(number_samples)
            out_file.flush()
    finally:
        for hdf5_file in hdf5_files.values():
            hdf5_file.close()

    print("Number of samples created: ", number_samples)

    if bucket_name:
        print('Transfering Samples to the bucket')
        for file_name in ("trn_samples.hdf5", "val_samples.hdf5",
                          "tst_samples.hdf5"):
            bucket.upload_file(samples_folder + "/" + file_name,
                               final_samples_folder + '/' + file_name)

    print("End of process")
def main(bucket_name, data_path, samples_size, num_classes, number_of_bands,
         csv_file, samples_dist, remove_background, mask_input_image,
         mask_reference):
    """Prepare the training and validation samples from a csv list of images.

    For every row of the csv file the input raster is read, the vector file
    is burned into a label raster, both are optionally masked, and samples
    are written to the trn or val HDF5 file matching the row's dataset.

    Args:
        bucket_name: name of the S3 bucket holding the data; when empty, all
            files are read from and written to the local file system
        data_path: folder under which the "samples" and "label" folders live
        samples_size: height and width of the stored samples
        num_classes: number of classes checked against the vector file
        number_of_bands: number of bands expected in the input rasters
        csv_file: csv file listing the images to prepare
        samples_dist: forwarded to samples_preparation
        remove_background: forwarded to samples_preparation
        mask_input_image: if true, mask the input image with the label raster
        mask_reference: if true, mask the label raster with the input image

    Raises:
        ValueError: if a row's dataset is neither 'trn' nor 'val'.
    """
    downloaded_gpkg = set()
    if bucket_name:
        s3 = boto3.resource('s3')
        bucket = s3.Bucket(bucket_name)
        bucket.download_file(csv_file, 'samples_prep.csv')
        list_data_prep = read_csv('samples_prep.csv')
        final_samples_folder = (os.path.join(data_path, "samples")
                                if data_path else "samples")
        samples_folder = "samples"
        out_label_folder = "label"
        # Local staging folder for downloaded rasters; created up front in
        # case it is missing.
        os.makedirs("Images", exist_ok=True)
    else:
        list_data_prep = read_csv(csv_file)
        samples_folder = os.path.join(data_path, "samples")
        out_label_folder = os.path.join(data_path, "label")

    create_or_empty_folder(samples_folder)
    create_or_empty_folder(out_label_folder)

    number_samples = {'trn': 0, 'val': 0}
    number_classes = 0

    trn_hdf5 = h5py.File(os.path.join(samples_folder, "trn_samples.hdf5"), "w")
    val_hdf5 = h5py.File(os.path.join(samples_folder, "val_samples.hdf5"), "w")
    hdf5_files = {'trn': trn_hdf5, 'val': val_hdf5}

    # try/finally so the HDF5 files are closed even when a row fails.
    try:
        # Both files get the same two growable datasets (first axis unlimited).
        for hdf5_file in (trn_hdf5, val_hdf5):
            hdf5_file.create_dataset(
                "sat_img", (0, samples_size, samples_size, number_of_bands),
                np.float32,
                maxshape=(None, samples_size, samples_size, number_of_bands))
            hdf5_file.create_dataset(
                "map_img", (0, samples_size, samples_size),
                np.uint8,
                maxshape=(None, samples_size, samples_size))

        for info in list_data_prep:
            # Reject unknown datasets up front; without this check the
            # samples would go to the previous row's file.
            if info['dataset'] not in hdf5_files:
                raise ValueError(
                    f"Dataset value must be trn or val. Provided value is {info['dataset']}"
                )
            out_file = hdf5_files[info['dataset']]

            if bucket_name:
                # Replace bucket keys by the local copies of the files.
                local_tif = "Images/" + info['tif'].split('/')[-1]
                bucket.download_file(info['tif'], local_tif)
                info['tif'] = local_tif
                # A vector file shared by several rows is fetched once.
                if info['gpkg'] not in downloaded_gpkg:
                    downloaded_gpkg.add(info['gpkg'])
                    bucket.download_file(info['gpkg'],
                                         info['gpkg'].split('/')[-1])
                info['gpkg'] = info['gpkg'].split('/')[-1]
            assert_band_number(info['tif'], number_of_bands)

            # Read the input raster image
            np_input_image = image_reader_as_array(info['tif'])

            # Validate the number of class in the vector file
            validate_num_classes(info['gpkg'], num_classes,
                                 info['attribute_name'])

            # Burn vector file in a raster file
            np_label_raster = vector_to_raster(info['gpkg'], info['tif'],
                                               info['attribute_name'])

            # Mask the zeros from input image into label raster.
            if mask_reference:
                np_label_raster = mask_image(np_input_image, np_label_raster)

            # Mask zeros from label raster into input image otherwise use original image
            if mask_input_image:
                np_input_image = mask_image(np_label_raster, np_input_image)

            # Add a trailing axis of length 1 to the label raster.
            np_label_raster = np.reshape(
                np_label_raster,
                (np_label_raster.shape[0], np_label_raster.shape[1], 1))
            number_samples, number_classes = samples_preparation(
                np_input_image, np_label_raster, samples_size, samples_dist,
                number_samples, number_classes, out_file, info['dataset'],
                remove_background)

            print(info['tif'])
            print(number_samples)
            out_file.flush()
    finally:
        trn_hdf5.close()
        val_hdf5.close()

    print("Number of samples created: ", number_samples)

    if bucket_name:
        print('Transfering Samples to the bucket')
        bucket.upload_file(samples_folder + "/trn_samples.hdf5",
                           final_samples_folder + '/trn_samples.hdf5')
        bucket.upload_file(samples_folder + "/val_samples.hdf5",
                           final_samples_folder + '/val_samples.hdf5')

    print("End of process")