import os
import time
import warnings
from collections import OrderedDict
from pathlib import Path

import boto3
import numpy as np
import torch
from tqdm import tqdm

# Project-local helpers (read_csv, create_or_empty_folder, create_files_and_datasets,
# assert_band_number, validate_num_classes, vector_to_raster, minmax_scale, mask_image,
# samples_preparation, net, get_device_ids, load_from_checkpoint, calc_overlap,
# sem_seg_inference, create_new_raster_from_base, classifier) are assumed to be imported
# from the repo's utility modules; their exact module paths are not shown in these listings.


def main(params):
    """
    Training and validation datasets preparation.
    :param params: (dict) Parameters found in the yaml config file.
    """
    gpkg_file = []
    bucket_name = params['global']['bucket_name']
    data_path = params['global']['data_path']
    csv_file = params['sample']['prep_csv_file']

    if bucket_name:
        s3 = boto3.resource('s3')
        bucket = s3.Bucket(bucket_name)
        bucket.download_file(csv_file, 'samples_prep.csv')
        list_data_prep = read_csv('samples_prep.csv')
        if data_path:
            final_samples_folder = os.path.join(data_path, "samples")
        else:
            final_samples_folder = "samples"
        samples_folder = "samples"
        out_label_folder = "label"
    else:
        list_data_prep = read_csv(csv_file)
        samples_folder = os.path.join(data_path, "samples")
        out_label_folder = os.path.join(data_path, "label")

    create_or_empty_folder(samples_folder)
    create_or_empty_folder(out_label_folder)

    number_samples = {'trn': 0, 'val': 0, 'tst': 0}
    number_classes = 0

    trn_hdf5, val_hdf5, tst_hdf5 = create_files_and_datasets(params, samples_folder)

    with tqdm(list_data_prep) as _tqdm:
        for info in _tqdm:
            if bucket_name:
                bucket.download_file(info['tif'], "Images/" + info['tif'].split('/')[-1])
                info['tif'] = "Images/" + info['tif'].split('/')[-1]
                if info['gpkg'] not in gpkg_file:
                    gpkg_file.append(info['gpkg'])
                    bucket.download_file(info['gpkg'], info['gpkg'].split('/')[-1])
                info['gpkg'] = info['gpkg'].split('/')[-1]

            assert_band_number(info['tif'], params['global']['number_of_bands'])

            _tqdm.set_postfix(OrderedDict(file=f'{info["tif"]}',
                                          sample_size=params['global']['samples_size']))

            # Read the input raster image
            np_input_image = image_reader_as_array(info['tif'])

            # Validate the number of classes in the vector file
            validate_num_classes(info['gpkg'], params['global']['num_classes'], info['attribute_name'])

            # Burn the vector file into a raster
            np_label_raster = vector_to_raster(info['gpkg'], info['tif'], info['attribute_name'])

            # Guidelines for pre-processing: http://cs231n.github.io/neural-networks-2/#datapre
            # Scaling is on by default and is useful when mixing 8-bit and 16-bit images;
            # fall back to a [0, 1] target range when 'scale_data' is not set (a bare True
            # cannot be unpacked into sc_min, sc_max).
            scale = params['global']['scale_data'] if params['global']['scale_data'] else (0, 1)
            if scale:
                sc_min, sc_max = scale
                np_input_image = minmax_scale(np_input_image,
                                              orig_range=(np.min(np_input_image), np.max(np_input_image)),
                                              scale_range=(sc_min, sc_max))

            # Mask the zeros from the input image into the label raster.
            if params['sample']['mask_reference']:
                np_label_raster = mask_image(np_input_image, np_label_raster)

            if info['dataset'] == 'trn':
                out_file = trn_hdf5
            elif info['dataset'] == 'val':
                out_file = val_hdf5
            elif info['dataset'] == 'tst':
                out_file = tst_hdf5
            else:
                raise ValueError(f"Dataset value must be trn, val or tst. Provided value is {info['dataset']}")

            np_label_raster = np.reshape(np_label_raster,
                                         (np_label_raster.shape[0], np_label_raster.shape[1], 1))
            number_samples, number_classes = samples_preparation(np_input_image,
                                                                 np_label_raster,
                                                                 params['global']['samples_size'],
                                                                 params['sample']['samples_dist'],
                                                                 number_samples,
                                                                 number_classes,
                                                                 out_file,
                                                                 info['dataset'],
                                                                 params['sample']['min_annotated_percent'])
            _tqdm.set_postfix(OrderedDict(number_samples=number_samples))
            out_file.flush()

    trn_hdf5.close()
    val_hdf5.close()
    tst_hdf5.close()

    print("Number of samples created: ", number_samples)

    if bucket_name:
        print('Transferring samples to the bucket')
        bucket.upload_file(samples_folder + "/trn_samples.hdf5", final_samples_folder + '/trn_samples.hdf5')
        bucket.upload_file(samples_folder + "/val_samples.hdf5", final_samples_folder + '/val_samples.hdf5')
        bucket.upload_file(samples_folder + "/tst_samples.hdf5", final_samples_folder + '/tst_samples.hdf5')

    print("End of process")
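
# Usage sketch (illustrative, not from the source): the keys below are exactly those read by
# the sample-preparation main() above; every value is a placeholder standing in for what the
# parsed yaml config would normally provide.
if __name__ == '__main__':
    params = {
        'global': {'bucket_name': None,          # set to an S3 bucket name to read/write via boto3
                   'data_path': './data',        # hypothetical local data folder
                   'number_of_bands': 3,
                   'samples_size': 256,
                   'num_classes': 4,
                   'scale_data': (0, 1)},
        'sample': {'prep_csv_file': './data/samples_prep.csv',  # hypothetical csv path
                   'samples_dist': 200,
                   'min_annotated_percent': 10,
                   'mask_reference': False},
    }
    main(params)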
def main(params):
    """
    Identify the class to which each image belongs.
    :param params: (dict) Parameters found in the yaml config file.
    """
    since = time.time()
    img_dir_or_csv = params['inference']['img_dir_or_csv_file']
    working_folder = Path(params['inference']['working_folder'])
    working_folder.mkdir(exist_ok=True)
    print(f'Inferences will be saved to: {working_folder}')

    bucket = None
    bucket_name = params['global']['bucket_name']

    # 'debug' is referenced below but never defined in this listing; default it from an
    # assumed 'debug_mode' config key to avoid a NameError.
    debug = params['global'].get('debug_mode', False)

    model, state_dict_path, model_name = net(params, inference=True)

    num_devices = params['global']['num_gpus'] if params['global']['num_gpus'] else 0
    # List of GPU devices that are available and unused. If no GPUs, returns an empty list.
    lst_device_ids = get_device_ids(num_devices) if torch.cuda.is_available() else []
    device = torch.device(f'cuda:{lst_device_ids[0]}' if torch.cuda.is_available() and lst_device_ids else 'cpu')

    if lst_device_ids:
        print(f"Using Cuda device {lst_device_ids[0]}")
    else:
        warnings.warn("No Cuda device available. This process will only run on CPU")

    model.to(device)

    if bucket_name:
        s3 = boto3.resource('s3')
        bucket = s3.Bucket(bucket_name)
        if img_dir_or_csv.endswith('.csv'):
            bucket.download_file(img_dir_or_csv, 'img_csv_file.csv')
            list_img = read_csv('img_csv_file.csv', inference=True)
        else:
            raise NotImplementedError('Specify a csv file containing images for inference. '
                                      'Directory input not implemented yet')
    else:
        if img_dir_or_csv.endswith('.csv'):
            list_img = read_csv(img_dir_or_csv, inference=True)
        else:
            img_dir = Path(img_dir_or_csv)
            assert img_dir.exists(), f'Could not find directory "{img_dir_or_csv}"'
            list_img_paths = sorted(img_dir.glob('*.tif'))
            list_img = []
            for img_path in list_img_paths:
                img = {'tif': img_path}
                list_img.append(img)
            assert len(list_img) >= 1, f'No .tif files found in {img_dir_or_csv}'

    if params['global']['task'] == 'classification':
        classifier(params, list_img, model)
    elif params['global']['task'] == 'segmentation':
        if bucket:
            bucket.download_file(state_dict_path, "saved_model.pth.tar")
            model, _ = load_from_checkpoint("saved_model.pth.tar", model)
        else:
            model, _ = load_from_checkpoint(state_dict_path, model)

        chunk_size, nbr_pix_overlap = calc_overlap(params)
        num_classes = params['global']['num_classes']

        with tqdm(list_img, desc='image list', position=0) as _tqdm:
            for img in _tqdm:
                img_name = os.path.basename(img['tif'])
                if bucket:
                    local_img = f"Images/{img_name}"
                    bucket.download_file(img['tif'], local_img)
                    inference_image = f"Classified_Images/{img_name.split('.')[0]}_inference.tif"
                else:
                    local_img = img['tif']
                    inference_image = os.path.join(params['inference']['working_folder'],
                                                   f"{img_name.split('.')[0]}_inference.tif")

                assert_band_number(local_img, params['global']['number_of_bands'])

                nd_array_tif = image_reader_as_array(local_img)
                assert len(np.unique(nd_array_tif)) > 1, \
                    f'Image "{img_name}" only contains the value {np.unique(nd_array_tif)}.'

                # See: http://cs231n.github.io/neural-networks-2/#datapre
                # e.g. scale arrays from [0, 255] to [0, 1]
                scale = params['global']['scale_data']
                if scale:
                    sc_min, sc_max = scale
                    nd_array_tif = minmax_scale(nd_array_tif,
                                                orig_range=(np.min(nd_array_tif), np.max(nd_array_tif)),
                                                scale_range=(sc_min, sc_max))
                if debug:
                    _tqdm.set_postfix(OrderedDict(image_name=img_name,
                                                  image_shape=nd_array_tif.shape,
                                                  scale=scale))

                sem_seg_results = sem_seg_inference(model, nd_array_tif, nbr_pix_overlap,
                                                    chunk_size, num_classes, device)
                if debug and len(np.unique(sem_seg_results)) == 1:
                    print(f'Something is wrong. Inference contains only the value '
                          f'{np.unique(sem_seg_results)}. Make sure the "scale_data" parameter '
                          f'is coherent with the parameters used to train the model.')
                create_new_raster_from_base(local_img, inference_image, sem_seg_results)
                tqdm.write(f"Semantic segmentation of image {img_name} completed")
                if bucket:
                    bucket.upload_file(inference_image,
                                       os.path.join(params['inference']['working_folder'],
                                                    f"{img_name.split('.')[0]}_inference.tif"))
    else:
        raise ValueError(f"The task should be either classification or segmentation. "
                         f"The provided value is {params['global']['task']}")

    time_elapsed = time.time() - since
    print('Inference completed in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
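
# minmax_scale() is called throughout these listings as minmax_scale(img, orig_range=...,
# scale_range=...) but its body is not shown. A minimal sketch, assuming it implements
# standard min-max normalization (the parameter defaults below are guesses, not the repo's):
def minmax_scale(img, orig_range=(0, 255), scale_range=(0, 1)):
    """Linearly rescale array values from orig_range to scale_range."""
    o_min, o_max = orig_range
    s_min, s_max = scale_range
    # Map [o_min, o_max] onto [s_min, s_max]; assumes o_max > o_min
    # (a constant image would divide by zero here).
    return (img - o_min) / (o_max - o_min) * (s_max - s_min) + s_min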
def main(params):
    """
    Identify the class to which each image belongs.
    :param params: (dict) Parameters found in the yaml config file.
    """
    since = time.time()
    csv_file = params['inference']['img_csv_file']

    bucket = None
    bucket_name = params['global']['bucket_name']

    model, state_dict_path, model_name = net(params, inference=True)

    num_devices = params['global']['num_gpus'] if params['global']['num_gpus'] else 0
    # List of GPU devices that are available and unused. If no GPUs, returns an empty list.
    lst_device_ids = get_device_ids(num_devices) if torch.cuda.is_available() else []
    device = torch.device(f'cuda:{lst_device_ids[0]}' if torch.cuda.is_available() and lst_device_ids else 'cpu')

    if lst_device_ids:
        print(f"Using Cuda device {lst_device_ids[0]}")
    else:
        warnings.warn("No Cuda device available. This process will only run on CPU")

    model.to(device)

    if bucket_name:
        s3 = boto3.resource('s3')
        bucket = s3.Bucket(bucket_name)
        bucket.download_file(csv_file, 'img_csv_file.csv')
        list_img = read_csv('img_csv_file.csv', inference=True)
    else:
        list_img = read_csv(csv_file, inference=True)

    if params['global']['task'] == 'classification':
        classifier(params, list_img, model)
    elif params['global']['task'] == 'segmentation':
        if bucket:
            bucket.download_file(state_dict_path, "saved_model.pth.tar")
            model = load_from_checkpoint("saved_model.pth.tar", model)
        else:
            model = load_from_checkpoint(state_dict_path, model)

        chunk_size, nbr_pix_overlap = calc_overlap(params)
        num_classes = params['global']['num_classes']

        for img in tqdm(list_img, desc='image list', position=0):
            img_name = os.path.basename(img['tif'])
            if bucket:
                local_img = f"Images/{img_name}"
                bucket.download_file(img['tif'], local_img)
                inference_image = f"Classified_Images/{img_name.split('.')[0]}_inference.tif"
            else:
                local_img = img['tif']
                inference_image = os.path.join(params['inference']['working_folder'],
                                               f"{img_name.split('.')[0]}_inference.tif")

            assert_band_number(local_img, params['global']['number_of_bands'])

            nd_array_tif = image_reader_as_array(local_img)

            # See: http://cs231n.github.io/neural-networks-2/#datapre
            # e.g. scale arrays from [0, 255] to [0, 1]. Fall back to a [0, 1] target range
            # when 'scale_data' is not set (a bare True cannot be unpacked into sc_min, sc_max).
            scale = params['global']['scale_data'] if params['global']['scale_data'] else (0, 1)
            if scale:
                sc_min, sc_max = scale
                nd_array_tif = minmax_scale(nd_array_tif,
                                            orig_range=(np.min(nd_array_tif), np.max(nd_array_tif)),
                                            scale_range=(sc_min, sc_max))

            sem_seg_results = sem_seg_inference(model, nd_array_tif, nbr_pix_overlap,
                                                chunk_size, num_classes, device)
            create_new_raster_from_base(local_img, inference_image, sem_seg_results)
            tqdm.write(f"Semantic segmentation of image {img_name} completed")
            if bucket:
                bucket.upload_file(inference_image,
                                   os.path.join(params['inference']['working_folder'],
                                                f"{img_name.split('.')[0]}_inference.tif"))
    else:
        raise ValueError(f"The task should be either classification or segmentation. "
                         f"The provided value is {params['global']['task']}")

    time_elapsed = time.time() - since
    print('Inference completed in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
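
# calc_overlap() is used by both inference listings above but is not shown. A plausible
# minimal sketch, assuming the config carries a tile size and an overlap percentage under
# params['inference'] (both key names and defaults are assumptions, not the repo's):
import math

def calc_overlap(params):
    chunk_size = int(params['inference'].get('chunk_size', 512))    # tile side, in pixels
    overlap_percent = int(params['inference'].get('overlap', 10))   # overlap between tiles, in percent
    # Overlapping tiles let border predictions be discarded or blended when stitching,
    # which is what sem_seg_inference() appears to use nbr_pix_overlap for.
    nbr_pix_overlap = int(math.floor(overlap_percent / 100 * chunk_size))
    return chunk_size, nbr_pix_overlap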
def image_reader_as_array(input_image,
                          scale=None,
                          aux_vector_file=None,
                          aux_vector_attrib=None,
                          aux_vector_ids=None,
                          aux_vector_dist_maps=False,
                          aux_vector_dist_log=True,
                          aux_vector_scale=None):
    """Read an image from a file and return a 3d array (h, w, c)

    Args:
        input_image: Rasterio file handle holding the (already opened) input raster
        scale: optional (min, max) tuple giving the target range for min-max scaling of the raw data
        aux_vector_file: optional vector file from which to extract auxiliary shapes
        aux_vector_attrib: optional vector file attribute name to parse in order to fetch ids
        aux_vector_ids: optional vector ids to target in the vector file above
        aux_vector_dist_maps: flag indicating whether aux vector bands should be distance maps or binary maps
        aux_vector_dist_log: flag indicating whether log distances should be used in distance maps or not
        aux_vector_scale: optional floating point scale factor to multiply to rasterized vector maps

    Return:
        numpy array of the image (possibly concatenated with auxiliary vector channels)
    """
    np_array = np.empty([input_image.height, input_image.width, input_image.count], dtype=np.float32)
    for i in range(input_image.count):
        np_array[:, :, i] = input_image.read(i + 1)  # Bands start at 1 in rasterio, not 0

    # Guidelines for pre-processing: http://cs231n.github.io/neural-networks-2/#datapre
    # Scale arrays to [0, 1]. Useful when dealing with 8-bit *and* 16-bit images.
    if scale:
        sc_min, sc_max = scale
        np_array = minmax_scale(img=np_array,
                                orig_range=(np.min(np_array), np.max(np_array)),
                                scale_range=(sc_min, sc_max))

    # If requested, load vectors from the external file, rasterize them, and append distance maps to the array.
    if aux_vector_file is not None:
        vec_tensor = vector_to_raster(vector_file=aux_vector_file,
                                      input_image=input_image,
                                      attribute_name=aux_vector_attrib,
                                      fill=0,
                                      target_ids=aux_vector_ids,
                                      merge_all=False)
        if aux_vector_dist_maps:
            import cv2 as cv  # opencv becomes a project dependency only if we need to compute distance maps here
            vec_tensor = vec_tensor.astype(np.float32)
            for vec_band_idx in range(vec_tensor.shape[2]):
                mask = vec_tensor[:, :, vec_band_idx]
                mask = cv.dilate(mask, (3, 3))  # make points and linestrings easier to work with
                # display_resize = cv.resize(np.where(mask, np.uint8(0), np.uint8(255)), (1000, 1000))
                # cv.imshow("mask", display_resize)
                dmap = cv.distanceTransform(np.where(mask, np.uint8(0), np.uint8(255)),
                                            cv.DIST_L2, cv.DIST_MASK_PRECISE)
                if aux_vector_dist_log:
                    dmap = np.log(dmap + 1)
                # display_resize = cv.resize(cv.normalize(dmap, None, 0, 1, cv.NORM_MINMAX, dtype=cv.CV_32F), (1000, 1000))
                # cv.imshow("dmap1", display_resize)
                dmap_inv = cv.distanceTransform(np.where(mask, np.uint8(255), np.uint8(0)),
                                                cv.DIST_L2, cv.DIST_MASK_PRECISE)
                if aux_vector_dist_log:
                    dmap_inv = np.log(dmap_inv + 1)
                # display_resize = cv.resize(cv.normalize(dmap_inv, None, 0, 1, cv.NORM_MINMAX, dtype=cv.CV_32F), (1000, 1000))
                # cv.imshow("dmap2", display_resize)
                # Signed distance map: negative inside the shapes, positive outside.
                vec_tensor[:, :, vec_band_idx] = np.where(mask, -dmap_inv, dmap)
                # display = cv.normalize(vec_tensor[:, :, vec_band_idx], None, 0, 1, cv.NORM_MINMAX, dtype=cv.CV_32F)
                # display_resize = cv.resize(display, (1000, 1000))
                # cv.imshow("distmap", display_resize)
                # cv.waitKey(0)
        if aux_vector_scale:
            for vec_band_idx in range(vec_tensor.shape[2]):  # iterate over band indices, not the bare int
                vec_tensor[:, :, vec_band_idx] *= aux_vector_scale
        np_array = np.concatenate([np_array, vec_tensor], axis=2)

    return np_array
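
# Usage sketch: this revision of image_reader_as_array() expects an already-opened rasterio
# dataset handle, whereas the main() listings above pass a file path (presumably to an earlier
# revision that opened the file itself). The path below is illustrative.
import rasterio

with rasterio.open('some_image.tif') as src:
    arr = image_reader_as_array(src, scale=(0, 1))
    print(arr.shape)  # (height, width, bands), float32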