def test_update_flags(self):
    """test that update_flags updates the 'seed' flags of a samples selection"""
    from iota2.Sampling.SamplesSelection import update_flags

    # prepare test input
    test_vector_name = "T31TCJ_samples_region_1_seed_1_selection.sqlite"
    test_vector_table = "t31tcj_samples_region_1_seed_0_selection"
    test_vector = os.path.join(self.test_working_directory, test_vector_name)
    shutil.copy(self.selection_ref, test_vector)
    update_flags(test_vector, 2, table_name=test_vector_table)

    # assert
    updated_flag = "XXXX"
    nb_features_origin = len(
        fut.getFieldElement(self.selection_ref,
                            driverName="SQLite",
                            field="seed_0",
                            mode="all",
                            elemType="str"))
    features_test = fut.getFieldElement(test_vector,
                                        driverName="SQLite",
                                        field="seed_0",
                                        mode="all",
                                        elemType="str")
    nb_features_test_updated = features_test.count(updated_flag)
    self.assertTrue(nb_features_origin == nb_features_test_updated,
                    msg="update features failed")
def test_vector_splits_cross_validation(self):
    from iota2.Sampling import SplitInSubSets as VS
    from iota2.Common import FileUtils as fut

    # we execute the function splitInSubSets()
    new_region_shape = self.new_regions_shapes[0]
    VS.splitInSubSets(new_region_shape,
                      self.data_field,
                      self.region_field,
                      self.ratio,
                      self.seeds,
                      "ESRI Shapefile",
                      crossValidation=True,
                      random_seed=0)
    seed0 = fut.getFieldElement(new_region_shape,
                                driverName="ESRI Shapefile",
                                field="seed_0",
                                mode="all",
                                elemType="str")
    seed1 = fut.getFieldElement(new_region_shape,
                                driverName="ESRI Shapefile",
                                field="seed_1",
                                mode="all",
                                elemType="str")

    for elem in seed0:
        self.assertTrue(elem in ["unused", "learn"],
                        msg="flag not in ['unused', 'learn']")
    for elem in seed1:
        self.assertTrue(elem in ["unused", "validation"],
                        msg="flag not in ['unused', 'validation']")
def test_split_vector_by_region(self):
    """test : split a vector by the region it belongs to"""
    from iota2.Sampling.VectorFormatting import split_vector_by_region
    from iota2.Common.Utils import run
    from iota2.Tests.UnitTests.Iota2Tests import random_update

    # define inputs
    nb_features_origin = len(
        fut.getFieldElement(self.in_vector,
                            driverName="ESRI shapefile",
                            field="region",
                            mode="all",
                            elemType="str"))
    nb_features_new_region = 5
    test_vector_name = "T31TCJ_Samples.sqlite"
    test_vector = os.path.join(self.test_working_directory, test_vector_name)
    cmd = "ogr2ogr -nln output -f SQLite {} {}".format(test_vector,
                                                       self.in_vector)
    run(cmd)

    random_update(test_vector, "output", "seed_0", "learn",
                  nb_features_origin)
    random_update(test_vector, "output", "region", "2",
                  nb_features_new_region)

    output_dir = self.test_working_directory
    region_field = "region"

    # launch function
    split_vector_by_region(test_vector,
                           output_dir,
                           region_field,
                           runs=1,
                           driver="SQLite")
    # assert
    vector_reg_1 = fut.FileSearch_AND(self.test_working_directory, True,
                                      "region_1")[0]
    vector_reg_2 = fut.FileSearch_AND(self.test_working_directory, True,
                                      "region_2")[0]

    feat_vect_reg_1 = len(
        fut.getFieldElement(vector_reg_1,
                            driverName="SQLite",
                            field="region",
                            mode="all",
                            elemType="str"))
    feat_vect_reg_2 = len(
        fut.getFieldElement(vector_reg_2,
                            driverName="SQLite",
                            field="region",
                            mode="all",
                            elemType="str"))

    self.assertTrue(nb_features_new_region == feat_vect_reg_2)
    self.assertTrue(nb_features_origin == feat_vect_reg_1 + feat_vect_reg_2)
def config_model(outputPath, region_field):
    """
    usage : determine which model will classify which tile
    """
    # const
    output = None
    pos_tile = 0
    formatting_vec_dir = os.path.join(outputPath, "formattingVectors")
    samples = fu.FileSearch_AND(formatting_vec_dir, True, ".shp")

    # init
    all_regions = []
    for sample in samples:
        tile_name = os.path.splitext(
            os.path.basename(sample))[0].split("_")[pos_tile]
        regions = fu.getFieldElement(sample,
                                     driverName="ESRI Shapefile",
                                     field=region_field,
                                     mode="unique",
                                     elemType="str")
        for region in regions:
            all_regions.append((region, tile_name))

    # {'model_name': [TileName, TileName, ...], '...': ..., ...}
    model_tiles = dict(fu.sortByFirstElem(all_regions))

    # add tiles if they are missing, by checking the /shapeRegion/ directory
    shape_region_dir = os.path.join(outputPath, "shapeRegion")
    shape_region_path = fu.FileSearch_AND(shape_region_dir, True, ".shp")

    # check that there actually are polygons
    shape_regions = [
        elem for elem in shape_region_path if len(
            fu.getFieldElement(elem,
                               driverName="ESRI Shapefile",
                               field=region_field,
                               mode="all",
                               elemType="str")) >= 1
    ]
    for shape_region in shape_regions:
        tile = os.path.splitext(
            os.path.basename(shape_region))[0].split("_")[-1]
        region = os.path.splitext(
            os.path.basename(shape_region))[0].split("_")[-2]
        for model_name, tiles_model in list(model_tiles.items()):
            if model_name.split("f")[0] == region and tile not in tiles_model:
                tiles_model.append(tile)

    # construct the output file string
    output = "AllModel:\n["
    for model_name, tiles_model in list(model_tiles.items()):
        output_tmp = "\n\tmodelName:'{}'\n\ttilesList:'{}'".format(
            model_name, "_".join(tiles_model))
        output = output + "\n\t{" + output_tmp + "\n\t}"
    output += "\n]"
    return output
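# Hedged usage sketch for config_model: "/data/iota2_run" and the "region"
# field below are placeholder assumptions, not values from the source. The
# function only reads shapefiles under <outputPath>/formattingVectors and
# <outputPath>/shapeRegion, then returns the "AllModel" configuration string.
model_cfg = config_model("/data/iota2_run", "region")
# the returned string uses the iota2 "AllModel" syntax, e.g.:
# AllModel:
# [
#     {
#     modelName:'1'
#     tilesList:'T31TCJ_T31TDJ'
#     }
# ]
print(model_cfg)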
def test_extract_maj_vote_samples(self):
    """test the extraction of samples by class according to a ratio"""
    from iota2.Sampling.VectorFormatting import extract_maj_vote_samples
    from collections import Counter

    # define inputs
    in_vector_name = os.path.basename(self.in_vector)
    extracted_vector_name = "extracted_samples.sqlite"
    in_vector = os.path.join(self.test_working_directory, in_vector_name)
    extracted_vector = os.path.join(self.test_working_directory,
                                    extracted_vector_name)
    fut.cpShapeFile(self.in_vector.replace(".shp", ""),
                    in_vector.replace(".shp", ""),
                    [".prj", ".shp", ".dbf", ".shx"])

    # launch function
    dataField = "code"
    regionField = "region"
    extraction_ratio = 0.5
    extract_maj_vote_samples(in_vector, extracted_vector, extraction_ratio,
                             dataField, regionField)
    # assert
    features_origin = fut.getFieldElement(self.in_vector,
                                          driverName="ESRI Shapefile",
                                          field=dataField,
                                          mode="all",
                                          elemType="str")
    by_class_origin = Counter(features_origin)

    features_in_vector = fut.getFieldElement(in_vector,
                                             driverName="ESRI Shapefile",
                                             field=dataField,
                                             mode="all",
                                             elemType="str")
    by_class_in_vector = Counter(features_in_vector)

    features_extract_vector = fut.getFieldElement(extracted_vector,
                                                  driverName="SQLite",
                                                  field=dataField,
                                                  mode="all",
                                                  elemType="str")
    by_class_extract_vector = Counter(features_extract_vector)

    buff = []
    for class_name, class_count in list(by_class_origin.items()):
        buff.append(
            by_class_in_vector[class_name] == extraction_ratio * class_count)
    self.assertTrue(all(buff), msg="extraction of samples failed")
def test_split_selection(self):
    """test dedicated to check that the split_sel function works"""
    from iota2.Sampling.SamplesSelection import split_sel
    from Iota2Tests import random_update
    from TestsUtils import rename_table

    # prepare test input
    test_vector_name = "samples_region_1_seed_0.sqlite"
    test_vector_table = "t31tcj_samples_region_1_seed_0_selection"
    test_vector = os.path.join(self.test_working_directory, test_vector_name)
    shutil.copy(self.selection_ref, test_vector)

    # update "nb_feat" features with a new tile name, "new_tile_name"
    nb_feat = 10
    new_tile_name = "T31TDJ"
    random_update(test_vector, test_vector_table, "tile_o", new_tile_name,
                  nb_feat)
    rename_table(test_vector,
                 old_table_name=test_vector_table,
                 new_table_name="output")
    # launch function
    new_files = split_sel(test_vector, ["T31TCJ", new_tile_name],
                          self.test_working_directory, "EPSG:2154")
    # assert
    nb_features_origin = len(
        fut.getFieldElement(self.selection_ref,
                            driverName="SQLite",
                            field="tile_o",
                            mode="all",
                            elemType="str"))
    nb_features_t31tcj = len(
        fut.getFieldElement(new_files[0],
                            driverName="SQLite",
                            field="tile_o",
                            mode="all",
                            elemType="str"))
    nb_features_t31tdj = len(
        fut.getFieldElement(new_files[1],
                            driverName="SQLite",
                            field="tile_o",
                            mode="all",
                            elemType="str"))
    self.assertTrue(nb_features_t31tdj == nb_feat,
                    msg="split samples selection failed")
    self.assertTrue(nb_features_origin == nb_features_t31tdj +
                    nb_features_t31tcj,
                    msg="split samples selection failed")
def test_iota2_augmentation(self):
    """Test data augmentation workflow"""
    from collections import Counter

    class_augmentation_balance = DataAugmentation.SamplesAugmentationCounter(
        self.class_count, mode="balance", minNumber=None, byClass=None)
    DataAugmentation.DoAugmentation(self.vector_test,
                                    class_augmentation_balance,
                                    strategy="jitter",
                                    field="code",
                                    excluded_fields=[],
                                    Jstdfactor=10,
                                    Sneighbors=None,
                                    workingDirectory=None)
    class_count_test = Counter(
        fut.getFieldElement(self.vector_test,
                            driverName="SQLite",
                            field="code",
                            mode="all",
                            elemType="int"))
    samples_number = self.class_count[max(
        self.class_count, key=lambda key: self.class_count[key])]
    self.assertTrue(
        all([samples_number == v for k, v in list(class_count_test.items())]))
def extract_class(vec_in: str, vec_out: str, target_class: List[str],
                  data_field: str) -> int:
    """extract the features of the given classes into a new SQLite file

    Parameters
    ----------
    vec_in: str
        input vector file
    vec_out: str
        output vector file (SQLite)
    target_class: List[str]
        classes to extract
    data_field: str
        field containing class labels
    """
    from iota2.Common.Utils import run
    if not isinstance(target_class, list):
        target_class = target_class.data
    where = " OR ".join(
        ["{}={}".format(data_field.lower(), klass) for klass in target_class])
    cmd = (f"ogr2ogr -f 'SQLite' -nln output -where '{where}' "
           f"{vec_out} {vec_in}")
    run(cmd)
    return len(
        fu.getFieldElement(vec_out,
                           driverName="SQLite",
                           field=data_field.lower(),
                           mode="all",
                           elemType="int"))
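# Hedged usage sketch for extract_class: paths and class labels below are
# placeholder assumptions. The call keeps only classes 11 and 12 from the
# samples selection and returns the number of extracted features.
nb_extracted = extract_class("/tmp/T31TCJ_selection.sqlite",
                             "/tmp/T31TCJ_non_annual.sqlite",
                             ["11", "12"],
                             "code")
print(f"{nb_extracted} features written to the SQLite output")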
def split(regions_split: Dict[str, int], regions_tiles: Dict[str, List[str]],
          data_field: str, region_field: str) -> List[str]:
    """
    function dedicated to splitting huge regions into sub-regions

    Parameters
    ----------
    regions_split: Dict[str, int]
        number of sub-regions (folds) by region
    regions_tiles: Dict[str, List[str]]
        vector files by region

    Return
    ------
    List[str]
        paths to the updated vector files
    """
    from iota2.Common import FileUtils as fut
    updated_vectors = []

    for region, fold in list(regions_split.items()):
        vector_paths = regions_tiles[region]
        for vec in vector_paths:
            # init the dict of new regions
            new_regions_dict = {}
            for sub_fold in range(fold):
                # new region names are defined here
                new_regions_dict["{}f{}".format(region, sub_fold + 1)] = []
            # get possible classes
            class_vector = fut.getFieldElement(vec,
                                               driverName="SQLite",
                                               field=data_field,
                                               mode="unique",
                                               elemType="str")
            dic_class = {}
            # get FID values for all classes of the current region in
            # the current tile
            for c_class in class_vector:
                dic_class[c_class] = get_fid_values(vec, data_field,
                                                    region_field, region,
                                                    c_class)
            nb_feat = 0
            for _, fid_cl in list(dic_class.items()):
                if fid_cl:
                    fid_folds = fut.splitList(fid_cl, fold)
                    # fill new_regions_dict
                    for i, fid_fold in enumerate(fid_folds):
                        new_regions_dict[f"{region}f{i+1}"] += fid_fold
                nb_feat += len(fid_cl)
            update_vector(vec, region_field, new_regions_dict)
            if vec not in updated_vectors:
                updated_vectors.append(vec)
    return updated_vectors
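# Hedged usage sketch for split: the dictionaries below are placeholder
# assumptions (it also relies on the get_fid_values / update_vector helpers
# defined elsewhere in this module). Region '1' is split into two sub-regions
# ('1f1', '1f2') inside both tile databases; region '2' keeps a single fold.
updated = split(regions_split={"1": 2, "2": 1},
                regions_tiles={"1": ["/tmp/T31TCJ.sqlite",
                                     "/tmp/T31TDJ.sqlite"],
                               "2": ["/tmp/T31TDJ.sqlite"]},
                data_field="code",
                region_field="region")
print(updated)  # each updated vector file appears once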
def get_models(formatting_vector_directory: str, region_field: str,
               runs: int) -> List[Tuple[str, List[str], int]]:
    """
    usage : describe the spatial repartition of samples

    function used to determine which shapeFiles have to be merged in order
    to compute statistics with otb_SampleSelection

    OUT:
        regions_tiles_seed [list] : example
            regions_tiles_seed = [('1', ['T1', 'T2'], 0), ('1', ['T1', 'T2'], 1),
                                  ('2', ['T2', 'T3'], 0), ('2', ['T2', 'T3'], 1)]
            means region '1' is present in tiles 'T1' and 'T2' in runs 0 and 1,
            and region '2' in tiles 'T2' and 'T3' in runs 0 and 1
    """
    # the way of getting regions could be improved
    tiles = fut.FileSearch_AND(formatting_vector_directory, True, ".shp")
    region_tile = []
    all_regions_in_run = []
    for tile in tiles:
        all_regions = []
        tile_name = os.path.splitext(os.path.basename(tile))[0]
        r_tmp = fut.getFieldElement(tile,
                                    driverName="ESRI Shapefile",
                                    field=region_field,
                                    mode="unique",
                                    elemType="str")
        for r_tile in r_tmp:
            if r_tile not in all_regions:
                all_regions.append(r_tile)
        for region in all_regions:
            if region not in all_regions_in_run:
                all_regions_in_run.append(region)
            region_tile.append((region, tile_name))

    region_tile_tmp = dict(fut.sortByFirstElem(region_tile))
    region_tile_dic = {}
    for region, region_tiles in list(region_tile_tmp.items()):
        region_tile_dic[region] = list(set(region_tiles))

    all_regions_in_run = sorted(all_regions_in_run)
    regions_tiles_seed = [(region, region_tile_dic[region], run)
                          for run in range(runs)
                          for region in all_regions_in_run]
    return regions_tiles_seed
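# Hedged usage sketch for get_models: the directory below is a placeholder
# assumption. With runs=2, each region appears once per run, paired with the
# list of tiles it intersects.
for region, tiles, run in get_models("/data/iota2_run/formattingVectors",
                                     "region", runs=2):
    print(f"model {region} (run {run}) uses tiles {tiles}")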
def region_tile(sample_sel_dir: str):
    """list the (region, seed, tile) triplets found in a samples selection
    directory"""
    tile_field_name = "tile_o"
    region_vectors = fut.FileSearch_AND(sample_sel_dir, True, ".shp")
    output = []
    region_vectors = sorted(region_vectors)
    for region_vector in region_vectors:
        tiles = fut.getFieldElement(region_vector,
                                    driverName="ESRI Shapefile",
                                    field=tile_field_name,
                                    mode="unique",
                                    elemType="str")
        region_name = os.path.splitext(
            os.path.basename(region_vector))[0].split("_")[2]
        seed = os.path.splitext(
            os.path.basename(region_vector))[0].split("_")[4]
        tiles = sorted(tiles)
        for tile in tiles:
            output.append((region_name, seed, tile))
    return output
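# Hedged usage sketch for region_tile: the path is a placeholder assumption.
# It assumes shapefiles named like <tile>_samples_region_<R>_seed_<S>_*.shp,
# which is the pattern the index positions above (2 and 4) parse.
for region_name, seed, tile in region_tile(
        "/data/iota2_run/samplesSelection"):
    print(f"region {region_name}, seed {seed}, tile {tile}")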
def test_vectorSplitsNoSplits(self):
    from iota2.Sampling import SplitInSubSets as VS
    from iota2.Common import FileUtils as fut

    new_region_shape = self.new_regions_shapes[0]
    tile_name = os.path.splitext(os.path.basename(new_region_shape))[0]
    VS.splitInSubSets(new_region_shape,
                      self.data_field,
                      self.region_field,
                      self.ratio,
                      1,
                      "ESRI Shapefile",
                      crossValidation=False,
                      splitGroundTruth=False,
                      random_seed=0)
    seed0 = fut.getFieldElement(new_region_shape,
                                driverName="ESRI Shapefile",
                                field="seed_0",
                                mode="all",
                                elemType="str")
    for elem in seed0:
        self.assertTrue(elem in ["learn"], msg="flag not in ['learn']")
def gen_raster_ref(vec, output_path, masks_name, working_directory):
    """
    generate the reference image needed by the sampleSelection application

    Parameters
    ----------
    vec : string
        path to the shapeFile containing all polygons dedicated to learn
        a model
    output_path : string
        iota2 output directory
    masks_name : string
        name of the common mask raster to look for in each tile
    working_directory : string
        path to a working directory
    """
    from iota2.Common.Utils import run
    tile_field_name = "tile_o"
    features_directory = os.path.join(output_path, "features")
    tiles = fut.getFieldElement(vec,
                                driverName="ESRI Shapefile",
                                field=tile_field_name,
                                mode="unique",
                                elemType="str")
    rasters_tiles = [
        fut.FileSearch_AND(os.path.join(features_directory, tile_name), True,
                           masks_name)[0] for tile_name in tiles
    ]
    raster_ref_name = "ref_raster_{}.tif".format(
        os.path.splitext(os.path.basename(vec))[0])
    raster_ref = os.path.join(working_directory, raster_ref_name)
    raster_ref_cmd = "gdal_merge.py -ot Byte -n 0 -createonly -o {} {}".format(
        raster_ref, " ".join(rasters_tiles))
    run(raster_ref_cmd)
    return raster_ref, tiles
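# Hedged usage sketch for gen_raster_ref: paths are placeholder assumptions.
# The merged raster (created empty with gdal_merge.py -createonly) covers
# every tile listed in the 'tile_o' field of the input vector.
raster_ref, tiles = gen_raster_ref(
    "/data/iota2_run/samplesSelection/samples_region_1_seed_0.shp",
    "/data/iota2_run", "MaskCommunSL.tif", "/tmp")
print(raster_ref, tiles)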
def confusion_fusion(input_vector: str, data_field: str, csv_out: str,
                     txt_out: str, csv_path: str, runs: int, crop_mix: bool,
                     annual_crop: List[str],
                     annual_crop_label_replacement: int) -> None:
    """merge otb tile confusion matrices

    Parameters
    ----------
    input_vector: str
        input database
    data_field: str
        data field
    csv_out: str
        output directory which will contain the merged matrices as csv files
    txt_out: str
        directory which will contain the resulting files of merged matrices
    csv_path: str
        path to the directory which contains all *.csv files to merge
    runs: int
        number of random learning/validation sample sets
    crop_mix: bool
        whether the cropMix workflow is enabled
    annual_crop: list
        list of annual labels
    annual_crop_label_replacement: int
        replace annual labels by annual_crop_label_replacement
    """
    for seed in range(runs):
        # search for all possible classes
        all_class = fu.getFieldElement(input_vector, "ESRI Shapefile",
                                       data_field, "unique")
        all_class = sorted(all_class)

        # initialize the final matrix
        all_conf = fu.FileSearch_AND(csv_path, True,
                                     "seed_" + str(seed) + ".csv")
        csv = fu.confCoordinatesCSV(all_conf)
        csv_f = fu.sortByFirstElem(csv)

        conf_mat = fu.gen_confusionMatrix(csv_f, all_class)
        if crop_mix:
            write_csv(
                conf_mat, all_class,
                csv_out + "/MatrixBeforeClassMerge_" + str(seed) + ".csv")
            conf_mat, all_class = replace_annual_crop_in_conf_mat(
                conf_mat, all_class, annual_crop,
                annual_crop_label_replacement)
            write_csv(conf_mat, all_class,
                      csv_out + "/Classif_Seed_" + str(seed) + ".csv")
        else:
            write_csv(conf_mat, all_class,
                      csv_out + "/Classif_Seed_" + str(seed) + ".csv")

        nbr_good = conf_mat.trace()
        nbr_sample = conf_mat.sum()
        if nbr_sample > 1:
            overall_acc = float(nbr_good) / float(nbr_sample)
        else:
            overall_acc = 0.0
        kappa = compute_kappa(conf_mat)
        precision = compute_precision_by_class(conf_mat, all_class)
        recall = compute_recall_by_class(conf_mat, all_class)
        f_score = compute_fscore_by_class(precision, recall, all_class)
        write_results(
            f_score, recall, precision, kappa, overall_acc, all_class,
            txt_out + "/ClassificationResults_seed_" + str(seed) + ".txt")
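# Hedged usage sketch for confusion_fusion: paths and labels are placeholder
# assumptions. One merged matrix (csv) and one result file (txt) are produced
# per seed.
confusion_fusion(input_vector="/data/iota2_run/groundTruth.shp",
                 data_field="code",
                 csv_out="/data/iota2_run/final/TMP",
                 txt_out="/data/iota2_run/final/TMP",
                 csv_path="/data/iota2_run/final/TMP",
                 runs=2,
                 crop_mix=False,
                 annual_crop=["11", "12"],
                 annual_crop_label_replacement=500)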
def create_tile_region_masks(tile_region: str, region_field: str,
                             tile_name: str, output_directory: str,
                             origin_name: str, img_ref: str) -> None:
    """rasterize the region polygons into one binary mask per region

    Parameters
    ----------
    tile_region : string
        path to a SQLite file containing polygons. Each feature is a region
    region_field : string
        region's field
    tile_name : string
        current tile name
    output_directory : string
        directory to save masks
    origin_name : string
        region's field vector file name
    img_ref : string
        path to a tile reference image
    """
    from iota2.Common import FileUtils as fut
    from iota2.Common import OtbAppBank as otb
    from iota2.Common.Utils import run

    all_regions_tmp = fut.getFieldElement(tile_region,
                                          driverName="SQLite",
                                          field=region_field.lower(),
                                          mode="unique",
                                          elemType="str")
    # transform sub-region names into complete region names
    # (region '1f1' becomes region '1')
    all_regions = []
    for region in all_regions_tmp:
        reg = region.split("f")[0]
        all_regions.append(reg)
    region = None
    for region in all_regions:
        output_name = f"{origin_name}_region_{region}_{tile_name}.shp"
        output_path = os.path.join(output_directory, output_name)
        db_name = (os.path.splitext(os.path.basename(tile_region))[0]).lower()
        cmd = (f"ogr2ogr -f 'ESRI Shapefile' -sql \"SELECT * FROM {db_name}"
               f" WHERE {region_field}='{region}'\" {output_path}"
               f" {tile_region}")
        run(cmd)

        path, _ = os.path.splitext(output_path)
        tile_region_raster = "{}.tif".format(path)
        tile_region_app = otb.CreateRasterizationApplication({
            "in": output_path,
            "out": tile_region_raster,
            "im": img_ref,
            "mode": "binary",
            "pixType": "uint8",
            "background": "0",
            "mode.binary.foreground": "1"
        })
        tile_region_app.ExecuteAndWriteOutput()
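# Hedged usage sketch for create_tile_region_masks: paths are placeholder
# assumptions. For each region found in the SQLite file, a shapefile and a
# binary raster mask (0/1) aligned on img_ref are written.
create_tile_region_masks(
    "/tmp/T31TCJ_region.sqlite",
    "region",
    "T31TCJ",
    "/data/iota2_run/shapeRegion",
    "MyRegion",
    "/data/iota2_run/features/T31TCJ/MaskCommunSL.tif")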
def test_splitbySets(self):
    """test the split of a given vector"""
    from iota2.Sampling.VectorFormatting import split_by_sets

    # launch function
    s0_val, s0_learn, s1_val, s1_learn = split_by_sets(
        self.in_vector,
        2,
        self.test_working_directory,
        2154,
        2154,
        "T31TCJ",
        cross_valid=False,
        split_ground_truth=True)
    # assert
    seed_0 = fut.getFieldElement(self.in_vector,
                                 driverName="ESRI Shapefile",
                                 field="seed_0",
                                 mode="all",
                                 elemType="str")
    seed_1 = fut.getFieldElement(self.in_vector,
                                 driverName="ESRI Shapefile",
                                 field="seed_1",
                                 mode="all",
                                 elemType="str")

    seed_0_learn_ref = seed_0.count("learn")
    seed_0_val_ref = seed_0.count("validation")
    seed_1_learn_ref = seed_1.count("learn")
    seed_1_val_ref = seed_1.count("validation")

    seed_0_learn_test = len(
        fut.getFieldElement(s0_learn,
                            driverName="SQLite",
                            field="region",
                            mode="all",
                            elemType="str"))
    seed_0_val_test = len(
        fut.getFieldElement(s0_val,
                            driverName="SQLite",
                            field="region",
                            mode="all",
                            elemType="str"))
    seed_1_learn_test = len(
        fut.getFieldElement(s1_learn,
                            driverName="SQLite",
                            field="region",
                            mode="all",
                            elemType="str"))
    seed_1_val_test = len(
        fut.getFieldElement(s1_val,
                            driverName="SQLite",
                            field="region",
                            mode="all",
                            elemType="str"))

    self.assertTrue(seed_0_learn_test == seed_0_learn_ref,
                    msg="wrong number of learning samples in seed 0")
    self.assertTrue(seed_1_learn_test == seed_1_learn_ref,
                    msg="wrong number of learning samples in seed 1")
    self.assertTrue(seed_0_val_test == seed_0_val_ref,
                    msg="wrong number of validation samples in seed 0")
    self.assertTrue(seed_1_val_test == seed_1_val_ref,
                    msg="wrong number of validation samples in seed 1")
def split_vector_by_region(in_vect: str,
                           output_dir: str,
                           region_field: str,
                           runs: Optional[int] = 1,
                           driver: Optional[str] = "ESRI shapefile",
                           proj_in: Optional[str] = "EPSG:2154",
                           proj_out: Optional[str] = "EPSG:2154",
                           mode: Optional[str] = "usually") -> List[str]:
    """
    create new files by region from the input vector

    Parameters
    ----------
    in_vect : string
        input vector path
    output_dir : string
        path to the output directory
    region_field : string
        field in in_vect describing regions
    runs : int
        number of runs
    driver : string
        ogr driver
    proj_in : string
        input projection
    proj_out : string
        output projection
    mode : string
        defines whether SAR samples are split apart from the others

    Return
    ------
    list
        paths to the new output vectors
    """
    from iota2.Common import FileUtils as fut
    from iota2.Common.Utils import run

    output_paths = []

    # const
    tile_pos = 0
    learn_flag = "learn"
    table_name = "output"

    vec_name = os.path.split(in_vect)[-1]
    tile = vec_name.split("_")[tile_pos]
    extent = os.path.splitext(vec_name)[-1]

    regions = fut.getFieldElement(in_vect,
                                  driverName=driver,
                                  field=region_field,
                                  mode="unique",
                                  elemType="str")

    table = vec_name.split(".")[0]
    if driver != "ESRI shapefile":
        table = "output"
    # split vector
    for seed in range(runs):
        fields_to_keep = ",".join([
            elem for elem in fut.get_all_fields_in_shape(in_vect, "SQLite")
            if "seed_" not in elem
        ])
        for region in regions:
            out_vec_name_learn = "_".join([
                tile, "region", region, "seed" + str(seed),
                "Samples_learn_tmp"
            ])
            if mode != "usually":
                out_vec_name_learn = "_".join([
                    tile, "region", region, "seed" + str(seed), "Samples",
                    "SAR", "learn_tmp"
                ])
            output_vec_learn = os.path.join(output_dir,
                                            out_vec_name_learn + extent)
            seed_clause_learn = f"seed_{seed}='{learn_flag}'"
            region_clause = f"{region_field}='{region}'"
            # split vectors by runs and learning sets
            sql_cmd_learn = (f"select * FROM {table} WHERE {seed_clause_learn}"
                             f" AND {region_clause}")
            cmd = (f'ogr2ogr -t_srs {proj_out} -s_srs {proj_in} -nln {table}'
                   f' -f "{driver}" -sql "{sql_cmd_learn}" {output_vec_learn} '
                   f'{in_vect}')
            run(cmd)

            # drop useless columns
            sql_clause = f"select GEOMETRY,{fields_to_keep} from {table_name}"
            output_vec_learn_out = output_vec_learn.replace("_tmp", "")
            cmd = (
                f"ogr2ogr -s_srs {proj_in} -t_srs {proj_out} -dialect "
                f"'SQLite' -f 'SQLite' -nln {table_name} -sql '{sql_clause}' "
                f"{output_vec_learn_out} {output_vec_learn}")
            run(cmd)
            output_paths.append(output_vec_learn_out)
            os.remove(output_vec_learn)

    return output_paths
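# Hedged usage sketch for split_vector_by_region: the input below is a
# placeholder assumption. For every (seed, region) pair, one SQLite file
# containing only the 'learn' features of that region is produced.
learning_vectors = split_vector_by_region("/tmp/T31TCJ_Samples.sqlite",
                                          output_dir="/tmp",
                                          region_field="region",
                                          runs=2,
                                          driver="SQLite")
print(learning_vectors)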
def test_VectorFormatting(self):
    """test the vector_formatting function

    a random function is used in Sampling.VectorFormatting.vector_formatting,
    so we can only check that there is the expected number of features, with
    the expected fields and some expected feature values
    """
    from iota2.Sampling.VectorFormatting import vector_formatting
    from iota2.Common import ServiceConfigFile as SCF
    from iota2.Common import IOTA2Directory
    from iota2.Common.Utils import run
    from iota2.VectorTools.ChangeNameField import changeName

    # define inputs
    test_output = os.path.join(self.test_working_directory,
                               "IOTA2_dir_VectorFormatting")
    # prepare ground truth
    ground_truth = os.path.join(self.test_working_directory,
                                "groundTruth_test.shp")
    cmd = ("ogr2ogr -s_srs EPSG:2154 -t_srs EPSG:2154 -dialect 'SQLite' "
           "-sql 'select GEOMETRY,code from t31tcj' {} {}").format(
               ground_truth, self.in_vector)
    run(cmd)

    # cfg instance
    runs = 2
    cfg = SCF.serviceConfigFile(self.config_test)
    cfg.setParam('chain', 'outputPath', test_output)
    cfg.setParam('chain', 'groundTruth', ground_truth)
    cfg.setParam('chain', 'dataField', "code")
    cfg.setParam('chain', 'cloud_threshold', 0)
    cfg.setParam('chain', 'merge_final_classifications', False)
    cfg.setParam('chain', 'runs', runs)
    cfg.setParam('GlobChain', 'proj', "EPSG:2154")
    cfg.setParam('chain', 'regionPath', self.ref_region)

    IOTA2Directory.generate_directories(test_output, check_inputs=False)

    # prepare expected function inputs
    t31tcj_feat_dir = os.path.join(self.test_working_directory,
                                   "IOTA2_dir_VectorFormatting", "features",
                                   "T31TCJ")
    os.mkdir(t31tcj_feat_dir)
    # prepare ref img
    t31tcj_ref_img = os.path.join(t31tcj_feat_dir, "MaskCommunSL.tif")
    shutil.copy(self.ref_img, t31tcj_ref_img)
    # prepare envelope
    envelope_name = "T31TCJ.shp"
    envelope_path = os.path.join(self.test_working_directory,
                                 "IOTA2_dir_VectorFormatting", "envelope",
                                 envelope_name)
    fut.cpShapeFile(self.ref_region.replace(".shp", ""),
                    envelope_path.replace(".shp", ""),
                    [".prj", ".shp", ".dbf", ".shx"])
    changeName(envelope_path, "region", "FID")
    # prepare cloud mask
    cloud_name = "CloudThreshold_0.shp"
    cloud_path = os.path.join(self.test_working_directory,
                              "IOTA2_dir_VectorFormatting", "features",
                              "T31TCJ", cloud_name)
    fut.cpShapeFile(self.ref_region.replace(".shp", ""),
                    cloud_path.replace(".shp", ""),
                    [".prj", ".shp", ".dbf", ".shx"])
    changeName(cloud_path, "region", "cloud")

    # launch function
    ratio = cfg.getParam('chain', 'ratio')
    random_seed = cfg.getParam('chain', 'random_seed')
    enable_cross_validation = cfg.getParam("chain", "enableCrossValidation")
    enable_split_ground_truth = cfg.getParam('chain', 'splitGroundTruth')
    fusion_merge_all_validation = cfg.getParam(
        'chain', 'fusionOfClassificationAllSamplesValidation')
    merge_final_classifications = cfg.getParam('chain',
                                               'merge_final_classifications')
    merge_final_classifications_ratio = cfg.getParam(
        'chain', 'merge_final_classifications_ratio')
    region_vec = cfg.getParam('chain', 'regionPath')
    epsg = int(cfg.getParam('GlobChain', 'proj').split(":")[-1])
    region_field = (cfg.getParam('chain', 'regionField'))
    vector_formatting("T31TCJ",
                      test_output,
                      ground_truth,
                      "code",
                      0,
                      ratio,
                      random_seed,
                      enable_cross_validation,
                      enable_split_ground_truth,
                      fusion_merge_all_validation,
                      runs,
                      epsg,
                      region_field,
                      merge_final_classifications,
                      merge_final_classifications_ratio,
                      region_vec,
                      working_directory=None)

    # assert
    nb_features_origin = len(
        fut.getFieldElement(ground_truth,
                            driverName="ESRI Shapefile",
                            field="code",
                            mode="all",
                            elemType="str"))
    test_vector = fut.FileSearch_AND(
        os.path.join(test_output, "formattingVectors"), True, "T31TCJ.shp")[0]
    nb_features_test = len(
        fut.getFieldElement(test_vector,
                            driverName="ESRI Shapefile",
                            field="code",
                            mode="all",
                            elemType="str"))
    # check nb features
    self.assertTrue(nb_features_origin == nb_features_test,
                    msg="wrong number of features")
    # check fields
    origin_fields = fut.get_all_fields_in_shape(ground_truth)
    test_fields = fut.get_all_fields_in_shape(test_vector)
    new_fields = ['region', 'originfid', 'seed_0', 'seed_1', 'tile_o']
    expected_fields = origin_fields + new_fields
    self.assertTrue(len(expected_fields) == len(test_fields))
    self.assertTrue(all(field in test_fields for field in expected_fields))
def confusion_sar_optical_parameter(iota2_dir: str,
                                    logger: Optional[Logger] = LOGGER):
    """
    return a list of tuples pairing each classification with the associated
    shapeFile needed to compute a confusion matrix
    """
    ref_vectors_dir = os.path.join(iota2_dir, "dataAppVal", "bymodels")
    classifications_dir = os.path.join(iota2_dir, "classif")

    vector_seed_pos = 4
    vector_tile_pos = 0
    vector_model_pos = 2
    classif_seed_pos = 5
    classif_tile_pos = 1
    classif_model_pos = 3

    vectors = fu.FileSearch_AND(ref_vectors_dir, True, ".shp")
    classifications = fu.FileSearch_AND(classifications_dir, True, "Classif",
                                        "model", "seed", ".tif")
    group = []
    for vector in vectors:
        vec_name = os.path.basename(vector)
        seed = vec_name.split("_")[vector_seed_pos]
        tile = vec_name.split("_")[vector_tile_pos]
        model = vec_name.split("_")[vector_model_pos]
        key = (seed, tile, model)
        fields = fu.get_all_fields_in_shape(vector)
        if len(
                fu.getFieldElement(vector,
                                   driverName="ESRI Shapefile",
                                   field=fields[0],
                                   mode="all",
                                   elemType="str")) != 0:
            group.append((key, vector))
    for classif in classifications:
        classif_name = os.path.basename(classif)
        seed = classif_name.split("_")[classif_seed_pos].split(".tif")[0]
        tile = classif_name.split("_")[classif_tile_pos]
        model = classif_name.split("_")[classif_model_pos]
        key = (seed, tile, model)
        group.append((key, classif))

    # group by keys
    groups_param_buff = [param for key, param in fu.sortByFirstElem(group)]
    groups_param = []
    # check that every parameter needed was found
    for group in groups_param_buff:
        if len(group) != 3:
            logger.debug(f"some parameters needed for the Dempster-Shafer "
                         f"fusion were not found : {group}")
        else:
            groups_param.append(group)

    # output
    output_parameters = []
    for param in groups_param:
        for sub_param in param:
            if ".shp" in sub_param:
                ref_vector = sub_param
            elif "SAR.tif" in sub_param:
                classif_sar = sub_param
            elif ".tif" in sub_param and "SAR.tif" not in sub_param:
                classif_opt = sub_param
        output_parameters.append((ref_vector, classif_opt))
        output_parameters.append((ref_vector, classif_sar))
    return output_parameters
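# Hedged usage sketch for confusion_sar_optical_parameter: the directory is
# a placeholder assumption. Each reference vector is paired twice, once with
# the optical classification and once with the SAR one.
for ref_vector, classif in confusion_sar_optical_parameter("/data/iota2_run"):
    print(f"confusion matrix: {classif} against {ref_vector}")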
def splitInSubSets(vectoFile,
                   dataField,
                   regionField,
                   ratio=0.5,
                   seeds=1,
                   driver_name="SQLite",
                   learningFlag="learn",
                   validationFlag="validation",
                   unusedFlag="unused",
                   crossValidation=False,
                   splitGroundTruth=True,
                   random_seed=None):
    """
    This function is dedicated to splitting a shape into N subsets of
    training and validation samples, by adding one new field per subset
    (seed_X) containing 'learn', 'validation' or 'unused'

    Parameters
    ----------
    vectoFile : string
        input vector file
    dataField : string
        field which discriminates class
    regionField : string
        field which discriminates region
    ratio : float
        ratio between learning and validation features
    seeds : int
        number of random splits
    driver_name : string
        OGR driver name
    learningFlag : string
        learning flag
    validationFlag : string
        validation flag
    unusedFlag : string
        unused flag
    crossValidation : bool
        enable the cross-validation split
    splitGroundTruth : bool
        enable the ground truth split
    random_seed : int
        random seed
    """
    driver = ogr.GetDriverByName(driver_name)
    source = driver.Open(vectoFile, 1)
    layer = source.GetLayer(0)

    class_avail = fut.getFieldElement(vectoFile,
                                      driverName=driver_name,
                                      field=dataField,
                                      mode="unique",
                                      elemType="int")
    region_avail = fut.getFieldElement(vectoFile,
                                       driverName=driver_name,
                                       field=regionField,
                                       mode="unique",
                                       elemType="str")
    all_fields = fut.get_all_fields_in_shape(vectoFile, driver=driver_name)
    fid_area = [(f.GetFID(), f.GetGeometryRef().GetArea()) for f in layer]
    fid = [fid_ for fid_, area in fid_area]

    id_learn = []
    id_val = []
    if crossValidation:
        id_CrossVal = get_CrossValId(layer, dataField, class_avail, seeds,
                                     regionField, region_avail)

    for seed in range(seeds):
        source = driver.Open(vectoFile, 1)
        layer = source.GetLayer(0)
        seed_field_name = "seed_" + str(seed)
        seed_field = ogr.FieldDefn(seed_field_name, ogr.OFTString)
        if seed_field_name not in all_fields:
            layer.CreateField(seed_field)
        if crossValidation is False:
            random_seed_number = None
            if random_seed is not None:
                random_seed_number = random_seed + seed
            id_learn, id_val = get_random_poly(layer, dataField, class_avail,
                                               ratio, regionField,
                                               region_avail,
                                               random_seed_number)
        else:
            id_learn = id_CrossVal[seed]
        if splitGroundTruth is False:
            id_learn = id_learn.union(id_val)
        for i in fid:
            flag = None
            if i in id_learn:
                flag = learningFlag
                if seed == seeds - 1 and crossValidation:
                    flag = validationFlag
            elif crossValidation:
                flag = unusedFlag
            elif i in id_val:
                flag = validationFlag
            feat = layer.GetFeature(i)
            feat.SetField(seed_field_name, flag)
            layer.SetFeature(feat)
    i = layer = None
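# Hedged usage sketch for splitInSubSets: the shapefile below is a
# placeholder assumption. Two 'seed_X' fields are added in place, each
# feature being flagged 'learn' or 'validation' with an 80/20 ratio.
splitInSubSets("/tmp/T31TCJ_groundTruth.shp",
               "code",
               "region",
               ratio=0.8,
               seeds=2,
               driver_name="ESRI Shapefile",
               random_seed=0)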
def extract_maj_vote_samples(vec_in: str,
                             vec_out: str,
                             ratio_to_keep: float,
                             data_field: str,
                             region_field: str,
                             driver_name: Optional[str] = "ESRI Shapefile"):
    """
    dedicated to extracting samples by class according to a ratio.
    Extracted samples are removed from vec_in and placed in vec_out

    Parameters
    ----------
    vec_in : string
        path to a shapeFile (.shp)
    vec_out : string
        path to a sqlite file (.sqlite)
    ratio_to_keep : float
        percentage of samples to extract;
        ratio_to_keep = 0.1 means extract 10% of each class in each region
    data_field : string
        field containing class labels
    region_field : string
        field containing region labels
    driver_name : string
        OGR driver
    """
    from osgeo import ogr
    from iota2.Common import FileUtils as fut
    from iota2.Sampling import SplitInSubSets as subset
    from iota2.Common.Utils import run

    class_avail = fut.getFieldElement(vec_in,
                                      driverName=driver_name,
                                      field=data_field,
                                      mode="unique",
                                      elemType="int")
    region_avail = fut.getFieldElement(vec_in,
                                       driverName=driver_name,
                                       field=region_field,
                                       mode="unique",
                                       elemType="str")

    driver = ogr.GetDriverByName(driver_name)
    source = driver.Open(vec_in, 1)
    layer = source.GetLayer(0)

    sample_id_to_extract, _ = subset.get_random_poly(layer, data_field,
                                                     class_avail,
                                                     ratio_to_keep,
                                                     region_field,
                                                     region_avail)

    # create a new file with the targeted FIDs
    fid_samples_in = built_where_sql_exp(sample_id_to_extract, clause="in")
    cmd = f"ogr2ogr -where '{fid_samples_in}' -f 'SQLite' {vec_out} {vec_in}"
    run(cmd)

    # remove the targeted FIDs from vec_in
    vec_in_rm = vec_in.replace(".shp", "_tmp.shp")
    fid_samples_not_in = built_where_sql_exp(sample_id_to_extract,
                                             clause="not in")
    cmd = f"ogr2ogr -where '{fid_samples_not_in}' {vec_in_rm} {vec_in}"
    run(cmd)

    fut.removeShape(vec_in.replace(".shp", ""),
                    [".prj", ".shp", ".dbf", ".shx"])
    cmd = f"ogr2ogr {vec_in} {vec_in_rm}"
    run(cmd)
    fut.removeShape(vec_in_rm.replace(".shp", ""),
                    [".prj", ".shp", ".dbf", ".shx"])
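# Hedged usage sketch for extract_maj_vote_samples: paths are placeholder
# assumptions. 10% of each class in each region is moved out of the input
# shapefile into the SQLite output.
extract_maj_vote_samples("/tmp/T31TCJ_groundTruth.shp",
                         "/tmp/maj_vote_samples.sqlite",
                         ratio_to_keep=0.1,
                         data_field="code",
                         region_field="region")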
def generate_samples_classif_mix(folder_sample: str,
                                 working_directory: str,
                                 train_shape: str,
                                 path_wd: str,
                                 output_path: str,
                                 annual_crop: List[Union[str, int]],
                                 all_class: List[Union[str, int]],
                                 data_field: str,
                                 previous_classif_path: str,
                                 proj: int,
                                 runs: Union[str, int],
                                 enable_cross_validation: bool,
                                 region_field: str,
                                 validity_threshold: int,
                                 target_resolution: int,
                                 sar_optical_post_fusion: bool,
                                 sensors_parameters: sensors_params_type,
                                 folder_features: Optional[str] = None,
                                 ram: Optional[int] = 128,
                                 w_mode: Optional[bool] = False,
                                 test_mode: Optional[bool] = False,
                                 test_shape_region: Optional[str] = None,
                                 sample_sel: Optional[str] = None,
                                 mode: Optional[str] = "usually",
                                 logger: Optional[Logger] = LOGGER):
    """
    usage : from a previous classification, randomly choose annual samples,
    merge them with non-annual samples and extract features.

    IN:
        folder_sample [string] : output folder
        working_directory [string] : computation folder
        train_shape [string] : vector shape (polygons) to sample
        path_wd [string] : if different from None, enable HPC mode
            (copy at ending)
        annual_crop [list of string/int] : list containing annual crops
            ex : [11, 12]
        all_class [list of string/int] : list containing all classes in
            the vector shape ex : [11, 12, 51, ...]
        previous_classif_path [string] : path to the iota2 output directory
            which generated the previous classification
        data_field [string] : data field in the vector shape
        test_mode [bool] : enable test mode -> iota2tests.py
        test_shape_region [string] : path to the shapefile representing
            the region in the tile

    OUT:
        samples [string] : vector shape containing points
    """
    from iota2.Sampling.SamplesSelection import prepare_selection
    from iota2.Sampling import GenAnnualSamples as genAS

    if os.path.exists(
            os.path.join(
                folder_sample,
                train_shape.split("/")[-1].replace(".shp",
                                                   "_Samples.sqlite"))):
        return None
    if enable_cross_validation:
        runs = runs - 1
    features_path = os.path.join(output_path, "features")
    sample_sel_directory = os.path.join(output_path, "samplesSelection")

    work_dir = sample_sel_directory
    if working_directory:
        work_dir = working_directory

    data_field = data_field.lower()

    current_tile = (os.path.splitext(os.path.basename(train_shape))[0])

    if sample_sel:
        sample_selection = sample_sel
    else:
        sample_selection = prepare_selection(sample_sel_directory,
                                             current_tile)

    non_annual_shape = os.path.join(
        work_dir, "{}_nonAnnual_selection.sqlite".format(current_tile))
    annual_shape = os.path.join(
        work_dir, "{}_annual_selection.sqlite".format(current_tile))
    # keep all perennial (non-annual) classes
    nb_feat_nannu = extract_class(sample_selection, non_annual_shape,
                                  all_class, data_field)

    regions = fu.getFieldElement(train_shape,
                                 driverName="ESRI Shapefile",
                                 field=region_field,
                                 mode="unique",
                                 elemType="str")
    logger.debug(sample_selection)
    logger.debug(train_shape)
    # get the repartition of annual classes by seed and by region
    # -> allows annu_repartition[11][R][S]
    annu_repartition = get_repartition(sample_selection, annual_crop,
                                       data_field, region_field, regions,
                                       runs)

    nb_feat_annu = get_number_annual_sample(annu_repartition)

    # reference raster (in order to extract ROIs)
    ref = fu.FileSearch_AND(os.path.join(features_path, current_tile), True,
                            "MaskCommunSL.tif")[0]

    if nb_feat_nannu > 0:
        all_coord = get_points_coord_in_shape(non_annual_shape, "SQLite")
    else:
        all_coord = [0]

    classification_raster = extract_roi(os.path.join(previous_classif_path,
                                                     "final",
                                                     "Classif_Seed_0.tif"),
                                        current_tile,
                                        path_wd,
                                        output_path,
                                        f"Classif_{current_tile}",
                                        ref,
                                        test_mode,
                                        test_output=folder_sample)
    validity_raster = extract_roi(os.path.join(previous_classif_path,
                                               "final",
                                               "PixelsValidity.tif"),
                                  current_tile,
                                  path_wd,
                                  output_path,
                                  f"Cloud{current_tile}",
                                  ref,
                                  test_mode,
                                  test_output=folder_sample)

    # build region masks into the tile
    masks = [
        get_region_model_in_tile(current_tile,
                                 current_region,
                                 output_path,
                                 path_wd,
                                 classification_raster,
                                 region_field,
                                 test_mode,
                                 test_shape_region,
                                 test_output_folder=folder_sample)
        for current_region in regions
    ]

    if nb_feat_annu > 0:
        annual_points = genAS.genAnnualShapePoints(
            all_coord, "SQLite", working_directory, target_resolution,
            annual_crop, data_field, current_tile, validity_threshold,
            validity_raster, classification_raster, masks, train_shape,
            annual_shape, proj, region_field, runs, annu_repartition)

    merge_name = train_shape.split("/")[-1].replace(".shp", "_selectionMerge")
    sample_selection = os.path.join(working_directory, f"{merge_name}.sqlite")

    if (nb_feat_nannu > 0) and (nb_feat_annu > 0 and annual_points):
        fu.mergeSQLite(merge_name, working_directory,
                       [non_annual_shape, annual_shape])
    elif (nb_feat_nannu > 0) and not (nb_feat_annu > 0 and annual_points):
        # if no annual samples can be added, annual classes are ignored
        shutil.copy(non_annual_shape, sample_selection)
    elif not (nb_feat_nannu > 0) and (nb_feat_annu > 0 and annual_points):
        # if no non-annual samples are found, use all annual samples
        shutil.copy(annual_shape, sample_selection)

    samples = os.path.join(
        working_directory,
        train_shape.split("/")[-1].replace(".shp", "_Samples.sqlite"))

    sample_extr, dep_tmp = get_features_application(
        sample_selection, working_directory, samples, data_field, output_path,
        sar_optical_post_fusion, sensors_parameters, ram, mode)

    # sample_extr.ExecuteAndWriteOutput()
    multi_proc = mp.Process(target=executeApp, args=[sample_extr])
    multi_proc.start()
    multi_proc.join()

    split_vectors = split_vector_by_region(in_vect=samples,
                                           output_dir=working_directory,
                                           region_field=region_field,
                                           runs=int(runs),
                                           driver="SQLite",
                                           proj_in="EPSG:" + str(proj),
                                           proj_out="EPSG:" + str(proj))
    if test_mode:
        split_vectors = None
    if path_wd and os.path.exists(samples):
        for sample in split_vectors:
            shutil.copy(sample, folder_sample)
    if os.path.exists(non_annual_shape):
        os.remove(non_annual_shape)
    if os.path.exists(annual_shape):
        os.remove(annual_shape)

    if w_mode:
        target_directory = os.path.join(folder_features, current_tile)
        if not os.path.exists(target_directory):
            try:
                os.mkdir(target_directory)
            except OSError:
                logger.warning(f"{target_directory} already exists")
            try:
                os.mkdir(os.path.join(target_directory, "tmp"))
            except OSError:
                logger.warning(f"{target_directory}/tmp already exists")
        from_dir = os.path.join(working_directory, current_tile, "tmp")
        to_dir = os.path.join(target_directory, "tmp")
        if os.path.exists(from_dir):
            fu.updateDirectory(from_dir, to_dir)

    os.remove(samples)
    os.remove(classification_raster)
    os.remove(validity_raster)
    for mask in masks:
        os.remove(mask)
    return split_vectors
def generate_samples(train_shape_dic,
                     path_wd,
                     data_field: str,
                     output_path: str,
                     annual_crop: List[Union[str, int]],
                     crop_mix: bool,
                     auto_context_enable: bool,
                     region_field: str,
                     proj: Union[int, str],
                     enable_cross_validation: bool,
                     runs: int,
                     sensors_parameters: sensors_params_type,
                     sar_optical_post_fusion: bool,
                     samples_classif_mix: Optional[bool] = False,
                     output_path_annual: Optional[str] = None,
                     ram: Optional[int] = 128,
                     w_mode: Optional[int] = False,
                     folder_annual_features: Optional[str] = None,
                     previous_classif_path: Optional[str] = None,
                     validity_threshold: Optional[int] = None,
                     target_resolution: Optional[int] = None,
                     test_mode: Optional[bool] = False,
                     test_shape_region: Optional[str] = None,
                     sample_selection: Optional[str] = None,
                     logger: Optional[Logger] = LOGGER):
    """
    usage : generate a vector shape of points with features

    IN:
        train_shape_dic [dict] : dictionary containing a shapefile as value
        path_wd [string] : working directory
        data_field [str] : data field name
        output_path [str] : the output path
        annual_crop [list of str or int] : the list of annual crops
        crop_mix [bool] : activate the crop mix mode
        auto_context_enable [bool] : activate the auto context mode
        region_field [str] : the region field name
        proj [int] : the working projection
        enable_cross_validation [bool] : enable cross validation
        runs [int] : number of runs
        samples_classif_mix [bool] : activate the classification mix mode
        output_path_annual [str] : the annual output path
        ram [int] : available ram
        w_mode [bool] : write mode
        folder_annual_features [str] : path to annual features
        previous_classif_path [string] : path to the previous classification
        validity_threshold [int] : validity threshold
        target_resolution [int] : target resolution
        test_mode [bool] : enable test mode
        test_shape_region [string] : path to a vector shapeFile representing
            the region in the tile
        sample_selection [string] : path to a samples selection file

    OUT:
        samples [string] : path to the output vector shape
    """
    # mode must be "usually" or "SAR"
    mode = list(train_shape_dic.keys())[0]
    train_shape = train_shape_dic[mode]

    all_class = fu.getFieldElement(train_shape,
                                   "ESRI Shapefile",
                                   data_field,
                                   mode="unique",
                                   elemType="str")

    for current_class in annual_crop.data:
        try:
            all_class.remove(str(current_class))
        except ValueError:
            logger.warning(
                f"Class {current_class} doesn't exist in {train_shape}")

    logger.info(f"All classes: {all_class}")
    logger.info(f"Annual crop: {annual_crop}")

    folder_features = os.path.join(output_path, "features")
    folder_sample = os.path.join(output_path, "learningSamples")
    if not os.path.exists(folder_sample):
        try:
            os.mkdir(folder_sample)
        except OSError:
            logger.warning(f"{folder_sample} already exists")

    working_directory = folder_sample
    if path_wd:
        working_directory = path_wd

    if crop_mix is False or auto_context_enable:
        samples = generate_samples_simple(
            folder_sample, working_directory, train_shape, path_wd,
            data_field, region_field, output_path, runs, proj,
            enable_cross_validation, sensors_parameters,
            sar_optical_post_fusion, ram, w_mode, folder_features,
            sample_selection, mode)
    elif crop_mix is True and samples_classif_mix is False:
        samples = generate_samples_crop_mix(
            folder_sample, working_directory, output_path, output_path_annual,
            train_shape, path_wd, annual_crop, all_class, data_field,
            folder_features, folder_annual_features, enable_cross_validation,
            runs, region_field, proj, sar_optical_post_fusion,
            sensors_parameters, ram, w_mode, test_mode, sample_selection,
            mode)
    elif crop_mix is True and samples_classif_mix is True:
        if isinstance(proj, str):
            proj = int(proj.split(":")[-1])
        samples = generate_samples_classif_mix(
            folder_sample, working_directory, train_shape, path_wd,
            output_path, annual_crop, all_class, data_field,
            previous_classif_path, proj, runs, enable_cross_validation,
            region_field, validity_threshold, target_resolution,
            sar_optical_post_fusion, sensors_parameters, folder_features,
            ram, w_mode, test_mode, test_shape_region, sample_selection,
            mode)
    if test_mode:
        return samples
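# Hedged usage sketch for generate_samples: every value below is a
# placeholder assumption. Note that annual_crop is expected to be an
# iota2 configuration parameter object exposing a .data attribute (the
# function iterates over annual_crop.data), and sensors_params would come
# from the iota2 configuration. With crop_mix disabled, the simple
# sample-generation workflow is used.
samples = generate_samples({"usually": "/tmp/T31TCJ.shp"},
                           path_wd=None,
                           data_field="code",
                           output_path="/data/iota2_run",
                           annual_crop=annual_crop_param,
                           crop_mix=False,
                           auto_context_enable=False,
                           region_field="region",
                           proj=2154,
                           enable_cross_validation=False,
                           runs=2,
                           sensors_parameters=sensors_params,
                           sar_optical_post_fusion=False,
                           test_mode=True)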
def get_regions_area(
        vectors: List[str], regions: List[str], formatting_vectors_dir: str,
        working_directory: str, region_field: str
) -> Tuple[Dict[str, float], Dict[str, List[str]], List[str]]:
    """
    usage : get the polygon area of all models

    IN
        vectors [list of strings] : paths to vector files
        regions [list of strings] : all possible regions
        formatting_vectors_dir [string] : path to /iota2/formattingVectors
        working_directory [string]
        region_field [string]
    OUT
        dico_region_area [dict] : dictionary containing the area by
            region key
        dico_region_tile [dict] : dictionary containing the tile vectors by
            region key
        tmp_data [list] : temporary sqlite files to remove
    """
    from iota2.Common.Utils import run
    from iota2.Common import FileUtils as fut
    import sqlite3 as db
    tmp_data = []
    # init dicts
    dico_region_area = {}
    dico_region_tile = {}
    for reg in regions:
        dico_region_area[reg] = 0.0
        dico_region_tile[reg] = []

    for vector in vectors:
        # move vectors to sqlite (faster format)
        transform_dir = formatting_vectors_dir
        if working_directory:
            transform_dir = working_directory
        transform_vector_name = os.path.split(vector)[-1].replace(
            ".shp", ".sqlite")
        sqlite_vector = os.path.join(transform_dir, transform_vector_name)
        cmd = "ogr2ogr -f 'SQLite' {} {}".format(sqlite_vector, vector)
        run(cmd)
        tmp_data.append(sqlite_vector)
        region_vector = fut.getFieldElement(sqlite_vector,
                                            driverName="SQLite",
                                            field=region_field,
                                            mode="unique",
                                            elemType="str")
        conn = db.connect(sqlite_vector)
        conn.enable_load_extension(True)
        conn.load_extension("mod_spatialite")
        cursor = conn.cursor()
        table_name = (transform_vector_name.replace(".sqlite", "")).lower()
        for current_region in region_vector:
            sql_clause = (
                f"SELECT AREA(GEOMFROMWKB(GEOMETRY)) FROM "
                f"{table_name} WHERE {region_field}={current_region}")
            cursor.execute(sql_clause)
            res = cursor.fetchall()
            dico_region_area[current_region] += sum(
                [area[0] for area in res])
            if vector not in dico_region_tile[current_region]:
                dico_region_tile[current_region].append(sqlite_vector)
        conn = cursor = None
    return dico_region_area, dico_region_tile, tmp_data
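# Hedged usage sketch for get_regions_area: inputs are placeholder
# assumptions. Areas are accumulated per region over every tile vector,
# using spatialite's AREA() on the stored geometries.
areas, tiles_by_region, tmp_files = get_regions_area(
    vectors=["/data/iota2_run/formattingVectors/T31TCJ.shp"],
    regions=["1", "2"],
    formatting_vectors_dir="/data/iota2_run/formattingVectors",
    working_directory=None,
    region_field="region")
print(areas)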