Exemple #1
0
    def test_update_flags(self):
        """
        check update_flags on a copy of the reference selection database

        after the call, the number of 'seed_0' values equal to "XXXX" in the
        copy must match the number of features of the reference database
        """
        from iota2.Sampling.SamplesSelection import update_flags

        # prepare test input : work on a copy of the reference database
        table_name = "t31tcj_samples_region_1_seed_0_selection"
        vector_copy = os.path.join(
            self.test_working_directory,
            "T31TCJ_samples_region_1_seed_1_selection.sqlite")
        shutil.copy(self.selection_ref, vector_copy)

        update_flags(vector_copy, 2, table_name=table_name)

        # assert : the same query is sent to the reference and to the copy
        query = {
            "driverName": "SQLite",
            "field": "seed_0",
            "mode": "all",
            "elemType": "str"
        }
        expected_count = len(fut.getFieldElement(self.selection_ref, **query))
        flags_after_update = fut.getFieldElement(vector_copy, **query)
        flagged_count = flags_after_update.count("XXXX")
        self.assertTrue(expected_count == flagged_count,
                        msg="update features failed")
    def test_vector_splits_cross_validation(self):
        """
        run splitInSubSets with crossValidation=True, then check that
        'seed_0' only holds 'unused'/'learn' flags and that 'seed_1' only
        holds 'unused'/'validation' flags
        """
        from iota2.Sampling import SplitInSubSets as VS
        from iota2.Common import FileUtils as fut

        driver = "ESRI Shapefile"
        region_shape = self.new_regions_shapes[0]
        VS.splitInSubSets(region_shape,
                          self.data_field,
                          self.region_field,
                          self.ratio,
                          self.seeds,
                          driver,
                          crossValidation=True,
                          random_seed=0)

        # both fields are read before any flag is checked
        flags_by_field = {
            field_name: fut.getFieldElement(region_shape,
                                            driverName=driver,
                                            field=field_name,
                                            mode="all",
                                            elemType="str")
            for field_name in ("seed_0", "seed_1")
        }

        # (field, allowed flags, failure message)
        expectations = (
            ("seed_0", ["unused", "learn"],
             "flag not in ['unused', 'learn']"),
            ("seed_1", ["unused", "validation"],
             "flag not in ['unused', 'validation']"),
        )
        for field_name, allowed_flags, message in expectations:
            for flag in flags_by_field[field_name]:
                self.assertTrue(flag in allowed_flags, msg=message)
    def test_split_vector_by_region(self):
        """
        test : split a vector by the region it belongs to

        random_update is asked to set region '2' on 5 features ; the
        'region_2' output must then hold 5 features and the 'region_1' and
        'region_2' outputs together must hold as many features as the input
        """
        from iota2.Sampling.VectorFormatting import split_vector_by_region
        from iota2.Common.Utils import run
        from iota2.Tests.UnitTests.Iota2Tests import random_update

        def count_features(vector_path, driver_name):
            """number of values read in the 'region' field of a vector"""
            return len(
                fut.getFieldElement(vector_path,
                                    driverName=driver_name,
                                    field="region",
                                    mode="all",
                                    elemType="str"))

        # define inputs
        total_features = count_features(self.in_vector, "ESRI shapefile")
        moved_features = 5
        sqlite_vector = os.path.join(self.test_working_directory,
                                     "T31TCJ_Samples.sqlite")
        # convert the input shapefile to a SQLite database (layer 'output')
        run(f"ogr2ogr -nln output -f SQLite {sqlite_vector} {self.in_vector}")

        random_update(sqlite_vector, "output", "seed_0", "learn",
                      total_features)
        random_update(sqlite_vector, "output", "region", "2", moved_features)

        # launch function
        split_vector_by_region(sqlite_vector,
                               self.test_working_directory,
                               "region",
                               runs=1,
                               driver="SQLite")

        # assert : first file matching each region pattern
        region_1_vector, region_2_vector = [
            fut.FileSearch_AND(self.test_working_directory, True, pattern)[0]
            for pattern in ("region_1", "region_2")
        ]
        region_1_count = count_features(region_1_vector, "SQLite")
        region_2_count = count_features(region_2_vector, "SQLite")

        self.assertTrue(moved_features == region_2_count)
        self.assertTrue(total_features == region_1_count + region_2_count)
Exemple #4
0
def config_model(outputPath, region_field):
    """
    usage : determine which model will class which tile

    Parameters
    ----------
    outputPath : string
        directory containing the 'formattingVectors' and 'shapeRegion'
        sub-directories
    region_field : string
        name of the field holding the region in the shapefiles

    Return
    ------
    string
        'AllModel' description : one modelName / tilesList entry per model,
        tiles being joined with '_'
    """
    shp_driver = "ESRI Shapefile"

    def file_stem(path):
        """file name without directory nor extension"""
        return os.path.splitext(os.path.basename(path))[0]

    # (region, tile) couples found in the formatted samples ; the tile name
    # is the first '_'-separated token of the file name
    formatted_samples = fu.FileSearch_AND(
        os.path.join(outputPath, "formattingVectors"), True, ".shp")
    region_tile_pairs = []
    for sample in formatted_samples:
        tile_name = file_stem(sample).split("_")[0]
        sample_regions = fu.getFieldElement(sample,
                                            driverName=shp_driver,
                                            field=region_field,
                                            mode="unique",
                                            elemType="str")
        region_tile_pairs.extend(
            (region, tile_name) for region in sample_regions)

    # {'model_name':[TileName, TileName...],'...':...,...}
    model_tiles = dict(fu.sortByFirstElem(region_tile_pairs))

    # add missing tiles by looking at the shapeRegion directory ; only
    # the shapes holding at least one feature are considered
    candidates = fu.FileSearch_AND(os.path.join(outputPath, "shapeRegion"),
                                   True, ".shp")
    populated_shapes = []
    for candidate in candidates:
        region_values = fu.getFieldElement(candidate,
                                           driverName=shp_driver,
                                           field=region_field,
                                           mode="all",
                                           elemType="str")
        if len(region_values) >= 1:
            populated_shapes.append(candidate)

    for shape_region in populated_shapes:
        name_tokens = file_stem(shape_region).split("_")
        tile = name_tokens[-1]
        region = name_tokens[-2]
        for model_name, tiles_model in model_tiles.items():
            # a sub-model '<region>f<n>' belongs to region '<region>'
            if model_name.split("f")[0] != region or tile in tiles_model:
                continue
            tiles_model.append(tile)

    # construct output file string
    entries = [
        "\n\t{" + "\n\tmodelName:'{}'\n\ttilesList:'{}'".format(
            model_name, "_".join(tiles_model)) + "\n\t}"
        for model_name, tiles_model in model_tiles.items()
    ]
    return "AllModel:\n[" + "".join(entries) + "\n]"
    def test_extract_maj_vote_samples(self):
        """
        test the extraction of samples by class according to a ratio

        for every class of the reference vector, the number of features left
        in the working copy must equal ratio * reference count
        """
        from iota2.Sampling.VectorFormatting import extract_maj_vote_samples
        from collections import Counter

        label_field = "code"

        def class_histogram(vector_path, driver_name):
            """count the features of a vector by value of the label field"""
            return Counter(
                fut.getFieldElement(vector_path,
                                    driverName=driver_name,
                                    field=label_field,
                                    mode="all",
                                    elemType="str"))

        # define inputs : work on a copy of the reference shapefile
        workdir = self.test_working_directory
        in_vector = os.path.join(workdir, os.path.basename(self.in_vector))
        extracted_vector = os.path.join(workdir, "extracted_samples.sqlite")
        fut.cpShapeFile(self.in_vector.replace(".shp", ""),
                        in_vector.replace(".shp", ""),
                        [".prj", ".shp", ".dbf", ".shx"])

        # launch function
        extraction_ratio = 0.5
        extract_maj_vote_samples(in_vector, extracted_vector, extraction_ratio,
                                 label_field, "region")

        # assert
        by_class_origin = class_histogram(self.in_vector, "ESRI Shapefile")
        by_class_in_vector = class_histogram(in_vector, "ESRI Shapefile")
        # the extracted vector is read as well, although no check uses it
        class_histogram(extracted_vector, "SQLite")

        checks = [
            by_class_in_vector[class_name] == extraction_ratio * class_count
            for class_name, class_count in by_class_origin.items()
        ]
        self.assertTrue(all(checks), msg="extraction of samples failed")
Exemple #6
0
    def test_split_selection(self):
        """
        test dedicated to check if split_sel function works

        random_update is asked to set tile 'T31TDJ' on 10 features ; the
        second file returned by split_sel must then hold 10 features and
        both returned files together must hold every reference feature
        """
        from iota2.Sampling.SamplesSelection import split_sel
        from Iota2Tests import random_update
        from TestsUtils import rename_table

        def count_features(vector_path):
            """number of values read in the 'tile_o' field of a database"""
            return len(
                fut.getFieldElement(vector_path,
                                    driverName="SQLite",
                                    field="tile_o",
                                    mode="all",
                                    elemType="str"))

        # prepare test input
        origin_table = "t31tcj_samples_region_1_seed_0_selection"
        test_vector = os.path.join(self.test_working_directory,
                                   "samples_region_1_seed_0.sqlite")
        shutil.copy(self.selection_ref, test_vector)

        # update "nb_feat" features to a new "new_tile_name" tile's name
        nb_feat = 10
        new_tile_name = "T31TDJ"
        random_update(test_vector, origin_table, "tile_o", new_tile_name,
                      nb_feat)
        rename_table(test_vector,
                     old_table_name=origin_table,
                     new_table_name="output")

        # launch function
        new_files = split_sel(test_vector, ["T31TCJ", new_tile_name],
                              self.test_working_directory, "EPSG:2154")

        # assert
        nb_origin = count_features(self.selection_ref)
        nb_t31tcj = count_features(new_files[0])
        nb_t31tdj = count_features(new_files[1])
        self.assertTrue(nb_t31tdj == nb_feat,
                        msg="split samples selection failed")
        self.assertTrue(nb_origin == nb_t31tdj + nb_t31tcj,
                        msg="split samples selection failed")
 def test_iota2_augmentation(self):
     """Test data augmentation workflow

     After a 'balance' augmentation with the 'jitter' strategy, every class
     found in the test vector must count as many samples as the biggest
     class of self.class_count.
     """
     from collections import Counter

     balance_plan = DataAugmentation.SamplesAugmentationCounter(
         self.class_count, mode="balance", minNumber=None, byClass=None)
     DataAugmentation.DoAugmentation(self.vector_test,
                                     balance_plan,
                                     strategy="jitter",
                                     field="code",
                                     excluded_fields=[],
                                     Jstdfactor=10,
                                     Sneighbors=None,
                                     workingDirectory=None)

     # class histogram of the vector once augmented
     labels_after = fut.getFieldElement(self.vector_test,
                                        driverName="SQLite",
                                        field="code",
                                        mode="all",
                                        elemType="int")
     class_count_test = Counter(labels_after)

     # size of the most represented class before augmentation
     biggest_class = max(self.class_count,
                         key=lambda label: self.class_count[label])
     samples_number = self.class_count[biggest_class]

     balanced = [
         samples_number == count for count in class_count_test.values()
     ]
     self.assertTrue(all(balanced))
Exemple #8
0
def extract_class(vec_in: str, vec_out: str, target_class: List[str],
                  data_field: str) -> int:
    """
    Extract the features of the given classes into a new SQLite file

    Parameters
    ----------
    vec_in: str
        path to the vector file given as source to ogr2ogr
    vec_out: str
        path to the SQLite file written by ogr2ogr (layer named 'output')
    target_class: List[str]
        class labels to keep ; an object which is not a list is expected
        to expose the labels through its 'data' attribute
    data_field: str
        name of the field holding the class label (lower-cased before use)

    Return
    ------
    int
        number of values read back from the class field of vec_out
    """
    from iota2.Common.Utils import run

    # isinstance also accepts list subclasses, unlike an exact type comparison
    if not isinstance(target_class, list):
        target_class = target_class.data

    field = data_field.lower()
    # one 'field=label' clause per requested class
    where = " OR ".join(f"{field}={klass}" for klass in target_class)
    cmd = (f"ogr2ogr -f 'SQLite' -nln output -where '{where}' "
           f"{vec_out} {vec_in}")
    run(cmd)

    return len(
        fu.getFieldElement(vec_out,
                           driverName="SQLite",
                           field=field,
                           mode="all",
                           elemType="int"))
Exemple #9
0
def split(regions_split: Dict[str, int], regions_tiles: Dict[str, List[str]],
          data_field: str, region_field: str) -> List[str]:
    """
    function dedicated to split too huge regions in sub-regions

    For each vector of each region to split, the FIDs of every class are
    distributed into 'fold' groups named '<region>f1' ... '<region>f<fold>',
    then the vector is updated with this new distribution.

    Parameters
    ----------
    regions_split: dict[string, int]
        number of sub-regions to create, by region name
    regions_tiles: dict[str, List[str]]
        paths to the SQLite vectors to update, by region name
    data_field: str
        name of the field holding the class label
    region_field: str
        name of the field holding the region

    Return
    ------
    list(str)
        paths to the updated vectors, without duplicates
    """
    from iota2.Common import FileUtils as fut
    updated_vectors = []

    for region, fold in regions_split.items():
        for vec in regions_tiles[region]:
            # new region's names are defined here, one empty FID list each
            new_regions_dict = {
                f"{region}f{sub_fold + 1}": []
                for sub_fold in range(fold)
            }

            # get possible class
            class_vector = fut.getFieldElement(vec,
                                               driverName="SQLite",
                                               field=data_field,
                                               mode="unique",
                                               elemType="str")
            # get FID values for all class of current region into
            # the current tile
            dic_class = {
                c_class: get_fid_values(vec, data_field, region_field, region,
                                        c_class)
                for c_class in class_vector
            }

            for fid_cl in dic_class.values():
                # a class without feature in this region has nothing to share
                if not fid_cl:
                    continue
                # fill new_regions_dict, fold after fold
                for i, fid_fold in enumerate(fut.splitList(fid_cl, fold)):
                    new_regions_dict[f"{region}f{i+1}"] += fid_fold

            update_vector(vec, region_field, new_regions_dict)
            if vec not in updated_vectors:
                updated_vectors.append(vec)

    return updated_vectors
Exemple #10
0
def get_models(formatting_vector_directory: str, region_field: str,
               runs: int) -> List[Tuple[str, List[str], int]]:
    """
    usage :
    describe samples spatial repartition
    function used to determine which shapeFiles have to be merged in order
    to compute statistics thanks to otb_SampleSelection

    Parameters
    ----------
    formatting_vector_directory : string
        directory searched for '.shp' files (one per tile)
    region_field : string
        name of the field holding the region
    runs : int
        number of runs

    OUT:
    regions_tiles_seed [list] :
    example
    regions_tiles_seed = [('1', ['T1', 'T2'], 0), ('2', ['T2', 'T3'], 0),
                          ('1', ['T1', 'T2'], 1), ('2', ['T2', 'T3'], 1)]
    means the region '1' is present in tiles 'T1' and 'T2' in runs 0 and 1
    and region '2' in 'T2', 'T3' in runs 0 and 1
    """
    # the way of getting region could be improved ?
    shapefiles = fut.FileSearch_AND(formatting_vector_directory, True, ".shp")
    couples = []
    known_regions = []
    for shapefile in shapefiles:
        tile_name = os.path.splitext(os.path.basename(shapefile))[0]
        tile_regions = fut.getFieldElement(shapefile,
                                           driverName="ESRI Shapefile",
                                           field=region_field,
                                           mode="unique",
                                           elemType="str")
        seen_in_tile = []
        for region in tile_regions:
            # a region is recorded only once per tile
            if region in seen_in_tile:
                continue
            seen_in_tile.append(region)
            if region not in known_regions:
                known_regions.append(region)
            couples.append((region, tile_name))

    # group tiles by region, each tile appearing once
    tiles_by_region = {
        region: list(set(region_tiles))
        for region, region_tiles in dict(
            fut.sortByFirstElem(couples)).items()
    }

    ordered_regions = sorted(known_regions)
    return [(region, tiles_by_region[region], run_index)
            for run_index in range(runs)
            for region in ordered_regions]
Exemple #11
0
def region_tile(sample_sel_dir: str):
    """
    list the (region, seed, tile) triplets found in a directory

    Every '.shp' file of sample_sel_dir is read, in sorted path order.
    The region and the seed are the 3rd and the 5th '_'-separated tokens
    of the file name, the tiles are the unique values of the 'tile_o'
    field, sorted.

    Parameters
    ----------
    sample_sel_dir : string
        directory searched for '.shp' files

    Return
    ------
    list
        (region_name, seed, tile) tuples
    """
    output = []
    for region_vector in sorted(
            fut.FileSearch_AND(sample_sel_dir, True, ".shp")):
        tiles = fut.getFieldElement(region_vector,
                                    driverName="ESRI Shapefile",
                                    field="tile_o",
                                    mode="unique",
                                    elemType="str")
        name_tokens = os.path.splitext(
            os.path.basename(region_vector))[0].split("_")
        region_name = name_tokens[2]
        seed = name_tokens[4]
        output.extend((region_name, seed, tile) for tile in sorted(tiles))
    return output
    def test_vectorSplitsNoSplits(self):
        """
        run splitInSubSets with one seed, without cross validation nor
        ground truth split, and check that every 'seed_0' flag is 'learn'
        """
        from iota2.Sampling import SplitInSubSets as VS
        from iota2.Common import FileUtils as fut

        new_region_shape = self.new_regions_shapes[0]
        VS.splitInSubSets(new_region_shape,
                          self.data_field,
                          self.region_field,
                          self.ratio,
                          1,
                          "ESRI Shapefile",
                          crossValidation=False,
                          splitGroundTruth=False,
                          random_seed=0)
        seed0 = fut.getFieldElement(new_region_shape,
                                    driverName="ESRI Shapefile",
                                    field="seed_0",
                                    mode="all",
                                    elemType="str")

        for elem in seed0:
            # assertEqual reports the offending flag when the check fails
            self.assertEqual(elem, "learn", msg="flag not in ['learn']")
Exemple #13
0
def gen_raster_ref(vec, output_path, masks_name, working_directory):
    """
    generate the reference image needed to sampleSelection application

    Parameters
    ----------

    vec : string
        path to the shapeFile containing all polygons dedicated to learn
        a model.
    output_path : string
        directory containing the 'features' directory (one sub-directory
        per tile)
    masks_name : string
        pattern used to find the mask raster in each tile directory
    working_directory : string
        Path to a working directory

    Return
    ------
    tuple
        path to the reference raster and the unique values of the 'tile_o'
        field of vec
    """
    from iota2.Common.Utils import run

    features_directory = os.path.join(output_path, "features")
    tiles = fut.getFieldElement(vec,
                                driverName="ESRI Shapefile",
                                field="tile_o",
                                mode="unique",
                                elemType="str")

    # first raster matching masks_name in each tile's features directory
    rasters_tiles = []
    for tile_name in tiles:
        tile_directory = os.path.join(features_directory, tile_name)
        rasters_tiles.append(
            fut.FileSearch_AND(tile_directory, True, masks_name)[0])

    vec_stem = os.path.splitext(os.path.basename(vec))[0]
    raster_ref = os.path.join(working_directory,
                              f"ref_raster_{vec_stem}.tif")
    merged_inputs = " ".join(rasters_tiles)
    run(f"gdal_merge.py -ot Byte -n 0 -createonly -o {raster_ref} "
        f"{merged_inputs}")
    return raster_ref, tiles
Exemple #14
0
def confusion_fusion(input_vector: str, data_field: str, csv_out: str,
                     txt_out: str, csv_path: str, runs: int, crop_mix: bool,
                     annual_crop: List[str],
                     annual_crop_label_replacement: int) -> None:
    """merge otb tile confusion matrix

    For each run, the confusion matrices found in csv_path are merged, the
    merged matrix is written as csv and the derived scores (kappa, overall
    accuracy, precision, recall and f-score by class) are written as text.

    Parameters
    ----------
    input_vector: str
        input database
    data_field: str
        data field
    csv_out: str
        output directory which will contain the merged matrices
        (Classif_Seed_<seed>.csv)
    txt_out: str
        directory which will contain the resulting files of merged matrix
        (ClassificationResults_seed_<seed>.txt)
    csv_path: str
        path to the directory which contains all *.csv files to merge
    runs: int
        number of random learning/validation samples-set
    crop_mix: bool
        inform if cropMix workflow is enabled
    annual_crop: list
        list of annual labels
    annual_crop_label_replacement: int
        replace annual labels by annual_crop_label_replacement
    """

    for seed in range(runs):
        # look for every possible class
        all_class = sorted(
            fu.getFieldElement(input_vector, "ESRI Shapefile", data_field,
                               "unique"))

        # build the merged matrix from every csv of the current seed
        all_conf = fu.FileSearch_AND(csv_path, True, f"seed_{seed}.csv")
        coordinates = fu.sortByFirstElem(fu.confCoordinatesCSV(all_conf))
        conf_mat = fu.gen_confusionMatrix(coordinates, all_class)

        if crop_mix:
            # keep a copy of the matrix before annual labels are merged
            write_csv(conf_mat, all_class,
                      f"{csv_out}/MatrixBeforeClassMerge_{seed}.csv")
            conf_mat, all_class = replace_annual_crop_in_conf_mat(
                conf_mat, all_class, annual_crop,
                annual_crop_label_replacement)
        write_csv(conf_mat, all_class, f"{csv_out}/Classif_Seed_{seed}.csv")

        nbr_good = conf_mat.trace()
        nbr_sample = conf_mat.sum()

        # only an empty matrix must be guarded against : a single sample is
        # enough to compute an accuracy
        if nbr_sample > 0:
            overall_acc = float(nbr_good) / float(nbr_sample)
        else:
            overall_acc = 0.0
        kappa = compute_kappa(conf_mat)
        precision = compute_precision_by_class(conf_mat, all_class)
        recall = compute_recall_by_class(conf_mat, all_class)
        f_score = compute_fscore_by_class(precision, recall, all_class)

        write_results(f_score, recall, precision, kappa, overall_acc,
                      all_class,
                      f"{txt_out}/ClassificationResults_seed_{seed}.txt")
Exemple #15
0
def create_tile_region_masks(tile_region: str, region_field: str,
                             tile_name: str, output_directory: str,
                             origin_name: str, img_ref: str) -> None:
    """
    For each region found in a tile, extract the region as a shapefile and
    rasterize it as a binary mask

    Parameters
    ----------
    tile_region : string
        path to a SQLite file containing polygons. Each feature is a region
    region_field : string
        region's field
    tile_name : string
        current tile name
    output_directory : string
        directory to save masks
    origin_name : string
        region's field vector file name
    img_ref : string
        path to a tile reference image
    """
    from iota2.Common import FileUtils as fut
    from iota2.Common import OtbAppBank as otb
    from iota2.Common.Utils import run
    all_regions_tmp = fut.getFieldElement(tile_region,
                                          driverName="SQLite",
                                          field=region_field.lower(),
                                          mode="unique",
                                          elemType="str")
    # transform sub region'name into complete region
    # (region '1f1' become region '1') ; several sub regions lead to the
    # same complete region, which must be processed only once
    all_regions = []
    for sub_region in all_regions_tmp:
        complete_region = sub_region.split("f")[0]
        if complete_region not in all_regions:
            all_regions.append(complete_region)

    # the queried table is named after the database file, lower-cased
    db_name = (os.path.splitext(os.path.basename(tile_region))[0]).lower()
    for region in all_regions:
        output_name = f"{origin_name}_region_{region}_{tile_name}.shp"
        output_path = os.path.join(output_directory, output_name)
        cmd = (
            f"ogr2ogr -f 'ESRI Shapefile' -sql \"SELECT * FROM {db_name}"
            f" WHERE {region_field}='{region}'\" {output_path} {tile_region}")
        run(cmd)

        # the mask is written next to the shapefile, with a .tif extension
        path, _ = os.path.splitext(output_path)
        tile_region_raster = f"{path}.tif"
        tile_region_app = otb.CreateRasterizationApplication({
            "in": output_path,
            "out": tile_region_raster,
            "im": img_ref,
            "mode": "binary",
            "pixType": "uint8",
            "background": "0",
            "mode.binary.foreground": "1"
        })
        tile_region_app.ExecuteAndWriteOutput()
    def test_splitbySets(self):
        """
        test the split of a given vector

        the number of features of each file returned by split_by_sets is
        compared to the number of 'learn' / 'validation' flags found in
        the 'seed_0' and 'seed_1' fields of the input vector
        """
        from iota2.Sampling.VectorFormatting import split_by_sets

        def read_field(vector_path, driver_name, field_name):
            """every value of a field, as strings"""
            return fut.getFieldElement(vector_path,
                                       driverName=driver_name,
                                       field=field_name,
                                       mode="all",
                                       elemType="str")

        # launch function
        s0_val, s0_learn, s1_val, s1_learn = split_by_sets(
            self.in_vector,
            2,
            self.test_working_directory,
            2154,
            2154,
            "T31TCJ",
            cross_valid=False,
            split_ground_truth=True)

        # assert : reference counts come from the input vector flags
        seed_0 = read_field(self.in_vector, "ESRI Shapefile", "seed_0")
        seed_1 = read_field(self.in_vector, "ESRI Shapefile", "seed_1")
        expected = {
            "seed_0_learn": seed_0.count("learn"),
            "seed_0_val": seed_0.count("validation"),
            "seed_1_learn": seed_1.count("learn"),
            "seed_1_val": seed_1.count("validation"),
        }

        # number of features in each produced file
        outputs = (
            ("seed_0_learn", s0_learn),
            ("seed_0_val", s0_val),
            ("seed_1_learn", s1_learn),
            ("seed_1_val", s1_val),
        )
        produced = {
            key: len(read_field(vector_path, "SQLite", "region"))
            for key, vector_path in outputs
        }

        checks = (
            ("seed_0_learn", "wrong number of learning samples in seed 0"),
            ("seed_1_learn", "wrong number of learning samples in seed 1"),
            ("seed_0_val", "wrong number of validation samples in seed 0"),
            ("seed_1_val", "wrong number of validation samples in seed 1"),
        )
        for key, message in checks:
            self.assertTrue(produced[key] == expected[key], msg=message)
Exemple #17
0
def split_vector_by_region(in_vect: str,
                           output_dir: str,
                           region_field: str,
                           runs: Optional[int] = 1,
                           driver: Optional[str] = "ESRI shapefile",
                           proj_in: Optional[str] = "EPSG:2154",
                           proj_out: Optional[str] = "EPSG:2154",
                           mode: Optional[str] = "usually") -> List[str]:
    """
    create new files by regions (and by run) in input vector.

    For every seed in ``range(runs)`` and every distinct value of
    ``region_field``, the features flagged 'learn' in the ``seed_<seed>``
    column are exported with ogr2ogr to a temporary vector. This temporary
    vector is then rewritten (SQLite format) without the ``seed_*`` columns
    and removed.

    Parameters
    ----------
    in_vect : string
        input vector path
    output_dir : string
        path to output directory
    region_field : string
        field in in_vect describing regions
    runs : int
        number of seeds to process (seed_0 ... seed_<runs-1>)
    driver : string
        ogr driver
    proj_in : string
        input projection
    proj_out : string
        output projection
    mode : string
        any value different from "usually" adds a "SAR" tag to the name of
        the output vectors
    Return
    ------
    list
        paths to new output vectors
    """
    from iota2.Common import FileUtils as fut
    from iota2.Common.Utils import run

    # const
    tile_pos = 0
    learn_flag = "learn"
    table_name = "output"

    vec_name = os.path.split(in_vect)[-1]
    tile = vec_name.split("_")[tile_pos]
    extent = os.path.splitext(vec_name)[-1]

    regions = fut.getFieldElement(in_vect,
                                  driverName=driver,
                                  field=region_field,
                                  mode="unique",
                                  elemType="str")

    # layer name queried by the first ogr2ogr call
    table = vec_name.split(".")[0]
    if driver != "ESRI shapefile":
        table = "output"

    # in_vect is only read below: its fields are listed once instead of
    # once per seed
    fields_to_keep = ",".join(
        elem for elem in fut.get_all_fields_in_shape(in_vect, "SQLite")
        if "seed_" not in elem)
    # clause dropping the seed_* columns, identical for every output
    sql_clause = f"select GEOMETRY,{fields_to_keep} from {table_name}"

    output_paths = []
    # split vector
    for seed in range(runs):
        for region in regions:
            name_parts = [tile, "region", region, "seed" + str(seed)]
            if mode != "usually":
                name_parts += ["Samples", "SAR", "learn_tmp"]
            else:
                name_parts.append("Samples_learn_tmp")
            out_vec_name_learn = "_".join(name_parts)
            output_vec_learn = os.path.join(output_dir,
                                            out_vec_name_learn + extent)
            seed_clause_learn = f"seed_{seed}='{learn_flag}'"
            region_clause = f"{region_field}='{region}'"
            # split vectors by runs and learning sets
            sql_cmd_learn = (f"select * FROM {table} WHERE {seed_clause_learn}"
                             f" AND {region_clause}")
            cmd = (f'ogr2ogr -t_srs {proj_out} -s_srs {proj_in} -nln {table}'
                   f' -f "{driver}" -sql "{sql_cmd_learn}" {output_vec_learn} '
                   f'{in_vect}')
            run(cmd)

            # drop useless column
            output_vec_learn_out = output_vec_learn.replace("_tmp", "")
            cmd = (
                f"ogr2ogr -s_srs {proj_in} -t_srs {proj_out} -dialect "
                f"'SQLite' -f 'SQLite' -nln {table_name} -sql '{sql_clause}' "
                f"{output_vec_learn_out} {output_vec_learn}")
            run(cmd)
            output_paths.append(output_vec_learn_out)
            # the intermediate vector is no longer needed
            os.remove(output_vec_learn)

    return output_paths
    def test_VectorFormatting(self):
        """
        check Sampling.VectorFormatting.vector_formatting on tile T31TCJ

        a random function is used by vector_formatting, therefore the test
        only checks the number of features and the fields of the formatted
        vector
        """
        from iota2.Sampling.VectorFormatting import vector_formatting
        from iota2.Common import ServiceConfigFile as SCF
        from iota2.Common import IOTA2Directory
        from iota2.Common.Utils import run
        from iota2.VectorTools.ChangeNameField import changeName

        shape_extensions = (".prj", ".shp", ".dbf", ".shx")
        runs = 2

        # output directory of the run
        test_output = os.path.join(self.test_working_directory,
                                   "IOTA2_dir_VectorFormatting")

        # ground truth : only the geometry and the 'code' field are kept
        ground_truth = os.path.join(self.test_working_directory,
                                    "groundTruth_test.shp")
        run("ogr2ogr -s_srs EPSG:2154 -t_srs EPSG:2154 -dialect 'SQLite' -sql 'select GEOMETRY,code from t31tcj' {} {}".format(
            ground_truth, self.in_vector))

        # configuration used by the test
        cfg = SCF.serviceConfigFile(self.config_test)
        settings = (
            ('chain', 'outputPath', test_output),
            ('chain', 'groundTruth', ground_truth),
            ('chain', 'dataField', "code"),
            ('chain', 'cloud_threshold', 0),
            ('chain', 'merge_final_classifications', False),
            ('chain', 'runs', runs),
            ('GlobChain', 'proj', "EPSG:2154"),
            ('chain', 'regionPath', self.ref_region),
        )
        for section, key, value in settings:
            cfg.setParam(section, key, value)

        IOTA2Directory.generate_directories(test_output, check_inputs=False)

        # tile features directory and its reference image
        features_dir = os.path.join(test_output, "features", "T31TCJ")
        os.mkdir(features_dir)
        shutil.copy(self.ref_img,
                    os.path.join(features_dir, "MaskCommunSL.tif"))

        # envelope : copy of the region shape, 'region' renamed to 'FID'
        envelope_path = os.path.join(test_output, "envelope", "T31TCJ.shp")
        fut.cpShapeFile(self.ref_region.replace(".shp", ""),
                        envelope_path.replace(".shp", ""),
                        list(shape_extensions))
        changeName(envelope_path, "region", "FID")

        # cloud mask : copy of the region shape, 'region' renamed to 'cloud'
        cloud_path = os.path.join(features_dir, "CloudThreshold_0.shp")
        fut.cpShapeFile(self.ref_region.replace(".shp", ""),
                        cloud_path.replace(".shp", ""),
                        list(shape_extensions))
        changeName(cloud_path, "region", "cloud")

        def chain_param(name):
            """read a parameter of the 'chain' section"""
            return cfg.getParam('chain', name)

        # function inputs read back from the configuration
        ratio = chain_param('ratio')
        random_seed = chain_param('random_seed')
        cross_validation = chain_param("enableCrossValidation")
        split_ground_truth = chain_param('splitGroundTruth')
        merge_all_validation = chain_param(
            'fusionOfClassificationAllSamplesValidation')
        merge_final = chain_param('merge_final_classifications')
        merge_final_ratio = chain_param('merge_final_classifications_ratio')
        region_vec = chain_param('regionPath')
        epsg = int(cfg.getParam('GlobChain', 'proj').split(":")[-1])
        region_field = chain_param('regionField')

        # launch function
        vector_formatting("T31TCJ",
                          test_output,
                          ground_truth,
                          "code",
                          0,
                          ratio,
                          random_seed,
                          cross_validation,
                          split_ground_truth,
                          merge_all_validation,
                          runs,
                          epsg,
                          region_field,
                          merge_final,
                          merge_final_ratio,
                          region_vec,
                          working_directory=None)

        def count_codes(vector):
            """number of values found in the 'code' field of a shapefile"""
            return len(
                fut.getFieldElement(vector,
                                    driverName="ESRI Shapefile",
                                    field="code",
                                    mode="all",
                                    elemType="str"))

        # check nb features
        nb_features_origin = count_codes(ground_truth)
        formatted_dir = os.path.join(test_output, "formattingVectors")
        test_vector = fut.FileSearch_AND(formatted_dir, True, "T31TCJ.shp")[0]
        nb_features_test = count_codes(test_vector)
        self.assertTrue(nb_features_origin == nb_features_test,
                        msg="wrong number of features")

        # check fields : the original ones plus the ones added by the function
        origin_fields = fut.get_all_fields_in_shape(ground_truth)
        test_fields = fut.get_all_fields_in_shape(test_vector)
        expected_fields = origin_fields + [
            'region', 'originfid', 'seed_0', 'seed_1', 'tile_o'
        ]
        self.assertTrue(len(expected_fields) == len(test_fields))
        self.assertTrue(all(field in test_fields for field in expected_fields))
Exemple #19
0
def confusion_sar_optical_parameter(iota2_dir: str,
                                    logger: Optional[Logger] = LOGGER):
    """
    return a list of tuple containing the reference shapeFile and the
    associated classification to compute a confusion matrix

    Reference vectors (.shp) are searched in <iota2_dir>/dataAppVal/bymodels
    and classifications (.tif) in <iota2_dir>/classif. Files are grouped by
    the (seed, tile, model) triplet read from their "_"-separated names.
    A group is used only if it contains exactly three files : one vector,
    one SAR classification (name containing "SAR.tif") and one other
    classification. Every other group is reported with logger.debug and
    skipped.

    Parameters
    ----------
    iota2_dir : string
        path to the iota2 output directory
    logger : Logger
        logger receiving the debug messages

    Return
    ------
    list
        tuples (reference vector, classification). For each complete group
        the optical classification pair comes first, then the SAR one
    """
    ref_vectors_dir = os.path.join(iota2_dir, "dataAppVal", "bymodels")
    classifications_dir = os.path.join(iota2_dir, "classif")

    # positions of the key elements in the "_"-separated file names
    vector_seed_pos = 4
    vector_tile_pos = 0
    vector_model_pos = 2
    classif_seed_pos = 5
    classif_tile_pos = 1
    classif_model_pos = 3

    vectors = fu.FileSearch_AND(ref_vectors_dir, True, ".shp")
    classifications = fu.FileSearch_AND(classifications_dir, True, "Classif",
                                        "model", "seed", ".tif")

    keyed_files = []
    for vector in vectors:
        name_parts = os.path.basename(vector).split("_")
        key = (name_parts[vector_seed_pos], name_parts[vector_tile_pos],
               name_parts[vector_model_pos])
        fields = fu.get_all_fields_in_shape(vector)
        # vectors without any element in their first field are ignored
        if len(
                fu.getFieldElement(vector,
                                   driverName="ESRI Shapefile",
                                   field=fields[0],
                                   mode="all",
                                   elemType="str")) != 0:
            keyed_files.append((key, vector))
    for classif in classifications:
        name_parts = os.path.basename(classif).split("_")
        # the seed is the last name element : drop the file extension
        seed = name_parts[classif_seed_pos].split(".tif")[0]
        key = (seed, name_parts[classif_tile_pos],
               name_parts[classif_model_pos])
        keyed_files.append((key, classif))

    # group by keys, then keep only complete groups
    output_parameters = []
    for _, params in fu.sortByFirstElem(keyed_files):
        if len(params) != 3:
            logger.debug(f"all parameter to use Dempster-Shafer fusion, "
                         f"not found : {params}")
            continue
        # reset for each group : a missing file must never be replaced by
        # the one found in a previous group
        ref_vector = classif_opt = classif_sar = None
        for sub_param in params:
            if ".shp" in sub_param:
                ref_vector = sub_param
            elif "SAR.tif" in sub_param:
                classif_sar = sub_param
            elif ".tif" in sub_param:
                classif_opt = sub_param
        if None in (ref_vector, classif_opt, classif_sar):
            logger.debug(f"all parameter to use Dempster-Shafer fusion, "
                         f"not found : {params}")
            continue
        output_parameters.append((ref_vector, classif_opt))
        output_parameters.append((ref_vector, classif_sar))

    return output_parameters
Exemple #20
0
def splitInSubSets(vectoFile,
                   dataField,
                   regionField,
                   ratio=0.5,
                   seeds=1,
                   driver_name="SQLite",
                   learningFlag="learn",
                   validationFlag="validation",
                   unusedFlag="unused",
                   crossValidation=False,
                   splitGroundTruth=True,
                   random_seed=None):
    """
    This function is dedicated to split a shape into N subsets
    of training and validations samples by adding a new field
    by subsets (seed_X) containing 'learn', 'validation' or 'unused'

    The input file is updated in place, nothing is returned.

    Parameters
    ----------

    vectoFile : string
        input vector file
    dataField : string
        field which discriminate class
    regionField : string
        field which discriminate region
    ratio : float
        ratio between learn and validation features
    seeds : int
        number of random splits
    driver_name : string
        OGR driver name
    learningFlag : string
        learning flag
    validationFlag : string
        validation flag
    unusedFlag : string
        unused flag
    crossValidation : bool
        enable cross validation split
    splitGroundTruth : bool
        enable the ground truth split. If False, validation features are
        also flagged as learning features
    random_seed : int
        random seed, 'random_seed + seed' is used for the split number
        'seed'. Ignored if crossValidation is enabled
    """
    driver = ogr.GetDriverByName(driver_name)
    source = driver.Open(vectoFile, 1)
    layer = source.GetLayer(0)

    class_avail = fut.getFieldElement(vectoFile,
                                      driverName=driver_name,
                                      field=dataField,
                                      mode="unique",
                                      elemType="int")
    region_avail = fut.getFieldElement(vectoFile,
                                       driverName=driver_name,
                                       field=regionField,
                                       mode="unique",
                                       elemType="str")
    all_fields = fut.get_all_fields_in_shape(vectoFile, driver=driver_name)

    # only the feature identifiers are needed to write the flags
    fid = [feature.GetFID() for feature in layer]

    id_learn = []
    id_val = []
    if crossValidation:
        id_CrossVal = get_CrossValId(layer, dataField, class_avail, seeds,
                                     regionField, region_avail)
    for seed in range(seeds):
        source = driver.Open(vectoFile, 1)
        layer = source.GetLayer(0)

        seed_field_name = "seed_" + str(seed)
        seed_field = ogr.FieldDefn(seed_field_name, ogr.OFTString)

        # the field is created only if it was not in the input file
        if seed_field_name not in all_fields:
            layer.CreateField(seed_field)
        if crossValidation is False:
            random_seed_number = None
            if random_seed is not None:
                random_seed_number = random_seed + seed
            id_learn, id_val = get_random_poly(layer, dataField, class_avail,
                                               ratio, regionField,
                                               region_avail,
                                               random_seed_number)
        else:
            id_learn = id_CrossVal[seed]

        if splitGroundTruth is False:
            id_learn = id_learn.union(id_val)
        for i in fid:
            # features found in no subset keep a None flag
            flag = None
            if i in id_learn:
                flag = learningFlag
                # cross validation : the last fold is the validation one
                if seed == seeds - 1 and crossValidation:
                    flag = validationFlag
            elif crossValidation:
                flag = unusedFlag
            elif i in id_val:
                flag = validationFlag

            feat = layer.GetFeature(i)
            feat.SetField(seed_field_name, flag)
            layer.SetFeature(feat)
        # drop the layer reference before the next seed re-opens the file
        i = layer = None
Exemple #21
0
def extract_maj_vote_samples(vec_in: str,
                             vec_out: str,
                             ratio_to_keep: float,
                             data_field: str,
                             region_field: str,
                             driver_name: Optional[str] = "ESRI Shapefile"):
    """
    dedicated to extract samples by class according to a ratio.
    Samples are remove from vec_in and place in vec_out

    vec_in is rewritten : a temporary copy without the extracted samples is
    created, the original files are removed, then the copy takes the place
    of vec_in.

    Parameters
    ----------
    vec_in : string
        path to a shapeFile (.shp)
    vec_out : string
        path to a sqlite (.sqlite)
    ratio_to_keep : float
        percentage of samples to extract ratio_to_keep = 0.1
        mean extract 10% of each class in each regions
    data_field : string
        field containing class labels
    region_field : string
        field containing regions labels
    driver_name : string
        OGR driver
    """
    from osgeo import ogr
    from iota2.Common import FileUtils as fut
    from iota2.Sampling import SplitInSubSets as subset
    from iota2.Common.Utils import run
    class_avail = fut.getFieldElement(vec_in,
                                      driverName=driver_name,
                                      field=data_field,
                                      mode="unique",
                                      elemType="int")
    region_avail = fut.getFieldElement(vec_in,
                                       driverName=driver_name,
                                       field=region_field,
                                       mode="unique",
                                       elemType="str")

    driver = ogr.GetDriverByName(driver_name)
    source = driver.Open(vec_in, 1)
    layer = source.GetLayer(0)

    sample_id_to_extract, _ = subset.get_random_poly(layer, data_field,
                                                     class_avail,
                                                     ratio_to_keep,
                                                     region_field,
                                                     region_avail)
    # release the OGR handles : vec_in is read by ogr2ogr, then removed and
    # re-created below, it must not stay open in update mode meanwhile
    layer = None
    source = None

    # Create new file with targeted FID
    fid_samples_in = built_where_sql_exp(sample_id_to_extract, clause="in")
    cmd = f"ogr2ogr -where '{fid_samples_in}' -f 'SQLite' {vec_out} {vec_in}"
    run(cmd)

    # remove in vec_in targeted FID
    vec_in_rm = vec_in.replace(".shp", "_tmp.shp")
    fid_samples_not_in = built_where_sql_exp(sample_id_to_extract,
                                             clause="not in")
    cmd = f"ogr2ogr -where '{fid_samples_not_in}' {vec_in_rm} {vec_in}"
    run(cmd)

    fut.removeShape(vec_in.replace(".shp", ""),
                    [".prj", ".shp", ".dbf", ".shx"])

    # the filtered copy becomes the new vec_in
    cmd = f"ogr2ogr {vec_in} {vec_in_rm}"
    run(cmd)

    fut.removeShape(vec_in_rm.replace(".shp", ""),
                    [".prj", ".shp", ".dbf", ".shx"])
Exemple #22
0
def generate_samples_classif_mix(folder_sample: str,
                                 working_directory: str,
                                 train_shape: str,
                                 path_wd: str,
                                 output_path: str,
                                 annual_crop: List[Union[str, int]],
                                 all_class: List[Union[str, int]],
                                 data_field: str,
                                 previous_classif_path: str,
                                 proj: int,
                                 runs: Union[str, int],
                                 enable_cross_validation: bool,
                                 region_field: str,
                                 validity_threshold: int,
                                 target_resolution: int,
                                 sar_optical_post_fusion: bool,
                                 sensors_parameters: sensors_params_type,
                                 folder_features: Optional[str] = None,
                                 ram: Optional[int] = 128,
                                 w_mode: Optional[bool] = False,
                                 test_mode: Optional[bool] = False,
                                 test_shape_region: Optional[str] = None,
                                 sample_sel: Optional[str] = None,
                                 mode: Optional[str] = "usually",
                                 logger: Optional[Logger] = LOGGER):
    """
    usage : from one classification, chose randomly annual sample merge
            with non annual sample and extract features.
    IN:
        folder_sample [string] : output folder
        working_directory [string] : computation folder
        train_shape [string] : vector shape (polygons) to sample
        path_wd [string] : if different from None, enable HPC mode
                           (copy at ending)
        output_path [string] : directory containing the "features" and
                               "samplesSelection" sub-directories
        annual_crop [list of string/int] : list containing annual crops
                                           ex : [11,12]
        all_class [list of string/int] : list containing all classes in
                                         vector shape ex : [11,12,51..]
        data_field [string] : data's field into vector shape
                              (lower-cased before use)
        previous_classif_path [string] : path to the iota2 output
                                         directory which generate previous
                                         classification
        proj [int] : projection code, used as "EPSG:<proj>"
        runs [int] : number of runs, decremented by one if
                     enable_cross_validation is set
        enable_cross_validation [bool] : see runs
        region_field [string] : region's field into vector shape
        validity_threshold, target_resolution :
            forwarded to genAnnualShapePoints
        sar_optical_post_fusion, sensors_parameters, ram, mode :
            forwarded to get_features_application
        folder_features [string] : used only if w_mode is set
        w_mode [bool] : if set, <working_directory>/<tile>/tmp is passed to
                        updateDirectory with <folder_features>/<tile>/tmp
                        as target
        test_mode [bool] : enable test mode, None is then returned
        test_shape_region [string] : path to the shapefile representing
                                     region in the tile.
        sample_sel [string] : sample selection to use instead of the one
                              returned by prepare_selection
        logger [Logger] : logger receiving the warnings

    OUT:
        list of vectors returned by split_vector_by_region, or None
        (test_mode enabled, or the "_Samples.sqlite" output already exists
        in folder_sample)
    """
    from iota2.Sampling.SamplesSelection import prepare_selection
    from iota2.Sampling import GenAnnualSamples as genAS
    # nothing to do if the output is already in folder_sample
    if os.path.exists(
            os.path.join(
                folder_sample,
                train_shape.split("/")[-1].replace(".shp",
                                                   "_Samples.sqlite"))):
        return None

    if enable_cross_validation:
        runs = runs - 1
    features_path = os.path.join(output_path, "features")
    sample_sel_directory = os.path.join(output_path, "samplesSelection")

    work_dir = sample_sel_directory
    if working_directory:
        work_dir = working_directory

    data_field = data_field.lower()

    # the tile name is the train_shape file name without its extension
    current_tile = (os.path.splitext(os.path.basename(train_shape))[0])

    if sample_sel:
        sample_selection = sample_sel
    else:
        sample_selection = prepare_selection(sample_sel_directory,
                                             current_tile)

    non_annual_shape = os.path.join(
        work_dir, "{}_nonAnnual_selection.sqlite".format(current_tile))
    annual_shape = os.path.join(
        work_dir, "{}_annual_selection.sqlite".format(current_tile))
    # keep all the perennial (non annual) classes
    nb_feat_nannu = extract_class(sample_selection, non_annual_shape,
                                  all_class, data_field)

    regions = fu.getFieldElement(train_shape,
                                 driverName="ESRI Shapefile",
                                 field=region_field,
                                 mode="unique",
                                 elemType="str")
    print(sample_selection)
    print(train_shape)
    # get the distribution of the annual classes by seed and by region
    # -> allows to use annu_repartition[11][R][S]
    annu_repartition = get_repartition(sample_selection, annual_crop,
                                       data_field, region_field, regions, runs)

    nb_feat_annu = get_number_annual_sample(annu_repartition)

    # raster ref (in order to extract ROIs)
    ref = fu.FileSearch_AND(os.path.join(features_path, current_tile), True,
                            "MaskCommunSL.tif")[0]

    if nb_feat_nannu > 0:
        all_coord = get_points_coord_in_shape(non_annual_shape, "SQLite")
    else:
        all_coord = [0]

    # ROIs extracted from the previous classification and validity rasters
    classification_raster = extract_roi(os.path.join(previous_classif_path,
                                                     "final",
                                                     "Classif_Seed_0.tif"),
                                        current_tile,
                                        path_wd,
                                        output_path,
                                        f"Classif_{current_tile}",
                                        ref,
                                        test_mode,
                                        test_output=folder_sample)
    validity_raster = extract_roi(os.path.join(previous_classif_path, "final",
                                               "PixelsValidity.tif"),
                                  current_tile,
                                  path_wd,
                                  output_path,
                                  f"Cloud{current_tile}",
                                  ref,
                                  test_mode,
                                  test_output=folder_sample)

    # build regions mask into the tile
    masks = [
        get_region_model_in_tile(current_tile,
                                 current_region,
                                 output_path,
                                 path_wd,
                                 classification_raster,
                                 region_field,
                                 test_mode,
                                 test_shape_region,
                                 test_output_folder=folder_sample)
        for current_region in regions
    ]

    # annual_points is defined only if annual samples were requested : the
    # conditions below always test nb_feat_annu > 0 before reading it
    if nb_feat_annu > 0:
        annual_points = genAS.genAnnualShapePoints(
            all_coord, "SQLite", working_directory, target_resolution,
            annual_crop, data_field, current_tile, validity_threshold,
            validity_raster, classification_raster, masks, train_shape,
            annual_shape, proj, region_field, runs, annu_repartition)

    merge_name = train_shape.split("/")[-1].replace(".shp", "_selectionMerge")
    # NOTE(review): working_directory is used from here on (not work_dir),
    # os.path.join raises TypeError if it is None - confirm callers always
    # provide it
    sample_selection = os.path.join(working_directory, f"{merge_name}.sqlite")
    if (nb_feat_nannu > 0) and (nb_feat_annu > 0 and annual_points):
        fu.mergeSQLite(merge_name, working_directory,
                       [non_annual_shape, annual_shape])

    elif (nb_feat_nannu > 0) and not (nb_feat_annu > 0 and annual_points):
        # If not annual samples can be added then annual classes are ignored
        shutil.copy(non_annual_shape, sample_selection)
    elif not (nb_feat_nannu > 0) and (nb_feat_annu > 0 and annual_points):
        # If not non annual samples are found then use all annual samples
        shutil.copy(annual_shape, sample_selection)
    samples = os.path.join(
        working_directory,
        train_shape.split("/")[-1].replace(".shp", "_Samples.sqlite"))

    sample_extr, dep_tmp = get_features_application(
        sample_selection, working_directory, samples, data_field, output_path,
        sar_optical_post_fusion, sensors_parameters, ram, mode)

    # the application is executed in a child process, instead of
    # sampleExtr.ExecuteAndWriteOutput()
    multi_proc = mp.Process(target=executeApp, args=[sample_extr])
    multi_proc.start()
    multi_proc.join()

    split_vectors = split_vector_by_region(in_vect=samples,
                                           output_dir=working_directory,
                                           region_field=region_field,
                                           runs=int(runs),
                                           driver="SQLite",
                                           proj_in="EPSG:" + str(proj),
                                           proj_out="EPSG:" + str(proj))
    if test_mode:
        split_vectors = None

    # NOTE(review): if test_mode and path_wd are both set, split_vectors is
    # None here and the loop below raises TypeError - confirm this
    # combination is never used
    if path_wd and os.path.exists(samples):
        for sample in split_vectors:
            shutil.copy(sample, folder_sample)

    # remove the intermediate selections
    if os.path.exists(non_annual_shape):
        os.remove(non_annual_shape)
    if os.path.exists(annual_shape):
        os.remove(annual_shape)

    if w_mode:
        target_directory = os.path.join(folder_features, current_tile)
        if not os.path.exists(target_directory):
            try:
                os.mkdir(target_directory)
            except OSError:
                logger.warning(f"{target_directory} allready exists")
            try:
                os.mkdir(os.path.join(target_directory, "tmp"))
            except OSError:
                logger.warning(f"{target_directory}/tmp allready exists")
        from_dir = os.path.join(working_directory, current_tile, "tmp")
        to_dir = os.path.join(target_directory, "tmp")
        if os.path.exists(from_dir):
            fu.updateDirectory(from_dir, to_dir)

    # remove the temporary data produced by this function
    os.remove(samples)
    os.remove(classification_raster)
    os.remove(validity_raster)
    for mask in masks:
        os.remove(mask)
    return split_vectors
Exemple #23
0
def generate_samples(train_shape_dic,
                     path_wd,
                     data_field: str,
                     output_path: str,
                     annual_crop: List[Union[str, int]],
                     crop_mix: bool,
                     auto_context_enable: bool,
                     region_field: str,
                     proj: Union[int, str],
                     enable_cross_validation: bool,
                     runs: int,
                     sensors_parameters: sensors_params_type,
                     sar_optical_post_fusion: bool,
                     samples_classif_mix: Optional[bool] = False,
                     output_path_annual: Optional[str] = None,
                     ram: Optional[int] = 128,
                     w_mode: Optional[int] = False,
                     folder_annual_features: Optional[str] = None,
                     previous_classif_path: Optional[str] = None,
                     validity_threshold: Optional[int] = None,
                     target_resolution: Optional[int] = None,
                     test_mode: Optional[bool] = False,
                     test_shape_region: Optional[str] = None,
                     sample_selection: Optional[str] = None,
                     logger: Optional[Logger] = LOGGER):
    """
    usage : generation of vector shape of points with features, the
            generation strategy is chosen from crop_mix,
            auto_context_enable and samples_classif_mix

    IN:
    train_shape_dic [dict] : dictionnary containing a shape file in value,
                             its first key is the mode
    path_wd [string] : working directory, <output_path>/learningSamples is
                       used if not set
    data_field [str] : data field name
    output_path [str] : the ouput path
    annual_crop : the annual crops, read through its 'data' attribute
    crop_mix [bool] : activate the crop mix mode
    auto_context_enable [bool] : activate auto context mode
    region_field [str] : the region field name
    proj [int or str] : the working projection, a string like "EPSG:2154"
                        is converted to int in classif mix mode
    enable_cross_validation [bool] : enable cross validation
    runs [int] : number of runs
    samples_classif_mix [bool] : with crop_mix, select the classif mix
                                 strategy
    test_mode [bool] : enable test, the samples are then returned
    logger [Logger] : logger receiving infos and warnings
    every other parameter is forwarded as is to the selected strategy

    OUT:
    samples : output of the selected strategy if test_mode is enabled,
              None otherwise
    """

    # mode must be "usally" or "SAR"
    mode = list(train_shape_dic.keys())[0]
    train_shape = train_shape_dic[mode]

    # non annual classes : all the classes without the annual crops
    all_class = fu.getFieldElement(train_shape,
                                   "ESRI Shapefile",
                                   data_field,
                                   mode="unique",
                                   elemType="str")
    for current_class in annual_crop.data:
        try:
            all_class.remove(str(current_class))
        except ValueError:
            logger.warning(
                f"Class {current_class} doesn't exist in {train_shape}")

    logger.info(f"All classes: {all_class}")
    logger.info(f"Annual crop: {annual_crop}")

    folder_sample = os.path.join(output_path, "learningSamples")
    folder_features = os.path.join(output_path, "features")
    if not os.path.exists(folder_sample):
        try:
            os.mkdir(folder_sample)
        except OSError:
            logger.warning(f"{folder_sample} allready exists")

    # computations are done in path_wd when it is provided
    working_directory = path_wd or folder_sample

    if crop_mix is False or auto_context_enable:
        samples = generate_samples_simple(
            folder_sample, working_directory, train_shape, path_wd, data_field,
            region_field, output_path, runs, proj, enable_cross_validation,
            sensors_parameters, sar_optical_post_fusion, ram, w_mode,
            folder_features, sample_selection, mode)
    elif crop_mix is True:
        if samples_classif_mix is False:
            samples = generate_samples_crop_mix(
                folder_sample, working_directory, output_path,
                output_path_annual, train_shape, path_wd, annual_crop,
                all_class, data_field, folder_features,
                folder_annual_features, enable_cross_validation, runs,
                region_field, proj, sar_optical_post_fusion,
                sensors_parameters, ram, w_mode, test_mode, sample_selection,
                mode)
        elif samples_classif_mix is True:
            # the classif mix strategy expects the projection code only
            if isinstance(proj, str):
                proj = int(proj.split(":")[-1])
            samples = generate_samples_classif_mix(
                folder_sample, working_directory, train_shape, path_wd,
                output_path, annual_crop, all_class, data_field,
                previous_classif_path, proj, runs, enable_cross_validation,
                region_field, validity_threshold, target_resolution,
                sar_optical_post_fusion, sensors_parameters, folder_features,
                ram, w_mode, test_mode, test_shape_region, sample_selection,
                mode)

    return samples if test_mode else None
Exemple #24
0
def get_regions_area(
        vectors: List[str], regions: List[str], formatting_vectors_dir: str,
        working_directory: "Optional[str]", region_field: str
) -> "Tuple[Dict[str, float], Dict[str, List[str]], List[str]]":
    """
    Compute the cumulated polygon area of every region over a set of vectors.

    Each input vector is first converted to SQLite with ``ogr2ogr``; the
    areas are then summed per region value through a SpatiaLite query.

    Parameters
    ----------
    vectors
        paths to the vector files to process
    regions
        all possible region values; they are the keys of both returned
        dictionaries
    formatting_vectors_dir
        directory receiving the SQLite copies when ``working_directory``
        is not set
    working_directory
        if truthy, directory receiving the SQLite copies instead of
        ``formatting_vectors_dir``
    region_field
        name of the field holding the region value

    Returns
    -------
    tuple
        ``(dico_region_area, dico_region_tile, tmp_data)`` where
        ``dico_region_area`` maps each region to its summed area,
        ``dico_region_tile`` maps each region to the SQLite files in which
        it was found, and ``tmp_data`` lists every SQLite file created.

    Raises
    ------
    KeyError
        if a vector contains a region value which is not in ``regions``
    """
    from iota2.Common.Utils import run
    from iota2.Common import FileUtils as fut
    import sqlite3 as db

    tmp_data = []
    # every known region starts with a null area and no associated file
    dico_region_area = {reg: 0.0 for reg in regions}
    dico_region_tile = {reg: [] for reg in regions}

    # the destination directory does not depend on the current vector
    transform_dir = (working_directory
                     if working_directory else formatting_vectors_dir)

    for vector in vectors:
        # move vectors to sqlite (faster format)
        transform_vector_name = os.path.split(vector)[-1].replace(
            ".shp", ".sqlite")
        sqlite_vector = os.path.join(transform_dir, transform_vector_name)
        cmd = "ogr2ogr -f 'SQLite' {} {}".format(sqlite_vector, vector)
        run(cmd)
        tmp_data.append(sqlite_vector)
        region_vector = fut.getFieldElement(sqlite_vector,
                                            driverName="SQLite",
                                            field=region_field,
                                            mode="unique",
                                            elemType="str")
        table_name = transform_vector_name.replace(".sqlite", "").lower()
        # Identifiers cannot be bound, but the region value can: binding it
        # keeps the query valid for non-numeric region labels.
        sql_clause = (f"SELECT AREA(GEOMFROMWKB(GEOMETRY)) FROM "
                      f"{table_name} WHERE {region_field}=?")

        conn = db.connect(sqlite_vector)
        try:
            conn.enable_load_extension(True)
            conn.load_extension("mod_spatialite")
            cursor = conn.cursor()
            for current_region in region_vector:
                cursor.execute(sql_clause, (current_region, ))
                dico_region_area[current_region] += sum(
                    area[0] for area in cursor.fetchall())

                # the stored value is the sqlite path, so test that one
                if sqlite_vector not in dico_region_tile[current_region]:
                    dico_region_tile[current_region].append(sqlite_vector)
        finally:
            # release the database file even if a query fails
            conn.close()

    return dico_region_area, dico_region_tile, tmp_data