Exemple #1
0
def DataAugmentationSynthetic(samples,
                              groundTruth,
                              dataField,
                              strategies,
                              workingDirectory=None):
    """Compute how many samples must be added to the sample set and launch
    the data augmentation method.

    Nothing is done unless the value returned by
    ``GetRegionFromSampleName(samples)`` is listed in
    ``strategies["target_models"]`` or ``"all"`` is listed there.

    Parameters
    ----------
    samples : string
        path to the vector file (opened with the SQLite driver) whose samples
        are augmented
    groundTruth : string
        path to the original ground truth vector file, used to list its
        integer / float fields; those also present in ``samples`` are passed
        to ``DoAugmentation`` as ``excluded_fields``
    dataField : string
        data field's name in samples
    strategies : dict
        augmentation settings. Keys read here:
        ``"target_models"``, ``"samples.strategy"`` and ``"strategy"``
        (mandatory); ``"samples.strategy.minNumber"``,
        ``"samples.strategy.byClass"``, ``"strategy.jitter.stdfactor"`` and
        ``"strategy.smote.neighbors"`` (optional, None when missing)
    workingDirectory : string
        path to a working directory
    """
    from collections import Counter

    target_models = strategies["target_models"]
    if not (GetRegionFromSampleName(samples) in target_models
            or "all" in target_models):
        return

    # number of samples currently available for each class value
    class_count = Counter(
        fut.getFieldElement(samples,
                            driverName="SQLite",
                            field=dataField,
                            mode="all",
                            elemType="int"))

    class_augmentation = SamplesAugmentationCounter(
        class_count,
        mode=strategies["samples.strategy"],
        minNumber=strategies.get("samples.strategy.minNumber", None),
        byClass=strategies.get("samples.strategy.byClass", None))

    fields_types = GetFieldsType(groundTruth)

    # The original test looked for "flaot", which could never match, so float
    # fields were silently kept.
    # NOTE(review): OGR itself names floating-point fields "Real"; if
    # GetFieldsType reports OGR type names, "real" must be matched as well --
    # confirm against GetFieldsType.
    numeric_markers = ("int", "float")
    excluded_fields_origin = [
        field_name.lower()
        for field_name, field_type in fields_types.items()
        if any(marker in field_type for marker in numeric_markers)
    ]

    # only fields which actually exist in the sample file can be excluded
    samples_fields = fut.get_all_fields_in_shape(samples, driver='SQLite')
    excluded_fields = list(
        set(excluded_fields_origin).intersection(samples_fields))
    excluded_fields.append("originfid")

    DoAugmentation(samples,
                   class_augmentation,
                   strategy=strategies["strategy"],
                   field=dataField,
                   excluded_fields=excluded_fields,
                   Jstdfactor=strategies.get("strategy.jitter.stdfactor",
                                             None),
                   Sneighbors=strategies.get("strategy.smote.neighbors",
                                             None),
                   workingDirectory=workingDirectory)
Exemple #2
0
def createRegionsByTiles(shapeRegion,
                         field_Region,
                         pathToEnv,
                         pathOut,
                         pathWd,
                         logger_=logger):
    """
    create a shapeFile into tile's envelope for each regions in shapeRegion and for each tiles
    IN :
        - shapeRegion : the shape which contains all regions
        - field_Region : the field into the region's shape which describes each tile belong to which model
        - pathToEnv : path to the tile's envelope with priority
        - pathOut : path to store all resulting shapeFile
        - pathWd : path to working directory; None (or an empty string)
                   selects the sequential case where everything is produced
                   directly in pathOut
        - logger_ : logger used to report each region / tile extraction
    OUT :
        - list of the values returned by fu.ClipVectorData, one per
          (region, tile) pair
    """
    # A single predicate drives both the choice of the output directory and
    # the final copy / clean-up step, so that an empty pathWd can no longer
    # send the clips to the current directory without ever copying them.
    use_working_dir = bool(pathWd)
    pathName = pathWd if use_working_dir else pathOut

    #getAllTiles
    AllTiles = fu.FileSearch_AND(pathToEnv, True, ".shp")
    regionList = fu.getFieldElement(shapeRegion, "ESRI Shapefile",
                                    field_Region, "unique")
    shpRegionList = splitVectorLayer(shapeRegion, field_Region, "int",
                                     regionList, pathName)
    AllClip = []
    for shp in shpRegionList:
        for tile in AllTiles:
            logger_.info("Extract %s in %s", shp, tile)
            AllClip.append(fu.ClipVectorData(shp, tile, pathName))

    if use_working_dir:
        # copy every file of each clipped shapefile (.shp, .shx, .dbf, ...)
        # from the working directory to pathOut
        for clip in AllClip:
            # splitext only strips the extension; str.replace would also
            # rewrite a ".shp" found anywhere else in the path
            cmd = "cp " + os.path.splitext(clip)[0] + "* " + pathOut
            run(cmd)
    else:
        # sequential case : the per-region shapefiles were written in
        # pathOut, remove them
        for shp in shpRegionList:
            base = os.path.splitext(shp)[0]
            for extension in (".shp", ".shx", ".dbf", ".prj"):
                os.remove(base + extension)

    return AllClip
Exemple #3
0
def plotRelation(finalDataBasePath, dataField, seed, iota2Folder):
    """Plot, class by class, the confidence as a function of the validity.

    Every feature of the database is read once to collect its
    ("confidence", "validity") pair under its class value and to get the
    overall validity / confidence ranges, which are used as axis limits for
    all the plots.

    Parameters
    ----------
    finalDataBasePath : string
        path to the database, opened with the SQLite driver; its features
        must carry the "validity" and "confidence" fields
    dataField : string
        name of the field containing the class value
    seed : int
        seed number, only used to build the output file names
    iota2Folder : string
        root folder; output paths are built under <iota2Folder>/final/TMP/

    Return
    ------
    list
        the output paths given to correlation.plotCorrelation, one per class
    """
    outputs = []
    nomenclature = {
        10: "annualCrop",
        11: "ete",
        12: "hiver",
        211: "prairie",
        221: "verger",
        222: "vigne",
        223: "olivier",
        224: "arboriculture",
        31: "foret feuillus",
        32: "foret coniferes",
        33: "forets melangees",
        34: "pelouses",
        35: "estives-landes",
        36: "lande ligneuse",
        41: "bati",
        42: "bati diffus",
        43: "zones ind et com",
        44: "surface route",
        45: "surfaces minerales",
        46: "plages et dunes",
        51: "eau",
        52: "mer et oceans",
        53: "glaciers ou neiges et",
        255: "autres"
    }
    AllClasses = sorted(
        fut.getFieldElement(finalDataBasePath,
                            driverName="SQLite",
                            field=dataField,
                            mode="unique",
                            elemType="int"))

    # one (initially empty) list of (confidence, validity) pairs per class,
    # kept in ascending class order
    valuesByClass = OrderedDict((cClass, []) for cClass in AllClasses)

    driver = ogr.GetDriverByName("SQLite")
    dataSource = driver.Open(finalDataBasePath, 0)
    layer = dataSource.GetLayer()

    # starting bounds of the ranges, tightened while reading the features
    minVal = 1000000
    maxVal = 0
    minConf = 100
    maxConf = 0

    for feature in layer:
        val = feature.GetField("validity")
        conf = feature.GetField("confidence")
        minVal = min(minVal, val)
        maxVal = max(maxVal, val)
        minConf = min(minConf, conf)
        maxConf = max(maxConf, conf)
        valuesByClass[feature.GetField(dataField)].append((conf, val))

    for cClass, pairs in valuesByClass.items():
        confidences = [pair[0] for pair in pairs]
        validities = [pair[1] for pair in pairs]
        # a class missing from the nomenclature is named after its code
        # instead of aborting the whole run with a KeyError
        className = nomenclature.get(cClass, str(cClass))
        outputPath = iota2Folder + "/final/TMP/" + className.replace(
            " ", "_") + "_confFValid_Seed_" + str(seed) + ".png"
        # parenthesised single-argument form : valid with Python 2 and 3
        print("Creating : " + outputPath)
        parametres = correlation.Parametres()
        parametres.xlims = [minVal, maxVal]
        parametres.ylims = [minConf, maxConf]
        parametres.xBinStep = 1
        parametres.yBinStep = 1
        correlation.plotCorrelation(validities, confidences, "Validity",
                                    "Confidence", outputPath, parametres)
        outputs.append(outputPath)
    return outputs