Exemple #1
0
def DataAugmentationSynthetic(samples,
                              groundTruth,
                              dataField,
                              strategies,
                              workingDirectory=None):
    """Compute how many samples must be added per class and launch the augmentation.

    Nothing is done unless the region returned by
    ``GetRegionFromSampleName(samples)`` is listed in
    ``strategies["target_models"]`` or that entry contains ``"all"``.

    Parameters
    ----------
    samples : string
        path to the vector file (read with the SQLite driver) whose samples
        are augmented
    groundTruth : string
        path to the original ground truth vector file, used to list its
        integer / float fields
    dataField : string
        data field's name in samples
    strategies : dict
        augmentation settings. Keys read here: "target_models",
        "samples.strategy", "strategy" (mandatory) and
        "samples.strategy.minNumber", "samples.strategy.byClass",
        "strategy.jitter.stdfactor", "strategy.smote.neighbors" (optional,
        None when missing)
    workingDirectory : string
        path to a working directory
    """
    from collections import Counter

    target_models = strategies["target_models"]
    if not (GetRegionFromSampleName(samples) in target_models
            or "all" in target_models):
        return

    # number of available samples per class label
    class_count = Counter(
        fut.getFieldElement(samples,
                            driverName="SQLite",
                            field=dataField,
                            mode="all",
                            elemType="int"))

    class_augmentation = SamplesAugmentationCounter(
        class_count,
        mode=strategies["samples.strategy"],
        minNumber=strategies.get("samples.strategy.minNumber"),
        byClass=strategies.get("samples.strategy.byClass"))

    # Integer and float fields of the ground truth are excluded from the
    # augmentation.
    fields_types = GetFieldsType(groundTruth)
    excluded_fields_origin = [
        field_name.lower()
        for field_name, field_type in fields_types.items()
        if "int" in field_type or "float" in field_type
    ]

    # keep only the excluded fields which really exist in the samples file
    samples_fields = fut.get_all_fields_in_shape(samples, driver='SQLite')
    excluded_fields = list(
        set(excluded_fields_origin).intersection(samples_fields))
    excluded_fields.append("originfid")

    DoAugmentation(samples,
                   class_augmentation,
                   strategy=strategies["strategy"],
                   field=dataField,
                   excluded_fields=excluded_fields,
                   Jstdfactor=strategies.get("strategy.jitter.stdfactor"),
                   Sneighbors=strategies.get("strategy.smote.neighbors"),
                   workingDirectory=workingDirectory)
Exemple #2
0
    def getValuesSortedByCoordinates(vector):
        """Return the features of ``vector`` as (x, y, fields) tuples sorted by (x, y).

        ``drivername``, ``typegeom``, ``ogr``, ``fu``, ``getFieldValue`` and
        ``priority`` are read from the enclosing scope.

        Parameters
        ----------
        vector : string
            path to the vector file to read

        Return
        ------
        list
            one (x, y, fields_val) tuple per feature, where (x, y) is the
            point itself or the polygon centroid and fields_val is the value
            returned by ``getFieldValue``

        Raises
        ------
        ValueError
            if a feature is read while typegeom is neither 'point' nor
            'polygon'
        """
        driver = ogr.GetDriverByName(drivername)
        ds = driver.Open(vector, 0)
        lyr = ds.GetLayer()
        fields = fu.get_all_fields_in_shape(vector, drivername)

        values = []
        for feature in lyr:
            geom = feature.GetGeometryRef()
            if typegeom == "point":
                # plain floats: a trailing comma here would turn x into a
                # 1-tuple
                x = geom.GetX()
                y = geom.GetY()
            elif typegeom == "polygon":
                centroid = geom.Centroid()
                x = centroid.GetX()
                y = centroid.GetY()
            else:
                raise ValueError("typegeom parameter must be 'point' or "
                                 "'polygon'")
            values.append((x, y, getFieldValue(feature, fields)))

        values.sort(key=priority)
        return values
Exemple #3
0
def compareVectorFile(vect_1,
                      vect_2,
                      mode='table',
                      typegeom='point',
                      drivername="SQLite"):
    """used to compare two SQLite vector files

    mode=='table' is faster but does not work with connected OTB applications.

    Parameters
    ----------
    vect_1 : string
        path to a vector file
    vect_2 : string
        path to a vector file
    mode : string
        'table' or 'coordinates'
        -> 'table' : compare the content of the sqlite table named 'output'
                     of both files
        -> 'coordinates' : compare features geo-referenced at the same
                           coordinates
    typegeom : string
        'point' or 'polygon' (only used when mode is 'coordinates')
    drivername : string
        ogr driver's name

    Return
    ------
    bool
        True if vectors are the same

    Raises
    ------
    ValueError
        in 'coordinates' mode, if a feature is read while typegeom is neither
        'point' nor 'polygon'
    Exception
        if the field lists match and mode is neither 'table' nor
        'coordinates'
    """
    from contextlib import closing
    try:
        # recent GDAL releases only expose the bindings through osgeo
        from osgeo import ogr
    except ImportError:
        import ogr
    from Common import FileUtils as fu
    import sqlite3 as lite
    import pandas as pad

    def getFieldValue(feat, fields):
        """Return a {field name: value} dictionary for one feature."""
        return {
            currentField: feat.GetField(currentField)
            for currentField in fields
        }

    def priority(item):
        """Sort key: the (x, y) coordinates of a (x, y, fields) tuple."""
        return (item[0], item[1])

    def getValuesSortedByCoordinates(vector):
        """Return the features as (x, y, fields) tuples sorted by (x, y)."""
        driver = ogr.GetDriverByName(drivername)
        ds = driver.Open(vector, 0)
        lyr = ds.GetLayer()
        fields = fu.get_all_fields_in_shape(vector, drivername)

        values = []
        for feature in lyr:
            geom = feature.GetGeometryRef()
            if typegeom == "point":
                # plain floats: a trailing comma here would turn x into a
                # 1-tuple
                x = geom.GetX()
                y = geom.GetY()
            elif typegeom == "polygon":
                centroid = geom.Centroid()
                x = centroid.GetX()
                y = centroid.GetY()
            else:
                raise ValueError("typegeom parameter must be 'point' or "
                                 "'polygon'")
            values.append((x, y, getFieldValue(feature, fields)))

        values.sort(key=priority)
        return values

    # Both files must expose the same fields, in the same order.
    fields_1 = fu.get_all_fields_in_shape(vect_1, drivername)
    fields_2 = fu.get_all_fields_in_shape(vect_2, drivername)
    if list(fields_1) != list(fields_2):
        return False

    if mode == 'table':
        query = "SELECT * FROM output"
        # closing() guarantees the sqlite handles are released
        with closing(lite.connect(vect_1)) as connection_1:
            df_1 = pad.read_sql_query(query, connection_1)
        with closing(lite.connect(vect_2)) as connection_2:
            df_2 = pad.read_sql_query(query, connection_2)

        try:
            # axis is given by keyword: recent pandas versions reject it as
            # a positional argument
            differing_rows = (df_1 != df_2).any(axis=1)
        except ValueError:
            # pandas refuses to compare frames which are not identically
            # labelled
            return False
        return not differing_rows.any()

    elif mode == 'coordinates':
        # List equality also checks the number of features, which a
        # zip()-based comparison would silently ignore.
        return (getValuesSortedByCoordinates(vect_1) ==
                getValuesSortedByCoordinates(vect_2))
    else:
        raise Exception("mode parameter must be 'table' or 'coordinates'")
Exemple #4
0
def extraction(vectorFill, vectorSource, field, field_val, driversFill,
               driversSource):
    """Copy the source features whose ``field`` value is in ``field_val`` into the fill files.

    For each class, the selected FIDs are split with ``fu.splitList`` into
    ``len(vectorFill)`` parts and each part is appended to the matching fill
    layer. New features are numbered after the highest FID already present
    in their destination layer.

    Parameters
    ----------
    vectorFill : list
        paths to the vector files to fill (opened in update mode)
    vectorSource : string
        path to the vector file features are read from
    field : string
        name of the field used to select features
    field_val : list
        values of ``field`` to extract
    driversFill : list
        ogr driver name of each file in vectorFill
    driversSource : string
        ogr driver name of vectorSource

    Notes
    -----
    ``max`` raises a ValueError if one of the fill layers has no feature.
    """
    ogrDriversFill = [
        ogr.GetDriverByName(currentDriver) for currentDriver in driversFill
    ]
    ogrDriversSource = ogr.GetDriverByName(driversSource)

    dataSourceFill = [
        currentDriver.Open(currentShape, 1)
        for currentDriver, currentShape in zip(ogrDriversFill, vectorFill)
    ]
    dataSourceSource = ogrDriversSource.Open(vectorSource, 0)

    layerFill = [
        currentDataSource.GetLayer() for currentDataSource in dataSourceFill
    ]
    layerSource = dataSourceSource.GetLayer()
    FIDColumn = layerSource.GetFIDColumn()
    if FIDColumn == "":
        # no FID column name reported: fall back on the generic "FID" name
        FIDColumn = "FID"

    # Highest FID used so far in each layer to fill. It is updated after
    # every copy, so that successive classes never reuse a FID.
    lastFID = [
        max([feat.GetFID() for feat in currentLayerToFill])
        for currentLayerToFill in layerFill
    ]

    listFieldSource = fu.get_all_fields_in_shape(vectorSource, driversSource)

    All_FID = [(currentFeat.GetField(field), currentFeat.GetFID())
               for currentFeat in layerSource
               if currentFeat.GetField(field) in field_val]
    layerSource.ResetReading()
    for layerToFill in layerFill:
        layerToFill.ResetReading()
    # presumably groups the FIDs by class: [(class, [fid, ...]), ...]
    # -- TODO confirm against fu.sortByFirstElem
    All_FID = fu.sortByFirstElem(All_FID)

    for currentClass, FID in All_FID:
        splits = fu.splitList(FID, len(vectorFill))
        for i, (currentSplit, layerToFill) in enumerate(zip(splits,
                                                            layerFill)):
            # the filter is assembled from chunks of FIDs: one
            # parenthesised OR-group per chunk
            chunkSublistFID = fu.splitList(currentSplit,
                                           1 + int(len(currentSplit) / 1000))
            filterFID = "(" + " OR ".join([
                "(" + " OR ".join([
                    FIDColumn + "=" + str(currentFID) for currentFID in chunk
                ]) + ")" for chunk in chunkSublistFID
            ]) + ")"
            layerSource.SetAttributeFilter(filterFID)
            newfid = lastFID[i]
            print("Ajout de " + str(currentClass) + " dans " + vectorFill[i] +
                  " filter : " + filterFID)
            for feature in layerSource:
                geom = feature.GetGeometryRef()
                print(geom)
                dstfeature = ogr.Feature(layerSource.GetLayerDefn())
                dstfeature.SetGeometry(geom)
                newfid += 1
                dstfeature.SetFID(newfid)
                for currentField in listFieldSource:
                    dstfeature.SetField(currentField,
                                        feature.GetField(currentField))
                layerToFill.CreateFeature(dstfeature)

                dstfeature.Destroy()
            # remember the last FID written in this layer for the next class
            lastFID[i] = newfid

    # drop the layer handles
    layerFill = None
    layerSource = None
Exemple #5
0
def extraction(shapeE, DriverE, field, field_val, nb_extrac, shapeS, fieldo,
               DriverS):
    """Randomly copy features of shapeE into shapeS, class by class.

    For each value of ``field_val``, ``nb_extrac[i]`` features (or every
    available one when there are fewer) are drawn with ``random.sample``
    among the features of shapeE whose ``field`` equals that value, then
    appended to shapeS with new FIDs following the highest existing one.

    Parameters
    ----------
    shapeE : string
        path to the vector file features are extracted from
    DriverE : string
        ogr driver name of shapeE
    field : string
        name of the field used to select features in shapeE
    field_val : list
        values of ``field`` to extract
    nb_extrac : list
        number of features to extract for each value of field_val
    shapeS : string
        path to the vector file to fill (opened in update mode)
    fieldo : string
        not used by the current implementation
    DriverS : string
        ogr driver name of shapeS

    Notes
    -----
    Fields are copied by position: the value of the i-th field of shapeE is
    written in the i-th field of shapeS.
    ``max`` raises a ValueError if shapeS has no feature.
    """
    driver = ogr.GetDriverByName(DriverE)
    dataSource = driver.Open(shapeE, 0)
    layer = dataSource.GetLayer()

    driver = ogr.GetDriverByName(DriverS)
    dataSourceS = driver.Open(shapeS, 1)
    layerS = dataSourceS.GetLayer()

    print("checking FID")
    All_FID = [(currentFeat.GetField(field), currentFeat.GetFID())
               for currentFeat in layer
               if currentFeat.GetField(field) in field_val]
    # presumably groups the FIDs by class: [(class, [fid, ...]), ...]
    # -- TODO confirm against fu.sortByFirstElem
    All_FID = fu.sortByFirstElem(All_FID)
    print("FIDs found")

    # Fields Lists
    listFieldIn = fu.get_all_fields_in_shape(shapeE, DriverE)
    listFieldOut = fu.get_all_fields_in_shape(shapeS, DriverS)

    FIDColumn = layer.GetFIDColumn()
    if FIDColumn == "":
        # no FID column name reported: fall back on the generic "FID" name
        FIDColumn = "FID"

    # in case of not closed layers
    layerS.ResetReading()
    layer.ResetReading()

    for i, val in enumerate(field_val):
        print("fill up " + str(val) + " values")
        # list of Fid of the current landcover type (val)
        matchingFid = [x[1] for x in All_FID if x[0] == val]
        listFid = matchingFid[0] if matchingFid else []
        print(len(listFid))
        nbExtraction = nb_extrac[i]
        if nbExtraction > len(listFid):
            nbExtraction = len(listFid)
            print("Warning : class " + str(val) + " extraction set to " +
                  str(nbExtraction))
        # Random selection, done whether or not the request was clamped
        sublistFid = random.sample(listFid, nbExtraction)
        if not sublistFid:
            # Nothing to copy. An empty filter string would not restrict
            # the source layer, so the class is skipped.
            continue

        chunkSublistFID = fu.splitList(sublistFid,
                                       1 + int(len(sublistFid) / 1000))
        # Filter input shapefile: one parenthesised OR-group per chunk
        filterFID = [
            "(" + " OR ".join(
                [FIDColumn + "=" + str(currentFID)
                 for currentFID in chunk]) + ")"
            for chunk in chunkSublistFID
        ]
        layer.SetAttributeFilter(" OR ".join(filterFID))

        newfid = max([feat.GetFID() for feat in layerS])
        # filtered input features into output shapefile
        for feature in layer:
            dstfeature = ogr.Feature(layerS.GetLayerDefn())
            dstfeature.SetGeometry(feature.GetGeometryRef())
            newfid += 1
            dstfeature.SetFID(newfid)
            for indIn, fieldIn in enumerate(listFieldIn):
                dstfeature.SetField(listFieldOut[indIn],
                                    feature.GetField(fieldIn))
            layerS.CreateFeature(dstfeature)
            dstfeature.Destroy()

    # Drop the layer handles only once every class has been processed.
    layerS = layer = None

    print("DONE")