def DataAugmentationSynthetic(samples,
                              groundTruth,
                              dataField,
                              strategies,
                              workingDirectory=None):
    """Compute how many samples must be added to the sample set and launch
    the data augmentation method.

    Nothing is done unless the region extracted from ``samples`` by
    GetRegionFromSampleName, or the literal "all", is listed in
    strategies["target_models"].

    Parameters
    ----------
    samples : string
        path to the vector file whose samples are augmented
    groundTruth : string
        path to the original ground truth vector file, used to list its
        integer / float fields
    dataField : string
        data field's name in samples
    strategies : dict
        augmentation settings. Keys read here: "target_models",
        "samples.strategy", "strategy" and, optionally,
        "samples.strategy.minNumber", "samples.strategy.byClass",
        "strategy.jitter.stdfactor" and "strategy.smote.neighbors"
    workingDirectory : string
        path to a working directory
    """
    from collections import Counter

    target_models = strategies["target_models"]
    if (GetRegionFromSampleName(samples) not in target_models
            and "all" not in target_models):
        return

    # Number of samples currently available for each class.
    class_count = Counter(
        fut.getFieldElement(samples,
                            driverName="SQLite",
                            field=dataField,
                            mode="all",
                            elemType="int"))
    class_augmentation = SamplesAugmentationCounter(
        class_count,
        mode=strategies["samples.strategy"],
        minNumber=strategies.get("samples.strategy.minNumber", None),
        byClass=strategies.get("samples.strategy.byClass", None))

    # Numeric fields of the ground truth are excluded from the augmentation.
    # The original test was spelled "flaot", so the float half of this filter
    # could never match.
    # NOTE(review): this relies on the type names returned by GetFieldsType;
    # confirm how it spells floating point types.
    fields_types = GetFieldsType(groundTruth)
    excluded_fields_origin = [
        field_name.lower()
        for field_name, field_type in fields_types.items()
        if "int" in field_type or "float" in field_type
    ]
    samples_fields = fut.get_all_fields_in_shape(samples, driver='SQLite')
    excluded_fields = list(
        set(excluded_fields_origin).intersection(samples_fields))
    excluded_fields.append("originfid")

    DoAugmentation(samples,
                   class_augmentation,
                   strategy=strategies["strategy"],
                   field=dataField,
                   excluded_fields=excluded_fields,
                   Jstdfactor=strategies.get("strategy.jitter.stdfactor",
                                             None),
                   Sneighbors=strategies.get("strategy.smote.neighbors",
                                             None),
                   workingDirectory=workingDirectory)
def getValuesSortedByCoordinates(vector):
    """Read every feature of a vector file and return them sorted by position.

    The names ``ogr``, ``fu``, ``drivername``, ``typegeom``,
    ``getFieldValue`` and ``priority`` are not defined here and must be
    provided by the surrounding scope.

    Parameters
    ----------
    vector : string
        path to the vector file to read

    Return
    ------
    list
        one ``(x, y, fields_val)`` tuple per feature, ordered with
        ``priority`` as sort key. The coordinates are those of the geometry
        itself when ``typegeom`` is "point" and those of its centroid when
        it is "polygon".

    Raises
    ------
    ValueError
        if a feature is read while ``typegeom`` is neither "point" nor
        "polygon"
    """
    driver = ogr.GetDriverByName(drivername)
    ds = driver.Open(vector, 0)
    lyr = ds.GetLayer()
    fields = fu.get_all_fields_in_shape(vector, drivername)

    values = []
    for feature in lyr:
        geom = feature.GetGeometryRef()
        if typegeom == "point":
            anchor = geom
        elif typegeom == "polygon":
            anchor = geom.Centroid()
        else:
            # Previously x / y were simply left unbound in this case.
            raise ValueError("typegeom must be 'point' or 'polygon'")
        # x is a plain coordinate: the original had a stray trailing comma
        # that wrapped it in a 1-tuple.
        values.append((anchor.GetX(), anchor.GetY(),
                       getFieldValue(feature, fields)))
    return sorted(values, key=priority)
def compareVectorFile(vect_1,
                      vect_2,
                      mode='table',
                      typegeom='point',
                      drivername="SQLite"):
    """Compare two SQLite vector files.

    The field lists of both files are compared first; the function returns
    False as soon as they differ, whatever the mode.

    mode=='table' is faster but does not work with connected OTB
    applications.

    Parameters
    ----------
    vect_1 : string
        path to a vector file
    vect_2 : string
        path to a vector file
    mode : string
        'table' or 'coordinates'
        -> 'table' : compare the content of the "output" SQLite tables
        -> 'coordinates' : compare features geo-referenced at the same
                           coordinates
    typegeom : string
        'point' or 'polygon' (only used in 'coordinates' mode)
    drivername : string
        ogr driver's name

    Return
    ------
    bool
        True if vectors are the same

    Raises
    ------
    ValueError
        if mode is neither 'table' nor 'coordinates', or if typegeom is
        neither 'point' nor 'polygon' in 'coordinates' mode
    """
    import ogr
    from contextlib import closing
    from itertools import zip_longest
    from Common import FileUtils as fu
    import sqlite3 as lite
    import pandas as pad

    def getFieldValue(feat, fields):
        """Map every field name to its value for the given feature."""
        return {
            currentField: feat.GetField(currentField)
            for currentField in fields
        }

    def priority(item):
        """Sort key: the (x, y) position of a feature record."""
        return (item[0], item[1])

    def getValuesSortedByCoordinates(vector):
        """Return (x, y, fields) records of a file, sorted by position."""
        driver = ogr.GetDriverByName(drivername)
        ds = driver.Open(vector, 0)
        lyr = ds.GetLayer()
        fields = fu.get_all_fields_in_shape(vector, drivername)
        values = []
        for feature in lyr:
            geom = feature.GetGeometryRef()
            if typegeom == "point":
                anchor = geom
            elif typegeom == "polygon":
                anchor = geom.Centroid()
            else:
                # Previously x / y were simply left unbound in this case.
                raise ValueError("typegeom must be 'point' or 'polygon'")
            # A stray trailing comma used to wrap x in a 1-tuple.
            values.append((anchor.GetX(), anchor.GetY(),
                           getFieldValue(feature, fields)))
        return sorted(values, key=priority)

    def readTable(vector):
        """Load the "output" table of a SQLite file into a DataFrame."""
        # closing() guarantees the connection is released; using the
        # connection itself as a context manager would only manage the
        # transaction, not close it.
        with closing(lite.connect(vector)) as connection:
            return pad.read_sql_query("SELECT * FROM output", connection)

    fields_1 = fu.get_all_fields_in_shape(vect_1, drivername)
    fields_2 = fu.get_all_fields_in_shape(vect_2, drivername)
    # zip_longest pads the shorter list with None so that a different number
    # of fields is reported as a difference.
    if any(field_1 != field_2 for field_1, field_2 in zip_longest(
            fields_1, fields_2, fillvalue=None)):
        return False

    if mode == 'table':
        df_1 = readTable(vect_1)
        df_2 = readTable(vect_2)
        try:
            # axis is passed by keyword: recent pandas releases no longer
            # accept it positionally.
            # NOTE(review): element-wise != presumably flags NULL / NaN cells
            # as different even when both tables hold them at the same place;
            # confirm this is the intended behaviour.
            return not bool((df_1 != df_2).any(axis=1).any())
        except ValueError:
            # pandas refuses to compare frames that are not identically
            # labelled: the tables are different.
            return False
    elif mode == 'coordinates':
        values_1 = getValuesSortedByCoordinates(vect_1)
        values_2 = getValuesSortedByCoordinates(vect_2)
        # zip() stops at the shortest list, so the number of features has to
        # be checked explicitly.
        if len(values_1) != len(values_2):
            return False
        return all(val_1 == val_2 for val_1, val_2 in zip(values_1, values_2))
    else:
        raise ValueError("mode parameter must be 'table' or 'coordinates'")
def extraction(vectorFill, vectorSource, field, field_val, driversFill,
               driversSource):
    """Copy the source features of the requested classes into several vector
    files.

    For every class found, the FIDs of its features are split with
    ``fu.splitList`` into ``len(vectorFill)`` groups and each group is
    appended to the corresponding file to fill.

    Parameters
    ----------
    vectorFill : list
        paths to the vector files to fill (opened in update mode)
    vectorSource : string
        path to the vector file features are read from
    field : string
        name of the field holding the class of a feature
    field_val : list
        classes to extract
    driversFill : list
        ogr driver's name of each file in vectorFill
    driversSource : string
        ogr driver's name of vectorSource
    """
    ogrDriversFill = [
        ogr.GetDriverByName(currentDriver) for currentDriver in driversFill
    ]
    ogrDriversSource = ogr.GetDriverByName(driversSource)

    dataSourceFill = [
        currentDriver.Open(currentShape, 1)
        for currentDriver, currentShape in zip(ogrDriversFill, vectorFill)
    ]
    dataSourceSource = ogrDriversSource.Open(vectorSource, 0)

    layerFill = [
        currentDataSource.GetLayer() for currentDataSource in dataSourceFill
    ]
    layerSource = dataSourceSource.GetLayer()

    # Fall back to "FID" when the layer reports no FID column name.
    FIDColumn = layerSource.GetFIDColumn() or "FID"

    # Highest FID in use in each layer to fill. The list is updated as
    # features are appended so that successive classes never reuse an FID
    # (the counter used to restart from the initial maximum for every class).
    lastFID = [
        max([feat.GetFID() for feat in currentLayerToFill])
        for currentLayerToFill in layerFill
    ]

    listFieldSource = fu.get_all_fields_in_shape(vectorSource, driversSource)

    All_FID = [(currentFeat.GetField(field), currentFeat.GetFID())
               for currentFeat in layerSource
               if currentFeat.GetField(field) in field_val]

    layerSource.ResetReading()
    for layerToFill in layerFill:
        layerToFill.ResetReading()

    All_FID = fu.sortByFirstElem(All_FID)
    sourceDefn = layerSource.GetLayerDefn()

    for currentClass, FID in All_FID:
        splits = fu.splitList(FID, len(vectorFill))
        for i, (currentSplit, layerToFill) in enumerate(zip(splits,
                                                            layerFill)):
            if len(currentSplit) == 0:
                # Nothing to copy; an empty split would also produce an
                # empty, invalid filter expression.
                continue
            # FIDs are grouped in chunks of roughly 1000 to build the filter.
            chunkSublistFID = fu.splitList(currentSplit,
                                           1 + len(currentSplit) // 1000)
            filterFID = "(" + " OR ".join([
                "(" + " OR ".join([
                    FIDColumn + "=" + str(currentFID) for currentFID in chunk
                ]) + ")" for chunk in chunkSublistFID
            ]) + ")"
            layerSource.SetAttributeFilter(filterFID)
            print("Ajout de " + str(currentClass) + " dans " + vectorFill[i] +
                  " filter : " + filterFID)

            newfid = lastFID[i]
            for feature in layerSource:
                geom = feature.GetGeometryRef()
                print(geom)
                dstfeature = ogr.Feature(sourceDefn)
                dstfeature.SetGeometry(geom)
                newfid += 1
                dstfeature.SetFID(newfid)
                for currentField in listFieldSource:
                    dstfeature.SetField(currentField,
                                        feature.GetField(currentField))
                layerToFill.CreateFeature(dstfeature)
                dstfeature.Destroy()
            lastFID[i] = newfid

    # The layers and data sources are only referenced by locals: they are
    # released when the function returns (the former closing loop merely
    # rebound its loop variable and released nothing).
def extraction(shapeE, DriverE, field, field_val, nb_extrac, shapeS, fieldo,
               DriverS):
    """Append a random selection of features of each class to a vector file.

    For every class of ``field_val``, at most the matching number of
    ``nb_extrac`` features is drawn with ``random.sample`` among the features
    of ``shapeE`` belonging to that class, then copied into ``shapeS``.

    Parameters
    ----------
    shapeE : string
        path to the vector file features are read from
    DriverE : string
        ogr driver's name of shapeE
    field : string
        name of the field holding the class of a feature in shapeE
    field_val : list
        classes to extract
    nb_extrac : list
        number of features to extract for each class, in the order of
        field_val
    shapeS : string
        path to the vector file to fill (opened in update mode)
    fieldo : string
        unused, kept for backward compatibility
    DriverS : string
        ogr driver's name of shapeS
    """
    dataSource = ogr.GetDriverByName(DriverE).Open(shapeE, 0)
    layer = dataSource.GetLayer()
    dataSourceS = ogr.GetDriverByName(DriverS).Open(shapeS, 1)
    layerS = dataSourceS.GetLayer()

    print("checking FID")
    All_FID = [(currentFeat.GetField(field), currentFeat.GetFID())
               for currentFeat in layer
               if currentFeat.GetField(field) in field_val]
    All_FID = fu.sortByFirstElem(All_FID)
    print("FIDs found")

    # Fields lists: values are copied from listFieldIn[k] to listFieldOut[k]
    listFieldIn = fu.get_all_fields_in_shape(shapeE, DriverE)
    listFieldOut = fu.get_all_fields_in_shape(shapeS, DriverS)

    # Fall back to "FID" when the layer reports no FID column name, otherwise
    # the filter below would start with a bare "=".
    FIDColumn = layer.GetFIDColumn() or "FID"

    # in case of not closed layers
    layerS.ResetReading()
    layer.ResetReading()

    for i, val in enumerate(field_val):
        print("fill up " + str(val) + " values")
        # list of FIDs of the current landcover type (val)
        listFid = [x[1] for x in All_FID if x[0] == val][0]

        # Random selection, capped to the number of available features
        print(len(listFid))
        nbExtraction = nb_extrac[i]
        if nbExtraction > len(listFid):
            nbExtraction = len(listFid)
            print("Warning : class " + str(val) + " extraction set to " +
                  str(nbExtraction))
        sublistFid = random.sample(listFid, nbExtraction)
        if not sublistFid:
            # Nothing to copy; an empty selection would also produce an
            # empty, invalid filter expression.
            continue

        # Filter input shapefile; FIDs are grouped in chunks of roughly 1000
        chunkSublistFID = fu.splitList(sublistFid,
                                       1 + len(sublistFid) // 1000)
        ffilter = " OR ".join(
            "(" + " OR ".join(FIDColumn + "=" + str(currentFID)
                              for currentFID in chunk) + ")"
            for chunk in chunkSublistFID)
        layer.SetAttributeFilter(ffilter)

        newfid = max([feat.GetFID() for feat in layerS])
        # filtered input features into output shapefile
        for feature in layer:
            geom = feature.GetGeometryRef()
            dstfeature = ogr.Feature(layerS.GetLayerDefn())
            dstfeature.SetGeometry(geom)
            newfid += 1
            dstfeature.SetFID(newfid)
            for indIn, fieldIn in enumerate(listFieldIn):
                dstfeature.SetField(listFieldOut[indIn],
                                    feature.GetField(fieldIn))
            layerS.CreateFeature(dstfeature)
            dstfeature.Destroy()

    layerS = layer = None
    print("DONE")