def clip_shapefile_to_geometry(shapefile, clip_shapefile, out_filename,
                               outside_filename, out_directory):
    """ Mask a shapefile with another shapefile."""
    intersection = []
    outside = []
    with fopen(shapefile, "r") as src:
        meta = deepcopy(src.meta)
        # Read the clip geometries once up front; a fiona collection is
        # exhausted after a single pass and can't be re-iterated per feature.
        with fopen(clip_shapefile) as clip_with:
            clip_polys = [shape(f['geometry']) for f in clip_with]
        for feat in src:
            poly = shape(feat['geometry'])
            if any(clip_poly.contains(poly) for clip_poly in clip_polys):
                intersection.append(feat)
            else:
                outside.append(feat)
    with fopen(os.path.join(out_directory, out_filename), 'w', **meta) as dst:
        print("Saving {}".format(out_filename))
        for feat in intersection:
            dst.write(feat)
    with fopen(os.path.join(out_directory, outside_filename), 'w', **meta) as dst:
        print("Saving {}".format(outside_filename))
        for feat in outside:
            dst.write(feat)
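# Hedged usage sketch for clip_shapefile_to_geometry; the paths below are
# hypothetical, and fopen is assumed to be an alias of fiona.open.
clip_shapefile_to_geometry(shapefile='fields.shp',
                           clip_shapefile='county.shp',
                           out_filename='fields_inside.shp',
                           outside_filename='fields_outside.shp',
                           out_directory='clipped')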
def buffer_shapefile(shp):
    buf = -0.00050
    # Collect the buffered features in memory first; reading from and
    # writing to the same shapefile at once would corrupt it.
    with fopen(shp, 'r') as polys:
        meta = polys.meta
        out = []
        for feat in polys:
            feat['geometry'] = mapping(shape(feat['geometry']).buffer(buf))
            out.append(feat)
    with fopen(shp, 'w', **meta) as dst:
        for feat in out:
            dst.write(feat)
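# The negative buffer above shrinks each polygon inward. A quick,
# self-contained shapely check of that behavior (not part of the original code):
from shapely.geometry import box

sq = box(0, 0, 1, 1)
shrunk = sq.buffer(-0.1)  # erodes the unit square to 0.8 x 0.8
assert abs(shrunk.area - 0.64) < 1e-9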
def _populate_array_from_points(self):
    # TODO: replace with geopandas.shp_to_dataframe
    with fopen(self.shapefile_path, 'r') as src:
        for feat in src:
            coords = feat['geometry']['coordinates']
            val = feat['properties']['POINT_TYPE']
            self._add_entry(coords, val=val)
def shapefile_area(shapefile):
    summ = 0
    with fopen(shapefile, "r") as src:
        for feat in src:
            poly = shape(feat['geometry'])
            summ += poly.area
    return summ
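# Note: poly.area is expressed in the square units of the shapefile's CRS
# (square degrees for unprojected data). If geodesic area is wanted, pyproj's
# Geod could be used instead; a sketch, assuming pyproj >= 2.3:
from pyproj import Geod
from shapely.geometry import Polygon

geod = Geod(ellps='WGS84')
poly = Polygon([(-111., 47.), (-110., 47.), (-110., 48.), (-111., 48.)])
area_m2, _perim = geod.geometry_area_perimeter(poly)  # signed area, m^2
print(abs(area_m2))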
def raster_point_extract(raster, points, dtime):
    """ Get point values from a raster.
    :param raster: local_raster: Thredds.Gridmet-derived array in Landsat image geometry.
    :param points: Shapefile of points.
    :param dtime: Datetime.datetime object.
    :return: Dict of coords, row/cols, and values of raster at that point.
    """
    point_data = {}
    with fopen(points, 'r') as src:
        for feature in src:
            name = feature['properties']['siteid']
            point_data[name] = {'coords': feature['geometry']['coordinates']}
    with rasopen(raster, 'r') as rsrc:
        rass_arr = rsrc.read()
        rass_arr = rass_arr.reshape(rass_arr.shape[1], rass_arr.shape[2])
        affine = rsrc.transform
    for key, val in point_data.items():
        x, y = val['coords']
        col, row = ~affine * (x, y)
        val = rass_arr[int(row), int(col)]
        point_data[key][dtime] = [val, None]
    return point_data
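# The ~affine * (x, y) lookup above inverts rasterio's affine transform to map
# map coordinates to array indices. A self-contained check with made-up numbers:
from affine import Affine

aff = Affine(30.0, 0.0, 500000.0,    # 30 m pixels, upper-left x = 500000
             0.0, -30.0, 5000000.0)  # upper-left y = 5000000
col, row = ~aff * (500060.0, 4999910.0)
assert (int(col), int(row)) == (2, 3)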
def raster_point_extract(lst_raster, eef_raster, points):
    point_data = {}
    with fopen(points, 'r') as src:
        for feature in src:
            try:
                name = feature['properties']['Name']
            except KeyError:
                name = feature['properties']['FID']
            point_data[name] = {'coords': feature['geometry']['coordinates']}
    with rasopen(lst_raster, 'r') as src:
        lst_arr = src.read()
        lst_arr = lst_arr.reshape(lst_arr.shape[1], lst_arr.shape[2])
        lst_affine = src.transform
    with rasopen(eef_raster, 'r') as src:
        eef_lst = src.read()
        eef_lst = eef_lst.reshape(eef_lst.shape[1], eef_lst.shape[2])
        eef_affine = src.transform
    for key, val in point_data.items():
        x, y, z = val['coords']
        col, row = ~lst_affine * (x, y)
        val = lst_arr[int(row), int(col)]
        point_data[key]['lst_found'] = val
        point_data[key]['lst_row_col'] = int(row), int(col)
        col, row = ~eef_affine * (x, y)
        val = eef_lst[int(row), int(col)]
        point_data[key]['eef_lst'] = val
        point_data[key]['eef_row_col'] = int(row), int(col)
    return point_data
def get_shapefile_lat_lon(shapefile):
    ''' Center of shapefile'''
    with fopen(shapefile, "r") as src:
        minx, miny, maxx, maxy = src.bounds
        latc = (maxy + miny) / 2
        lonc = (maxx + minx) / 2
    return latc, lonc
def tile_bbox(self):
    with fopen(WRS_2, 'r') as wrs:
        for feature in wrs:
            fp = feature['properties']
            if fp['PATH'] == self.geography.path and fp['ROW'] == self.geography.row:
                bbox = feature['geometry']
                return bbox
def get_wrs2_features(path, row):
    with fopen(WRS2) as src:
        for feat in src:
            poly = shape(feat['geometry'])
            propr = feat['properties']
            if propr['PATH'] == path and propr['ROW'] == row:
                return [feat]
    return None
def test_write_shapefile(self):
    flux = FluxSite(self.data_perm)
    data = flux.data
    flux.write_locations_to_shp(data, self.shape_out)
    with fopen(self.shape_out, 'r') as shp:
        count = 0
        for feature in shp:
            count += 1
    self.assertEqual(count, 252)
def test_sample_points(self):
    montana = MontanaTest()
    p = PixelTrainingArray(root=self.directory, geography=montana,
                           instances=10, overwrite_array=True)
    p.extract_sample(save_points=True)
    with fopen(p.shapefile_path, 'r') as src:
        points = [x for x in src]
    self.assertGreater(len(points), 40)
    self.assertGreater(p.extracted_points.shape[0], 40)
def geojson_to_shp(self, out_folder, geojson_dict, srs):
    try:
        code = from_epsg(srs)
        b_json = json.dumps(geojson_dict['geojson']).encode('utf-8')
        geojson = fiona.ogrext.buffer_to_virtual_file(b_json)
        file_name = self.char_replace(geojson_dict["name"]) + ".shp"
        out_path = os.path.join(out_folder, file_name)
        with fopen(geojson) as source:
            with fopen(out_path, "w",
                       driver="ESRI Shapefile",
                       crs=code,
                       schema=source.schema) as sink:
                for rec in source:
                    sink.write(rec)
        print("Shapefile written to %s " % out_path)
    except Exception as e:
        print(e)
        return
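# from_epsg above is assumed to be fiona's helper that builds a CRS mapping
# from an EPSG code; in fiona 1.x it returns a dict like the one noted below.
from fiona.crs import from_epsg

print(from_epsg(4326))  # e.g. {'init': 'epsg:4326', 'no_defs': True}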
def count_project_fields(source, tables):
    ct = 0
    for t in tables:
        shp = os.path.join(source, '{}.shp'.format(t))
        with fopen(shp, 'r') as src:
            print(shp)
            sub_ct = len([x for x in src])
            ct += sub_ct
            print(sub_ct)
    print(ct)
def test_find_closest_station(self):
    """ Test find closest agrimet station to any point.
    :return:
    """
    coords = []
    with fopen(self.point_file, 'r') as src:
        for feature in src:
            coords.append(feature['geometry']['coordinates'])
    for coord in coords:
        agrimet = Agrimet(lon=coord[0], lat=coord[1])
        self.assertTrue(agrimet.station in self.site_ids)
def filter_shapefile_overlapping(shapefile, out_directory=None):
    """ Shapefiles may span multiple path/rows/years.
    For training, we want all of the data available.
    This function filters the polygons contained in the shapefile into
    separate files for each path/row/year contained in the shapefile. """
    path_row_map = defaultdict(list)
    wrs2 = fopen('../spatial_data/wrs2_descending_usa.shp', 'r')
    tree, path_rows, features = _construct_kdtree(wrs2)
    wrs2.close()

    # Float dtype; an integer array would silently truncate centroids.
    cent_arr = array([0.0, 0.0])
    with fopen(shapefile, "r") as src:
        meta = deepcopy(src.meta)
        for feat in src:
            poly = shape(feat['geometry'])
            centroid = poly.centroid.coords[0]
            cent_arr[0] = centroid[0]
            cent_arr[1] = centroid[1]
            centroid = cent_arr.reshape(1, -1)
            dist, ind = tree.query(centroid, k=10)
            tiles = features[ind[0]]
            prs = get_pr_subset(poly, tiles)  # gets the matching path/rows
            for p in prs:
                path_row_map[p].append(feat)

    if out_directory is None:
        return path_row_map, meta

    outfile = os.path.basename(shapefile)
    outfile = os.path.splitext(outfile)[0]
    for path_row in path_row_map:
        out = outfile + "_" + path_row + ".shp"
        with fopen(os.path.join(out_directory, out), 'w', **meta) as dst:
            print("Saving {}".format(out))
            for feat in path_row_map[path_row]:
                dst.write(feat)
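# Hedged usage sketch: split a training shapefile by WRS-2 path/row. The input
# name and output directory are hypothetical; without out_directory the
# function returns the mapping, with it the split files are written.
path_row_map, meta = filter_shapefile_overlapping('training_polygons.shp')
filter_shapefile_overlapping('training_polygons.shp', out_directory='split')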
def test_find_sites(self):
    with fopen(self.test_locations, 'r') as src:
        for feat in src:
            lat, lon = feat['geometry']['coordinates'][1], feat['geometry']['coordinates'][0]
            expected_site = feat['properties']['siteid']
            # don't test on neighboring sites
            if expected_site not in self.excepted_sites:
                agrimet = Agrimet(lat=lat, lon=lon,
                                  start_date=self.start, end_date=self.end)
                found_site = agrimet.station
                self.assertEqual(expected_site, found_site)
def multi_raster_point_extract(local_raster, geographic_raster, points, image_profile):
    """ Get raster data from two rasters at shapefile points.
    :param local_raster: Thredds.Gridmet-derived array in Landsat image geometry.
    :param geographic_raster: Native netcdf gridmet raster.
    :param points: Shapefile points, in geographic crs.
    :param image_profile: Landsat.LandsatX.profile, contains CRS and other raster info.
    :return: Dict of coords, row/cols, and values at those points.
    """
    point_data = {}
    with fopen(points, 'r') as src:
        for feature in src:
            try:
                name = feature['properties']['Name']
            except KeyError:
                name = feature['properties']['FID']
            geo_coords = feature['geometry']['coordinates']
            image_crs = image_profile['crs']
            in_proj = Proj(image_crs)
            utm = in_proj(geo_coords[0], geo_coords[1])
            point_data[name] = {'coords': {'geo': geo_coords, 'utm': utm}}
    with rasopen(local_raster, 'r') as srrc:
        local_arr = srrc.read()
        local_arr = local_arr.reshape(local_arr.shape[1], local_arr.shape[2])
        local_affine = srrc.transform
    with rasopen(geographic_raster, 'r') as ssrc:
        geo_raster = ssrc.read()
        geo_raster = geo_raster.reshape(geo_raster.shape[1], geo_raster.shape[2])
        geo_affine = ssrc.transform
    for key, val in point_data.items():
        i, j = val['coords']['utm']
        col, row = ~local_affine * (i, j)
        local_val = local_arr[int(row), int(col)]
        point_data[key]['local_val'] = local_val
        point_data[key]['local_row_col'] = int(row), int(col)
        x, y, z = val['coords']['geo']
        col, row = ~geo_affine * (x, y)
        geo_val = geo_raster[int(row), int(col)]
        point_data[key]['geo_val'] = geo_val
        point_data[key]['geo_row_col'] = int(row), int(col)
    return point_data
def save_sample_points(self):
    points_schema = {
        'properties': dict([('FID', 'int:10'), ('POINT_TYPE', 'int:10')]),
        'geometry': 'Point'
    }
    meta = self.tile_geometry.copy()
    meta['schema'] = points_schema
    with fopen(self.shapefile_path, 'w', **meta) as output:
        for index, row in self.extracted_points.iterrows():
            props = dict([('FID', row['FID']), ('POINT_TYPE', row['POINT_TYPE'])])
            pt = Point(row['X'], row['Y'])
            output.write({'properties': props, 'geometry': mapping(pt)})
    return None
def test_write_agrimet_shapefile(self):
    agrimet = Agrimet(write_stations=True)
    station_data = agrimet.load_stations()
    epsg = '4326'
    outfile = self.out_shape
    agrimet.write_agrimet_sation_shp(station_data, epsg, outfile)
    with fopen(outfile, 'r') as shp:
        count = 0
        for _ in shp:
            count += 1
    self.assertEqual(186, count)
    file_list = os.listdir(self.points_dir)
    for f in file_list:
        if 'write_test' in f:
            os.remove(os.path.join(self.points_dir, f))
def __init__(self, vector=None, profile=None, latlon=True):
    BBox.__init__(self)
    if vector:
        with fopen(vector, 'r') as src:
            self.crs = src.crs
            self.epsg = int(src.crs['init'].split(":")[1])
            self.profile = src.profile
            self.meta = src.meta
            self.west, self.south, self.east, self.north = src.bounds
        if latlon and self.crs != {'init': 'epsg:4326'}:
            in_proj = Proj(init=self.profile['crs']['init'])
            self.west, self.north = in_proj(self.west, self.north, inverse=True)
            self.east, self.south = in_proj(self.east, self.south, inverse=True)
        else:
            pass
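# The inverse projection above converts projected bounds back to lon/lat.
# A round-trip check of that idiom, using the same pyproj init= style
# (deprecated in pyproj 2 but functional); the EPSG code is an arbitrary example:
from pyproj import Proj

utm = Proj(init='epsg:32612')        # UTM zone 12N
x, y = utm(-111.0, 47.0)             # forward: lon/lat -> easting/northing
lon, lat = utm(x, y, inverse=True)   # inverse: back to lon/lat
assert abs(lon + 111.0) < 1e-6 and abs(lat - 47.0) < 1e-6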
def _get_polygons(self, vector):
    with fopen(vector, 'r') as src:
        crs = src.crs
        if not crs:
            raise NoCoordinateReferenceError(
                'Provided shapefile has no reference data.')
        if crs['init'] != 'epsg:4326':
            raise UnexpectedCoordinateReferenceSystemError(
                'Provided shapefile should be in unprojected (geographic) '
                'coordinate system, i.e., WGS84, EPSG 4326, {} is not'.format(vector))
        clipped = src.filter(mask=self.tile_bbox)
        polys = []
        bad_geo_count = 0
        for feat in clipped:
            try:
                geo = shape(feat['geometry'])
                polys.append(geo)
            except AttributeError:
                bad_geo_count += 1
    return polys
def build_summary_table_monthly(source, shapes, tables, out_loc, project='oe'):
    master = DataFrame()
    lat, lon = None, None
    for table in source:
        print('Processing {}'.format(table))
        try:
            csv = read_csv(os.path.join(tables, '{}.csv'.format(table)))
            shp = os.path.join(shapes, '{}.shp'.format(table))
            with fopen(shp) as src:
                # .shp files should be in epsg: 102300
                for feat in src:
                    if src.crs != {'init': 'epsg:4326'}:
                        coords = feat['geometry']['coordinates'][0][0]
                        if len(coords) > 2:
                            coords = coords[0]
                        lat, lon = state_plane_MT_to_WGS(coords[1], coords[0])
                        break
                    else:
                        lat, lon = feat['geometry']['coordinates'][1], feat['geometry']['coordinates'][0]
            d = DataCollector(project=project, csv=csv, table=table,
                              lat=lat, lon=lon, monthly=True)
            d.get_table_data_monthly()
            d.df.dropna(axis=0, how='all', inplace=True)
            master = concat([master, d.df])
        except FileNotFoundError:
            print('{} not found'.format(table))
    if project == 'oe':
        master['DIVERSIONS'] = DIVERSIONS
        master = master[master['DIVERSIONS'] > 0.]
        master['EFF'] = master['Crop_Cons_af'] / master['DIVERSIONS']
    master.to_csv(os.path.join(out_loc, 'HUC_8_Monthly.csv'), date_format='%Y-%m')
def point_target_extract(points, nlcd_path, target_shapefile=None,
                         count_limit=None):
    # Treat count_limit=None as "no limit"; the original default would raise
    # a TypeError in the comparisons below.
    if count_limit is None:
        count_limit = float('inf')
    point_data = {}
    with fopen(points, 'r') as src:
        for feature in src:
            name = feature['id']
            proj_coords = feature['geometry']['coordinates']
            point_data[name] = {'point': feature['geometry'],
                                'coords': proj_coords}
            # point_crs = src.profile['crs']['init']
    pt_ct = 0
    for pt_id, val in point_data.items():
        pt_ct += 1
        if pt_ct < count_limit:
            pt = shape(val['point'])
            with fopen(target_shapefile, 'r') as target_src:
                has_attr = False
                for t_feature in target_src:
                    polygon = t_feature['geometry']
                    if pt.within(shape(polygon)):
                        print('pt id {}, props: {}'.format(pt_id, t_feature['properties']))
                        props = t_feature['properties']
                        point_data[pt_id]['properties'] = {'IType': props['IType'],
                                                           'LType': props['LType']}
                        has_attr = True
                        break
            if not has_attr:
                if nlcd_path:
                    with rasopen(nlcd_path, 'r') as rsrc:
                        rass_arr = rsrc.read()
                        rass_arr = rass_arr.reshape(rass_arr.shape[1], rass_arr.shape[2])
                        affine = rsrc.transform
                        x, y = val['coords']
                        col, row = ~affine * (x, y)
                        raster_val = rass_arr[int(row), int(col)]
                        ltype_dct = {'IType': None, 'LType': str(raster_val)}
                        point_data[pt_id]['properties'] = ltype_dct
                        print('id {} has no FLU, '
                              'nlcd {}'.format(pt_id, nlcd_value(ltype_dct['LType'])))
                else:
                    ltype_dct = {'IType': None, 'LType': None}
                    point_data[pt_id]['properties'] = ltype_dct
    idd = []
    ltype = []
    itype = []
    x = []
    y = []
    ct = 0
    for pt_id, val in point_data.items():
        ct += 1
        if ct < count_limit:
            idd.append(pt_id)
            ltype.append(val['properties']['LType'])
            itype.append(val['properties']['IType'])
            x.append(val['coords'][0])
            y.append(val['coords'][1])
        else:
            break
    dct = dict(zip(['ID', 'LTYPE', 'ITYPE', 'X', 'Y'],
                   [idd, ltype, itype, x, y]))
    df = DataFrame(data=dct)
    return df
def main(reference_data, raster_classif_file, yaml_file, pre_min_het,
         pre_max_het, area_threshold, db_str='main'):
    """FOI assessment is based on spatial analysis of a "thematic" raster
    produced in advance.

    The thematic raster can be the result of any image/raster processing
    method yielding a class label for each pixel - crop classification,
    behavior analysis of land phenomenon, gridded data on soil, slope,
    humidity, etc. The starting point was the idea that inside a homogeneous
    parcel we should have only one type of pixel. For example, if the
    thematic raster is the result of a crop classification, inside a parcel
    we should have only one type of pixels that represent the respective
    crop. If the thematic raster is the result of a behavior analysis, all
    the pixels inside a parcel should behave in the same way during a period
    of time. The FOI assessment is based on the analysis of the presence and
    distribution of different types of pixels inside the FOI.

    Args:
        reference_data (str): Spatial data to be tested - parcels that will
            be checked for heterogeneity and cardinality. The parcel
            polygons in .shp file format, or a database table name without
            the .shp ending.
        raster_classif_file (str): Thematic raster - classification raster,
            or raster from another source that will be used for testing
            heterogeneity and cardinality.
        yaml_file: YAML file that holds the classes of the thematic raster
            file; can also be a simple list of values in the notebook -
            correspondence between pixel values and names for the classes.
        pre_min_het: Minimum threshold for heterogeneity checks.
        pre_max_het: Maximum threshold for heterogeneity checks.
        area_threshold: Minimum area for clusters selection.

    Returns:
        bool: True if successful, False otherwise.
    """
    # database connection string
    if type(db_str) is str:
        db_connection = f"PG:{db.conn_str(db_str)}"
    elif type(db_str) is list:
        db_connection = "PG:host={} port={} dbname={} user={} password={}".format(*db_str)

    def db_conn():
        if type(db_str) is str:
            return db.conn(db_str)
        elif type(db_str) is list:
            return psycopg2.connect(
                "host={} port={} dbname={} user={} password={}".format(*db_str))

    # ogr2ogr options
    geom_field_name = "GEOMETRY_NAME=wkb_geometry"
    overwrite_option = "-OVERWRITE"
    geom_type = "MULTIPOLYGON"
    output_format = "PostgreSQL"

    # Path for storing the processed data - final spatial data that will be
    # exported after database processing
    processed_data = normpath(join('foi', 'processed_data'))
    os.makedirs(processed_data, exist_ok=True)

    # Path for storing the final output data
    output_data = normpath(join('foi', 'output_data'))
    os.makedirs(output_data, exist_ok=True)

    reference_data_name = os.path.splitext(os.path.basename(reference_data))[0]

    try:
        with open(f"{config.get_value(['paths','temp'])}tb_prefix", 'r') as f:
            reference_data_table = f.read()
    except Exception:
        reference_data_table = reference_data_name

    # Vector file resulting from the raster stats pixel count
    # pixelcount_output = f'{output_data}pixel_count_{reference_data_table}.shp'
    pixelcount_output = f'{processed_data}/{reference_data_name}_pixelcount.shp'
    # Vector file resulting from the raster to vector process (polygonize)
    polygonize_output = f'{processed_data}/{reference_data_name}_polygonize.shp'

    # Name of the table to be created in the database - import of the pixel
    # count into the database
    pixelcount_table = f"{reference_data_name}_pixelcount"
    # Name of the table to be created in the database - import of the
    # polygonize result into the database
    polygonize_table = f"{reference_data_name}_polygonize"

    # Names and paths of the files resulting from the analysis
    heterogeneity_output = f'{output_data}/{reference_data_name}_foih_v1.shp'
    cardinality_output = f'{output_data}/{reference_data_name}_foic_v1.shp'
    cardinality_output_clusters = f'{output_data}/{reference_data_name}_foic_clusters_v1.shp'

    sql = "SELECT * FROM " + reference_data_table + ";"

    try:
        ps_connection = db_conn()
        ps_connection.autocommit = True
        cursor = ps_connection.cursor()
        gpd_data = gpd.read_postgis(sql=sql, con=ps_connection,
                                    geom_col='wkb_geometry')
    except (Exception, psycopg2.DatabaseError) as error:
        print("Error while connecting to PostgreSQL", error)
    finally:
        # closing database connection.
        if ps_connection:
            cursor.close()
            ps_connection.close()
            # print("PostgreSQL connection is closed")

    temp_reference_data = f'foi/{reference_data_name}_temp.shp'
    gpd_data.to_file(temp_reference_data)

    shape = fiona.open(temp_reference_data)
    spatialRef = shape.crs["init"]
    # print("Vector EPSG: ", spatialRef)

    # Import reference data shapefile to database.
    # Overwrite option is needed, otherwise the import will append new
    # values to the ones existing in the table
    subprocess.call(["ogr2ogr", overwrite_option, "-nlt", geom_type,
                     "-lco", geom_field_name, "-a_srs", spatialRef,
                     "-nln", reference_data_table, "-f", "PostgreSQL",
                     db_connection, reference_data])

    # Reading the values from the yaml file
    conf = load(open(yaml_file, 'r').read(), Loader=FullLoader)
    category_map = conf['category_map']
    rst_fields = list(category_map.values())

    # Counting the number of pixels for each parcel. Fields named after the
    # classes from the yaml file will be added, and updated with the number
    # of pixels from each category
    with fopen(temp_reference_data, 'r') as input:
        spatialRef = input.crs["init"]
        schema = input.schema
        for i in rst_fields:
            schema['properties'][i] = 'int:5'
        rst_attribs = dict.fromkeys(rst_fields, 0)

        with fopen(pixelcount_output, 'w', 'ESRI Shapefile', schema) as output:
            for i, vct_feat in enumerate(input):
                vct_val_dict = dict(vct_feat['properties'])
                rst_val_dict = zonal_stats(vct_feat, raster_classif_file,
                                           categorical=True,
                                           copy_properties=True,
                                           category_map=category_map,
                                           nodata=-999)[0]
                vct_val_dict.update(rst_attribs)
                for lu in rst_val_dict:
                    vct_val_dict[lu] = rst_val_dict.get(lu)
                for atrib in vct_val_dict:
                    vct_feat['properties'][atrib] = vct_val_dict.get(atrib)
                output.write(vct_feat)

    print("Finished pixel calculation!")

    # Import the resulting shapefile, with the number of pixels for each
    # class, to the database. Overwrite option is needed, otherwise the
    # import will append new values to the ones existing in the table
    subprocess.call(["ogr2ogr", overwrite_option, "-nlt", geom_type,
                     "-a_srs", spatialRef, "-nln", pixelcount_table,
                     "-f", "PostgreSQL", db_connection, pixelcount_output])

    # Number of classes from the thematic raster
    num_classes = len(category_map)

    # Minimum and maximum thresholds for heterogeneity checks. In this
    # example, any parcel with a percentage of pixels for one class between
    # 30 and 70 of the total will be considered heterogeneous.
    # min_heterogeneity_threshold = 30
    # max_heterogeneity_threshold = 70
    min_heterogeneity_threshold = pre_min_het
    max_heterogeneity_threshold = pre_max_het

    # Calling the PostgreSQL function which checks the heterogeneity.
    # The function calculates the percentages and sets an attribute
    # "foi_h" to 1 when the percentage of pixels is between thresholds
    try:
        ps_connection = db_conn()
        ps_connection.autocommit = True
        cursor = ps_connection.cursor()
        # call stored procedure
        cursor.callproc('public.check_heterogeneity', (
            pixelcount_table, num_classes, min_heterogeneity_threshold,
            max_heterogeneity_threshold))
        print("Running function to check heterogeneity")
    except (Exception, psycopg2.DatabaseError) as error:
        print("Error while connecting to PostgreSQL", error)
    finally:
        # closing database connection.
        if ps_connection:
            cursor.close()
            ps_connection.close()
            print("PostgreSQL connection is closed")

    print("Heterogeneity assessment function finished")

    # Export processed data - heterogeneity, to shapefile
    subprocess.call(["ogr2ogr", "-f", "ESRI Shapefile", heterogeneity_output,
                     db_connection, pixelcount_table])
    print("Heterogeneity analysis output downloaded")

    # Polygonize the thematic raster. The process takes into account only
    # one band (in this case - the first band). Can be used with 8-connected
    # or 4-connected pixels.
    connectedness = '-8'
    sourceRaster = gdal.Open(raster_classif_file)
    band = sourceRaster.GetRasterBand(1)
    srs = osr.SpatialReference(wkt=sourceRaster.GetProjection())

    dst_layername = polygonize_output
    drv = ogr.GetDriverByName("ESRI Shapefile")
    dst_ds = drv.CreateDataSource(dst_layername)
    dst_layer = dst_ds.CreateLayer(dst_layername, srs=srs)
    fd = ogr.FieldDefn("DN", ogr.OFTInteger)
    dst_layer.CreateField(fd)
    dst_field = dst_layer.GetLayerDefn().GetFieldIndex("DN")
    gdal.Polygonize(band, None, dst_layer, dst_field, [connectedness],
                    callback=None)
    dst_ds.Destroy()

    # Import polygonize result to database
    subprocess.call(["ogr2ogr", overwrite_option, "-nlt", geom_type,
                     "-lco", geom_field_name, "-nln", polygonize_table,
                     "-f", output_format, db_connection, polygonize_output])

    # Names of the tables to be created in the database during the processing
    processed_clusters = polygonize_table + "_clusters"
    processed_cardinality = polygonize_table + "_cardin"

    # Spatial data to be tested - parcels that will be checked for cardinality
    # (I think we should use the same data as for heterogeneity)
    # reference_table = 'reference_data'

    # Minimum area for clusters selection - only clusters bigger than the
    # threshold will be counted
    # area_threshold = 2000

    # Calling the PostgreSQL function which checks the cardinality. The
    # function fixes the geometry for the spatial data resulting from
    # polygonize, clips the polygonize result with the parcels that need to
    # be checked, calculates the area of the clusters inside each parcel,
    # and selects the clusters that are of more than one type, each of them
    # bigger than the threshold, in each parcel.
    # The function creates two new tables: one with the clusters that match
    # the conditions, the other with the data to be tested and a new column
    # "foi_c" which is 1 if the parcel has more than two types of clusters
    # with an area bigger than the threshold
    # TO DO: put the unique identifier as function param
    try:
        ps_connection = db_conn()
        ps_connection.autocommit = True
        cursor = ps_connection.cursor()
        # call stored procedure
        # cursor.callproc('public.check_cardinality', (
        #     polygonize_table, reference_data_table, area_threshold))
        cursor.execute(
            "CALL public.check_cardinality_procedure( %s, %s, %s, %s); ",
            (polygonize_table, reference_data_table, area_threshold, 10000))
        print("Running function to check cardinality")
    except (Exception, psycopg2.DatabaseError) as error:
        print("Error while connecting to PostgreSQL", error)
    finally:
        # closing database connection.
        if ps_connection:
            cursor.close()
            ps_connection.close()
            print("PostgreSQL connection is closed")

    # Export processed data - clusters, to shapefile
    subprocess.call(["ogr2ogr", "-f", "ESRI Shapefile",
                     cardinality_output_clusters, db_connection,
                     processed_clusters])
    print("Cardinality assessment function finished")

    # Export processed data - data to be tested with "foi_c" flag, to shapefile
    subprocess.call(["ogr2ogr", "-f", "ESRI Shapefile", cardinality_output,
                     db_connection, processed_cardinality])
    print("Cardinality analysis output downloaded")

    filelist_temp = [f for f in os.listdir('foi')
                     if f.startswith(Path(temp_reference_data).stem)]
    for f in filelist_temp:
        os.remove(os.path.join('foi', f))
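# Hedged usage sketch of the FOI assessment above; the file names and
# threshold values are made-up examples, not values from the original source.
main(reference_data='parcels_2020.shp',
     raster_classif_file='crop_classification_2020.tif',
     yaml_file='pixelvalues_classes.yml',
     pre_min_het=30, pre_max_het=70,
     area_threshold=2000)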
### Choose csv output file ###
csv_filename = conf['csv_output_path']
file_opt['title'] = 'Please select the csv output file'
file_opt['filetypes'] = [('csv files', '.csv')]
file_opt['initialdir'] = expanduser(csv_filename)
csv_filename = asksaveasfilename(**file_opt)
if csv_filename == '':
    print('No csv file name selected')
    exit()

### Dump the paths to yaml file on disk ###
# Python 3: the original used the Python 2 file() builtin; safe_dump
# produces bytes when an encoding is given, so open in binary mode.
safe_dump(conf, open(yaml_file, 'wb'), encoding='utf-8',
          allow_unicode=True, default_flow_style=False)

# Open shapefile using Fiona
with fopen(vector_filename, 'r') as vct:
    # Get field names of input vector layer
    vct_fields = []
    feat = vct[0]
    for fld in feat['properties']:
        vct_fields.append(fld)

# Merge field name lists
fieldnames = vct_fields + rst_fields

# Python 3: csv files are opened in text mode with newline=''
with open(csv_filename, 'w', newline='') as csvfile:
    # Start writing csv and add attribute names for first row
    writer = DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
def attribute_shapefile(shp, *results):
    df = None
    s = None
    out = shp.replace('.shp', '_pym.shp')
    agri_schema = {
        'geometry': 'Polygon',
        'properties': {
            'OBJECTID': 'int',
            'Supply': 'str',
            'Acres': 'float',
            'System': 'str',
            'Crop': 'str',
            'PIXELS': 'int'
        }
    }
    first = True
    for res in results[0]:
        year = int(res[-8:-4])
        c = read_csv(res)
        # Keep April-October only; .loc avoids pandas' chained-assignment trap
        c.loc[c['MONTH'] < 4, 'MONTH'] = nan
        c.loc[c['MONTH'] > 10, 'MONTH'] = nan
        c.dropna(axis=0, how='any', inplace=True)
        c = c.groupby('OBJECTID').agg({
            'NDVI': 'mean',
            'ETRF': 'mean',
            'ETR_MM': 'sum',
            'ET_MM': 'sum',
            'PPT_MM': 'sum',
            'PIXELS': 'median'
        }).reset_index()
        renames = {
            'NDVI': 'NDVI_{}'.format(year),
            'ETRF': 'ETRF_{}'.format(year),
            'ETR_MM': 'ETR_{}'.format(year),
            'ET_MM': 'ET_{}'.format(year),
            'PPT_MM': 'PPT_{}'.format(year)
        }
        c.rename(columns=renames, inplace=True)
        if first:
            df = deepcopy(c)
            s = {int(r['OBJECTID']): int(r['PIXELS']) for i, r in c.iterrows()}
            first = False
        else:
            df.drop(columns=['PIXELS', 'OBJECTID'], axis=1, inplace=True)
            df = concat([df, c], join='outer', axis=1, sort=True)
        schema_dict = {
            'NDVI_{}'.format(year): 'float',
            'ETRF_{}'.format(year): 'float',
            'ETR_{}'.format(year): 'float',
            'ET_{}'.format(year): 'float',
            'PPT_{}'.format(year): 'float'
        }
        agri_schema['properties'].update(schema_dict)
    with fopen(shp, 'r') as src:
        src_crs = src.crs
        src_driver = src.driver
        with collection(out, mode='w', driver=src_driver,
                        schema=agri_schema, crs=src_crs) as output:
            for rec in src:
                p = rec['properties']
                props = {
                    'OBJECTID': p['OBJECTID'],
                    'Supply': p['Supply_Sou'],
                    'Acres': p['Acres'],
                    'System': p['System_Typ'],
                    'Crop': p['Crop_Type']
                }
                props.update(
                    df[df['OBJECTID'] == p['OBJECTID']].to_dict('records')[0])
                props.update({'PIXELS': s[p['OBJECTID']]})
                props['OBJECTID'] = int(props['OBJECTID'])
                output.write({
                    'geometry': rec['geometry'],
                    'properties': props,
                    'id': p['OBJECTID']
                })
def filter_shapefile_non_overlapping(base, base_shapefile, data_directory):
    """ Shapefiles may deal with data over multiple path/rows.
    This is a method to get the minimum number of path/rows required
    to cover all features.
    Data directory: where the split shapefiles will be saved.
    base: directory containing base_shapefile."""
    path_row = defaultdict(list)
    id_mapping = {}
    # TODO: un hardcode this directory.
    wrs2 = fopen('../spatial_data/wrs2_descending_usa.shp', 'r')
    tree, path_rows, features = _construct_kdtree(wrs2)
    wrs2.close()

    # Float dtype; an integer array would silently truncate centroids.
    cent_arr = array([0.0, 0.0])
    with fopen(os.path.join(base, base_shapefile), "r") as src:
        meta = deepcopy(src.meta)
        for feat in src:
            idd = feat['id']
            id_mapping[idd] = feat
            poly = shape(feat['geometry'])
            centroid = poly.centroid.coords[0]
            cent_arr[0] = centroid[0]
            cent_arr[1] = centroid[1]
            centroid = cent_arr.reshape(1, -1)
            dist, ind = tree.query(centroid, k=10)
            tiles = features[ind[0]]
            prs = get_pr_subset(poly, tiles)
            for p in prs:
                path_row[p].append(idd)

    non_unique_ids = defaultdict(list)
    unique = defaultdict(list)
    for key in path_row:
        ls = path_row[key]  # all features in a given path/row
        placeholder = ls.copy()
        for key1 in path_row:
            if key != key1:
                ls1 = path_row[key1]
                # find unique keys in ls
                placeholder = set(placeholder) - set(ls1)  # all features
                # present in placeholder that are not present in ls1;
                # i.e. unique keys
        unique[key] = list(placeholder)
        if len(ls) != len(placeholder):
            nu = set(ls) - set(placeholder)  # all features present in ls
            # that are not present in placeholder (non-unique)
            for idd in list(nu):
                non_unique_ids[idd].append(key)

    match_key = []
    for key in non_unique_ids:  # unique ids
        pr = None
        hi = 0
        for pathrow in non_unique_ids[key]:  # path/rows corresponding to
            # non-unique features
            if len(unique[pathrow]) > hi:
                pr = pathrow
                hi = len(unique[pathrow])
        if pr is not None:
            unique[pr].append(key)
        else:
            choice = non_unique_ids[key]
            choice.sort()
            choice = choice[0]
            unique[choice].append(key)

    prefix = os.path.splitext(base_shapefile)[0]
    for key in unique:
        if key is None:
            continue
        out = prefix + "_" + key + ".shp"
        if len(unique[key]):
            with fopen(os.path.join(data_directory, out), 'w', **meta) as dst:
                print("Saving split shapefile to: {}".format(
                    os.path.join(data_directory, out)))
                for feat in unique[key]:
                    dst.write(id_mapping[feat])
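# Hedged usage sketch: assign every feature to exactly one path/row and write
# one shapefile per tile. Directory and file names are hypothetical.
filter_shapefile_non_overlapping(base='shapefiles',
                                 base_shapefile='training_polygons.shp',
                                 data_directory='split')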
def tile_geometry(self):
    with fopen(WRS_2, 'r') as wrs:
        wrs_meta = wrs.meta.copy()
    return wrs_meta
def proc(vector_file, raster_file, yaml_file, pre_min_het, pre_max_het,
         area_threshold):
    path_data = f"{config.get_value(['paths', 'temp'])}foi/"

    # database connection string
    db_connection = f"PG:{database.conn_str(db=1)}"

    # ogr2ogr options
    geom_field_name = "GEOMETRY_NAME=geom"
    overwrite_option = "-OVERWRITE"
    geom_type = "MULTIPOLYGON"
    output_format = "PostgreSQL"

    # Path for storing the processed data - final spatial data that will be
    # exported after database processing
    processed_data = f'{path_data}processed_data/'
    os.makedirs(processed_data, exist_ok=True)

    # Spatial data to be tested - parcels that will be checked for
    # heterogeneity and cardinality
    reference_data = vector_file

    # Thematic raster - classification raster, or raster from other
    # source that will be used for testing heterogeneity and cardinality
    raster_classif_file = raster_file

    # YAML file that holds the classes from the thematic raster file - can
    # also be a simple list of values in the notebook; correspondence
    # between pixel values and names for the classes
    # yaml_file = f'{path_data}pixelvalues_classes.yml'

    output_data = f'{path_data}output_data/'
    os.makedirs(output_data, exist_ok=True)

    reference_data_name = os.path.splitext(os.path.basename(reference_data))[0]

    try:
        with open(f"{config.get_value(['paths','temp'])}tb_prefix", 'r') as f:
            reference_data_table = f.read()
    except Exception:
        reference_data_table = reference_data_name

    # Vector file resulting from the raster stats pixel count
    # pixelcount_output = f'{output_data}pixel_count_{reference_data_table}.shp'
    pixelcount_output = f'{processed_data}' + reference_data_name + '_pixelcount.shp'
    # Vector file resulting from the raster to vector process (polygonize)
    polygonize_output = f'{processed_data}' + reference_data_name + '_polygonize.shp'

    # Name of the table to be created in the database - import of the pixel
    # count into the database
    pixelcount_table = f"{reference_data_name}_pixelcount"
    # Name of the table to be created in the database - import of the
    # polygonize result into the database
    polygonize_table = f"{reference_data_name}_polygonize"

    # Names and paths of the files resulting from the analysis
    heterogeneity_output = f'{output_data}' + reference_data_name + '_foih_v1.shp'
    cardinality_output = f'{output_data}' + reference_data_name + '_foic_v1.shp'
    cardinality_output_clusters = f'{output_data}' + reference_data_name + '_foic_clusters_v1.shp'

    shape = fiona.open(reference_data)
    spatialRef = shape.crs["init"]
    # print("Vector EPSG: ", spatialRef)

    # Import reference data shapefile to database. Overwrite option is
    # needed, otherwise the import will append new values to the ones
    # existing in the table
    subprocess.call([
        "ogr2ogr", overwrite_option, "-nlt", geom_type, "-lco",
        geom_field_name, "-a_srs", spatialRef, "-nln", reference_data_table,
        "-f", "PostgreSQL", db_connection, reference_data
    ])

    # Reading the values from the yaml file
    conf = load(open(yaml_file, 'r').read(), Loader=FullLoader)
    category_map = conf['category_map']
    rst_fields = list(category_map.values())

    # Counting the number of pixels for each parcel. Fields named after the
    # classes from the yaml file will be added, and updated with the number
    # of pixels from each category
    with fopen(reference_data, 'r') as input:
        spatialRef = input.crs["init"]
        schema = input.schema
        for i in rst_fields:
            schema['properties'][i] = 'int:5'
        rst_attribs = dict.fromkeys(rst_fields, 0)

        with fopen(pixelcount_output, 'w', 'ESRI Shapefile', schema) as output:
            for i, vct_feat in enumerate(input):
                vct_val_dict = dict(vct_feat['properties'])
                rst_val_dict = zonal_stats(vct_feat,
                                           raster_classif_file,
                                           categorical=True,
                                           copy_properties=True,
                                           category_map=category_map,
                                           nodata=-999)[0]
                vct_val_dict.update(rst_attribs)
                for lu in rst_val_dict:
                    vct_val_dict[lu] = rst_val_dict.get(lu)
                for atrib in vct_val_dict:
                    vct_feat['properties'][atrib] = vct_val_dict.get(atrib)
                output.write(vct_feat)

    print("Finished pixel calculation!")

    # Import the resulting shapefile, with the number of pixels for each
    # class, to the database. Overwrite option is needed, otherwise the
    # import will append new values to the ones existing in the table
    subprocess.call([
        "ogr2ogr", overwrite_option, "-nlt", geom_type, "-a_srs", spatialRef,
        "-nln", pixelcount_table, "-f", "PostgreSQL", db_connection,
        pixelcount_output
    ])

    # Number of classes from the thematic raster
    num_classes = len(category_map)

    # Minimum and maximum thresholds for heterogeneity checks. In this
    # example, any parcel with a percentage of pixels for one class between
    # 30 and 70 of the total will be considered heterogeneous.
    # min_heterogeneity_threshold = 30
    # max_heterogeneity_threshold = 70
    min_heterogeneity_threshold = pre_min_het
    max_heterogeneity_threshold = pre_max_het

    # Calling the PostgreSQL function which checks the heterogeneity. The
    # function calculates the percentages and sets an attribute "foi_h" to 1
    # when the percentage of pixels is between thresholds
    try:
        ps_connection = database.connection()
        ps_connection.autocommit = True
        cursor = ps_connection.cursor()
        # call stored procedure
        cursor.callproc(
            'public.check_heterogeneity',
            (pixelcount_table, num_classes, min_heterogeneity_threshold,
             max_heterogeneity_threshold))
        print("Running function to check heterogeneity")
    except (Exception, psycopg2.DatabaseError) as error:
        print("Error while connecting to PostgreSQL", error)
    finally:
        # closing database connection.
        if ps_connection:
            cursor.close()
            ps_connection.close()
            print("PostgreSQL connection is closed")

    print("Heterogeneity assessment function finished")

    # Export processed data - heterogeneity, to shapefile
    subprocess.call([
        "ogr2ogr", "-f", "ESRI Shapefile", heterogeneity_output,
        db_connection, pixelcount_table
    ])
    print("Heterogeneity analysis output downloaded")

    # Polygonize the thematic raster. The process takes into account only
    # one band (in this case - the first band). Can be used with 8-connected
    # or 4-connected pixels.
    connectedness = '-8'
    sourceRaster = gdal.Open(raster_classif_file)
    band = sourceRaster.GetRasterBand(1)
    srs = osr.SpatialReference(wkt=sourceRaster.GetProjection())

    dst_layername = polygonize_output
    drv = ogr.GetDriverByName("ESRI Shapefile")
    dst_ds = drv.CreateDataSource(dst_layername)
    dst_layer = dst_ds.CreateLayer(dst_layername, srs=srs)
    fd = ogr.FieldDefn("DN", ogr.OFTInteger)
    dst_layer.CreateField(fd)
    dst_field = dst_layer.GetLayerDefn().GetFieldIndex("DN")
    gdal.Polygonize(band, None, dst_layer, dst_field, [connectedness],
                    callback=None)
    dst_ds.Destroy()

    # Import polygonize result to database
    subprocess.call([
        "ogr2ogr", overwrite_option, "-nlt", geom_type, "-lco",
        geom_field_name, "-nln", polygonize_table, "-f", output_format,
        db_connection, polygonize_output
    ])

    # Names of the tables to be created in the database during the processing
    processed_clusters = polygonize_table + "_clusters"
    processed_cardinality = polygonize_table + "_cardin"

    # Spatial data to be tested - parcels that will be checked for cardinality
    # (I think we should use the same data as for heterogeneity)
    # reference_table = 'reference_data'

    # Minimum area for clusters selection - only clusters bigger than the
    # threshold will be counted
    # area_threshold = 2000

    # Calling the PostgreSQL function which checks the cardinality. The
    # function fixes the geometry for the spatial data resulting from
    # polygonize, clips the polygonize result with the parcels that need to
    # be checked, calculates the area of the clusters inside each parcel,
    # and selects the clusters that are of more than one type, each of them
    # bigger than the threshold, in each parcel.
    # The function creates two new tables: one with the clusters that match
    # the conditions, the other with the data to be tested and a new column
    # "foi_c" which is 1 if the parcel has more than two types of clusters
    # with an area bigger than the threshold
    # TO DO: put the unique identifier as function param
    try:
        ps_connection = database.connection()
        ps_connection.autocommit = True
        cursor = ps_connection.cursor()
        # call stored procedure
        # cursor.callproc('public.check_cardinality',
        #                 (polygonize_table, reference_data_table, area_threshold))
        cursor.execute(
            "CALL public.check_cardinality_procedure( %s, %s, %s, %s); ",
            (polygonize_table, reference_data_table, area_threshold, 10000))
        print("Running function to check cardinality")
    except (Exception, psycopg2.DatabaseError) as error:
        print("Error while connecting to PostgreSQL", error)
    finally:
        # closing database connection.
        if ps_connection:
            cursor.close()
            ps_connection.close()
            print("PostgreSQL connection is closed")

    # Export processed data - clusters, to shapefile
    subprocess.call([
        "ogr2ogr", "-f", "ESRI Shapefile", cardinality_output_clusters,
        db_connection, processed_clusters
    ])
    print("Cardinality assessment function finished")

    # Export processed data - data to be tested with "foi_c" flag, to shapefile
    subprocess.call([
        "ogr2ogr", "-f", "ESRI Shapefile", cardinality_output, db_connection,
        processed_cardinality
    ])
    print("Cardinality analysis output downloaded")