def _tile_process_(args):
    """Worker: build a layer stack for one tile and reduce it to a composite."""
    _filelist, _outfile, _band_order, _tile_dict, _composite_type = args

    _tile_coords = _tile_dict['block_coords']
    _xmin, _ymin = _tile_dict['first_pixel']
    _x, _y, _cols, _rows = _tile_coords

    _outfile = '/vsimem/' + Handler(_outfile).basename.split('.tif')[0] + \
        '_{}.tif'.format('_'.join([str(j) for j in _tile_coords]))

    _mraster = MultiRaster(_filelist)
    _layerstack_vrt = _mraster.layerstack(return_vrt=True, outfile=_outfile)

    _lras = Raster('_tmp_layerstack')
    _lras.datasource = _layerstack_vrt
    _lras.initialize()

    _tile_arr = _lras.get_tile(_band_order, _tile_coords).copy()

    # reduce along the layer axis (axis 0), ignoring nodata pixels;
    # a pixel that is nodata in every layer stays nodata
    _reducers = {'mean': np.mean, 'median': np.median, 'max': np.max, 'min': np.min}

    if _composite_type in _reducers:
        _func = _reducers[_composite_type]
        _temp_arr = np.apply_along_axis(
            lambda x: _func(x[x != _lras.nodatavalue])
            if (x != _lras.nodatavalue).any() else _lras.nodatavalue,
            0, _tile_arr)
    elif 'pctl' in _composite_type:
        # composite types such as 'pctl_25' carry the percentile after the underscore
        pctl = int(_composite_type.split('_')[1])
        _temp_arr = np.apply_along_axis(
            lambda x: np.percentile(x[x != _lras.nodatavalue], pctl)
            if (x != _lras.nodatavalue).any() else _lras.nodatavalue,
            0, _tile_arr)
    else:
        _temp_arr = None

    # release GDAL objects and clean up the in-memory layer stack
    _lras = None
    _mraster = None
    Handler(_outfile).file_delete()
    _tile_arr = None

    return (_y - _ymin), ((_y - _ymin) + _rows), (_x - _xmin), ((_x - _xmin) + _cols), _temp_arr
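# A minimal, self-contained sketch (not part of the original script) of the
# masked reduction used in _tile_process_ above, on a hypothetical 3-layer,
# 2 x 2-pixel stack with nodata = -9999. It illustrates why the nodata guard
# is needed: a pixel that is nodata in every layer must stay nodata.
def _demo_masked_mean():
    nodata = -9999
    stack = np.array([[[1, nodata], [3, nodata]],
                      [[5, 7], [nodata, nodata]],
                      [[9, 11], [13, nodata]]], dtype=np.float64)

    composite = np.apply_along_axis(
        lambda x: np.mean(x[x != nodata]) if (x != nodata).any() else nodata,
        0, stack)

    # composite[0, 0] == mean(1, 5, 9) == 5.0; composite[1, 1] stays -9999
    return composite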
def main(gedi_dir, temp_dir, bounds_file, outfile, nproc, res):
    """
    Main function to execute python script
    """
    nproc = int(nproc) - 1

    attrib = {'BEAM': 'int', 'FILE': 'str', 'YEAR': 'int', 'JDAY': 'int'}
    res = float(res)

    bounds_vec = Vector(bounds_file)
    bounds_wkt = bounds_vec.wktlist[0]

    args_list = list((filename, temp_dir, bounds_wkt, res)
                     for filename in Handler(dirname=gedi_dir).find_all('*.h5'))
    n_files = len(args_list)

    Opt.cprint('Number of files: {}'.format(str(n_files)))

    out_res = {}
    Handler(outfile).file_delete()

    pool = mp.Pool(processes=nproc)

    for file_output, err_str in pool.imap_unordered(get_path, args_list):
        if err_str is None and len(file_output) > 0:
            add_on = ''
            count = 0
            for geom_wkt, attrs in file_output:
                if geom_wkt is not None:
                    write_to_txt(geom_wkt, attrs, outfile)
                    out_res[geom_wkt] = attrs
                    count += 1
            if count > 0:
                add_on = ' - FOUND {} BEAMS'.format(str(count))
            out_str = str(list(set([attr['FILE'] for _, attr in file_output]))[0]) + ' : READ' + add_on
            Opt.cprint(out_str)
        else:
            if err_str is None:
                err_str = 'Unknown File I/O Error'
            Opt.cprint('{}: {}'.format(file_output, err_str))

    pool.close()

    Opt.cprint(outfile)
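# Hedged example (not in the original file) of wiring main() to the command
# line; the positional-argument order is assumed from the signature, and an
# argparse front end would work equally well:
#
# if __name__ == '__main__':
#     script, gedi_dir, temp_dir, bounds_file, outfile, nproc, res = sys.argv
#     main(gedi_dir, temp_dir, bounds_file, outfile, nproc, res)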
def _get_tile_data_(_filelist, _outfile, _band_order, _tile_specs, _composite_type):
    """Generator: yield one argument tuple per tile for _tile_process_."""
    _vrtfile = '/vsimem/' + Handler(_outfile).basename.split('.tif')[0] + '_tmp_layerstack.vrt'
    _outfile = '/vsimem/' + Handler(_outfile).basename.split('.tif')[0] + '_tmp_layerstack.tif'

    _mraster = MultiRaster(_filelist)
    _layerstack_vrt = _mraster.layerstack(return_vrt=True, outfile=_outfile)

    _lras = Raster('_tmp_layerstack')
    _lras.datasource = _layerstack_vrt
    _lras.initialize()
    _lras.make_tile_grid(*_tile_specs)

    for _ii in range(_lras.ntiles):
        yield _filelist, _outfile, _band_order, _lras.tile_grid[_ii], _composite_type
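# A hedged sketch (an assumed driver, not shown in the source) of how the tile
# generator above feeds _tile_process_ through a multiprocessing pool, and how
# the returned slice indices place each reduced tile into the output array.
# `out_shape`, `nodata`, and the name composite_tiles are illustrative assumptions.
def composite_tiles(filelist, outfile, band_order, tile_specs,
                    composite_type, nproc, out_shape, nodata):
    out_arr = np.full(out_shape, nodata, dtype=np.float32)
    tile_args = _get_tile_data_(filelist, outfile, band_order, tile_specs, composite_type)

    pool = mp.Pool(processes=nproc)
    for y0, y1, x0, x1, tile in pool.imap_unordered(_tile_process_, tile_args):
        if tile is not None:
            out_arr[y0:y1, x0:x1] = tile
    pool.close()

    return out_arr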
def write_to_txt(geom_wkt_str, attr, outfile_name):
    """
    Method to write or append a geometry and attribute to a text file
    :param geom_wkt_str: Geometry WKT string
    :param attr: Dictionary of attributes
    :param outfile_name: Name of output file
    :return: None
    """
    if outfile_name is None:  # was `outfile`, an undefined name in this scope
        raise ValueError("No file name for writing")

    # append if the file already exists, otherwise create it
    mode = 'a' if Handler(outfile_name).file_exists() else 'w'
    with open(outfile_name, mode) as f:
        f.write(str(geom_wkt_str) + ' ; ' + json.dumps(attr) + '\n')
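# Hypothetical usage (file name invented for illustration): each record lands
# on one line as "<WKT> ; <JSON attributes>", so a reader can split on ' ; '
# once and feed the right half to json.loads:
#
# write_to_txt('POINT (-147.5 64.8)', {'BEAM': 5, 'YEAR': 2019}, 'beams.txt')
# with open('beams.txt') as f:
#     for line in f:
#         wkt, attrs = line.rstrip('\n').split(' ; ', 1)
#         attrs = json.loads(attrs)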
          '#6C327A', '#62E162', '#2F8066', '#F56D94', '#3E9FEF',
          '#A0B48C', '#764728', '#EF963E', '#A45827']

area = 'area'

decid_arr = np.zeros((9, 3), dtype=np.float64)
decid_uarr = decid_arr.copy()
tc_arr = decid_arr.copy()
tc_uarr = decid_arr.copy()
area_arr = decid_arr.copy()

for i, plotfile in enumerate(plotfiles):
    file_dicts = Handler(plotdir + plotfile).read_from_csv(return_dicts=True)

    for file_dict in file_dicts:
        for j, zone in enumerate(zones):
            if file_dict['ZONE_NAME'] == zone[0]:

                # decid difference as percent of zone area
                decid_perc = file_dict[decid_diff_names[2]] / file_dict[area] * 100.0
                decid_uperc = file_dict[decid_diff_names[1]] / file_dict[area] * 100.0

                # tree cover difference per unit zone area
                tc_perc = file_dict[tc_diff_names[2]] / file_dict[area]
                tc_uperc = file_dict[tc_diff_names[1]] / file_dict[area]
'''

# infile = "C:/temp/decid_tc_2000_layerstack-0000026880-0000161280.tif"
infile = "D:/temp/albedo/decid_tc_2000-0000098304-0000425984_clip_1deg_1deg.tif"
outdir = "D:/temp/albedo/"
pickle_dir = "d:/shared/Dropbox/projects/NAU/landsat_deciduous/data/albedo_data/"

picklefiles = ("RFalbedo_deciduous_fraction_treecover_50000_cutoff_5_deg1_20200501T185635_spring.pickle",
               "RFalbedo_deciduous_fraction_treecover_50000_cutoff_5_deg1_20200501T185635_summer.pickle",
               "RFalbedo_deciduous_fraction_treecover_50000_cutoff_5_deg1_20200501T185635_fall.pickle")

picklefiles = [pickle_dir + picklefile for picklefile in picklefiles]

band_name = 'spr_albedo'

outfile = outdir + Handler(Handler(infile).basename).add_to_filename('_output3')
Handler(outfile).file_remove_check()

# raster contains three bands: 1) decid, 2) tree cover, 3) land extent mask;
# all the bands are in integer format
raster = Raster(infile)
raster.initialize()
raster.bnames = ['decid', 'treecover']
raster.get_stats(True)

Opt.cprint(raster.shape)

regressor = RFRegressor.load_from_pickle(picklefiles[0])
Opt.cprint('Band order: ' + ', '.join([str(b) for b in band_order]))

# re-initialize raster
ras.initialize(get_array=True, band_order=band_order)

Opt.cprint(ras)
Opt.cprint(ras.bnames)
Opt.cprint('Multipliers: {}\n'.format(str(band_multipliers)))

hierarchical_regressor = HRFRegressor(regressor=(rf_regressor1, rf_regressor2))
print(hierarchical_regressor)

uncert_file = Handler(ras.name).add_to_filename('_uncertainty')

if not Handler(uncert_file).file_exists():
    # classify raster and write to file
    classif = hierarchical_regressor.regress_raster(ras,
                                                    tile_size=tile_size,
                                                    output_type='median',
                                                    band_name='prediction',
                                                    outdir=outdir,
                                                    nodatavalue=nodatavalue,
                                                    band_multipliers=band_multipliers)
    Opt.cprint(classif)

    classif.write_to_file(compress='lzw', bigtiff='yes')
attr_dict = dict()
for k, v in attr.items():
    attr_dict[k] = Vector.string_to_ogr_type(v, 'name')

# sort features by fire year
order = np.argsort(np.array([int(feat_attr[fire_year_col])
                             for feat_attr in vec.attributes])).tolist()

x_min, x_max, y_min, y_max = vec.layer.GetExtent()
sys.stdout.write('Extent: {}\n'.format(' '.join([str(x_min), str(x_max),
                                                 str(y_min), str(y_max)])))

for year in range(start_year, end_year):
    outfile = outfolder + Handler(infile).basename.split('.shp')[0] + \
        '_year_{}_250m.tif'.format(str(year))

    sys.stdout.write('---------------------\nOutfile: {}\n'.format(outfile))

    # select all features with the given fire year
    # (note: `long` existed only in Python 2; `int` covers it in Python 3)
    if type(attr[fire_year_col]) in (int, float):
        layer = vec.datasource.ExecuteSQL("SELECT * from {} WHERE {}={}".format(vec.name,
                                                                                fire_year_col,
                                                                                str(year)))
    elif type(attr[fire_year_col]) == str:
        layer = vec.datasource.ExecuteSQL("SELECT * from {} WHERE {}='{}'".format(vec.name,
                                                                                  fire_year_col,
                                                                                  str(year)))
    else:
        raise ValueError("unknown or null property type for selected attribute")
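    # A hedged sketch (an assumption, not this script's own code) of the step
    # that presumably follows inside this loop: burning the selected per-year
    # layer into the 250 m GeoTIFF named above with GDAL. pixel_size and the
    # burn value of 1 are illustrative; `from osgeo import gdal` is required.
    #
    # cols = int((x_max - x_min) / pixel_size)
    # rows = int((y_max - y_min) / pixel_size)
    # target = gdal.GetDriverByName('GTiff').Create(outfile, cols, rows, 1, gdal.GDT_Byte)
    # target.SetGeoTransform((x_min, pixel_size, 0, y_max, 0, -pixel_size))
    # gdal.RasterizeLayer(target, [1], layer, burn_values=[1])
    # target.FlushCache()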
boreal_bounds = "D:/shared/Dropbox/projects/NAU/landsat_deciduous/data/STUDY_AREA/boreal/" \
                "NABoreal_simple_10km_buffer_geo.shp"

year_bins = [(1984, 1997), (1998, 2002), (2003, 2007), (2008, 2012), (2013, 2018)]

# script-----------------------------------------------------------------------------------------------

boreal_vec = Vector(boreal_bounds)
boreal_geom = Vector.get_osgeo_geom(boreal_vec.wktlist[0])

year_samp = list(list() for _ in range(len(year_bins)))
year_samp_reduced = list(list() for _ in range(len(year_bins)))

# get data and names
file_data = Handler(infile).read_from_csv(return_dicts=True)
header = list(file_data[0])

print('\nTotal samples: {}'.format(str(len(file_data))))

boreal_samp_count = 0

# bin all samples based on sample years using year_bins
for elem in file_data:
    for i, years in enumerate(year_bins):
        if years[0] <= elem['year'] <= years[1]:
            year_samp[i].append(elem)

# take mean of all samples of the same site that fall in the same year bin
for i, samp_list in enumerate(year_samp):
    print('year: {}'.format(str(year_bins[i])))
import argparse
import sys

import h5py  # needed for h5py.File below
from osgeo import osr  # needed for osr.SpatialReference below

from geosoup import Vector, Handler


if __name__ == "__main__":

    # script, filename = sys.argv

    in_folder = "D:/temp/above2017_0629_lvis2b/l2b/"
    out_folder = "D:/temp/above2017_0629_lvis2b_tif/"

    spref = osr.SpatialReference()
    spref.ImportFromEPSG(4326)
    spref_wkt = spref.ExportToWkt()

    filelist = Handler(dirname=in_folder).find_all('*.h5')

    for hdf5_file in filelist:
        outfile = out_folder + Handler(hdf5_file).basename.replace('.h5', '.tif')

        print('\n===============================================================================================')
        print('Input file: {}'.format(hdf5_file))
        print('Output file: {}'.format(outfile))

        fs = h5py.File(hdf5_file, 'r')

        file_keys = []
def read_gee_extract_data(filename):
    """
    Method to read sample data in the form of a site dictionary with samples dicts by year
    :param filename: Input data file name
    :return: dict of list of dicts by year
    """
    lines = Handler(filename).read_from_csv(return_dicts=True)

    site_dict = dict()
    line_counter = 0

    for j, line in enumerate(lines):
        include = True

        for key, val in line.items():
            if type(val).__name__ == 'str':
                if val == 'None':
                    include = False

        if saturated_bands(line['radsat_qa']) \
                or line['GEOMETRIC_RMSE_MODEL'] > 15.0 \
                or unclear_value(line['pixel_qa']):
            include = False

        if include:
            line_counter += 1

            site_year = str(line['site']) + '_' + str(line['year'])

            if site_year not in site_dict:
                geom_wkt = Vector.wkt_from_coords((line['longitude'], line['latitude']))

                site_dict[site_year] = {'geom': geom_wkt,
                                        'decid_frac': line['decid_frac'],
                                        'data': dict(),
                                        'site_year': line['year'],
                                        'site': line['site']}

            temp_dict = dict()

            sensor_dict = extract_date(line['LANDSAT_ID'])
            temp_dict['img_jday'] = sensor_dict['date'].timetuple().tm_yday
            temp_dict['img_year'] = sensor_dict['date'].timetuple().tm_year
            temp_dict['sensor'] = sensor_dict['sensor']

            bands = list('B' + str(ii + 1) for ii in range(7)) + ['slope', 'elevation', 'aspect']

            band_dict = dict()
            for band in bands:
                if band in line:
                    band_dict[band] = line[band]

            temp_dict['bands'] = correct_landsat_sr(band_dict, sensor_dict['sensor'], scale=0.0001)

            site_dict[site_year]['data'].update({'{}_{}'.format(str(temp_dict['img_jday']),
                                                                str(temp_dict['img_year'])): temp_dict})

    # print(line_counter)
    return site_dict
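# Hedged usage sketch (the CSV name is hypothetical): iterate the returned
# site dictionary and pull per-acquisition band values keyed by '<jday>_<year>'.
#
# site_dict = read_gee_extract_data('gee_extract_samples.csv')
# for site_year, rec in site_dict.items():
#     for acq_key, acq in rec['data'].items():
#         blue = acq['bands'].get('B1')  # corrected surface reflectance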
            if len(num_list) > 1:
                md_vec = [md_vec[x] for x in num_list]

                # find all MD values that are less than the cutoff percentile
                # (note: the original `x != np.nan` test is always True; NaNs
                # must be screened with np.isnan)
                loc = list(i for i, x in enumerate(md_vec)
                           if (x <= np.percentile(md_vec, md_cutoff) and not np.isnan(x)))

                out_samples += list(binned_samp_dicts[i] for i in loc)
            else:
                out_samples += binned_samp_dicts
        else:
            Opt.cprint('Too few samples for cleaning')
            out_samples += binned_samp_dicts

    Opt.cprint('After Mahalanobis dist removal of all samp above {} percentile: {}'.format(str(md_cutoff),
                                                                                           str(len(out_samples))))
else:
    out_samples = out_list

Handler.write_to_csv(out_samples, outfile)

Opt.cprint('Done!')
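# `md_vec` above comes from upstream code not shown in this section; below is
# a minimal sketch (an assumption, not the script's own helper) of computing a
# Mahalanobis distance vector for an (n_samples, n_bands) array:
def mahalanobis_vector(samples):
    """Return the (n,) Mahalanobis distance of each row from the sample mean."""
    mu = samples.mean(axis=0)
    cov_inv = np.linalg.pinv(np.cov(samples, rowvar=False))
    diff = samples - mu
    # row-wise sqrt of the quadratic form diff . cov_inv . diff
    return np.sqrt(np.einsum('ij,jk,ik->i', diff, cov_inv, diff))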
def get_path(args):
    """
    Method to extract path from a GEDI file

    :param args: Tuple of (filename, temp_dir, bounds_wkt, res):
        filename - GEDI filename
        temp_dir - directory for temporary file copies
        bounds_wkt - WKT representation of boundary geometry
        res - bin resolution (degrees) (default: 0.1 degrees)
    :return: (list of (geometry WKT, attribute dict) tuples, None) if no error is raised
             while opening the file, (filename, error string) if an error is raised
    """
    pt_limit = 15
    verbose = False

    filename, temp_dir, boundary_wkt, spatial_resolution = args

    if verbose:
        Opt.cprint('Working on - {} '.format(Handler(filename).basename))

    Handler(filename).copy_file(temp_dir)
    temp_filename = temp_dir + Handler(filename).basename

    date_str = Handler(temp_filename).basename.split('_')[2]
    year = int(date_str[0:4])
    julian_day = int(date_str[4:7])

    bounds_geom = ogr.CreateGeometryFromWkt(boundary_wkt)

    file_keys = []
    try:
        fs = h5py.File(temp_filename, 'r')
        fs.visit(file_keys.append)
    except Exception as e:
        return Handler(temp_filename).basename, ' '.join(e.args)

    beam_ids = list(set(list(key.split('/')[0].strip() for key in file_keys if 'BEAM' in key)))

    feat_list = []
    err = 'No Keys found'

    for beam in beam_ids:
        beam_id = int(beam.replace('BEAM', ''), 2)

        if verbose:
            Opt.cprint('\nBEAM - {}'.format(beam_id), newline=' ')

        try:
            lat_arr = np.array(fs['{}/geolocation/latitude_bin0'.format(beam)])
            lon_arr = np.array(fs['{}/geolocation/longitude_bin0'.format(beam)])
        except Exception as e:
            err = ' '.join(e.args)
            continue

        # make an array of lat lon
        pt_arr = np.vstack([lon_arr, lat_arr]).T

        # remove NaN values
        nan_loc_pre = np.where(np.apply_along_axis(lambda x: (not (np.isnan(x[0])) and (not np.isnan(x[1]))),
                                                   1, pt_arr))
        pt_arr = pt_arr[nan_loc_pre]

        # find start and end of valid strips
        groups = group_nearby(pt_arr)
        chunks = list(pt_arr[elem[0]:(elem[1] + 1), :] for elem in groups)

        main_geom = ogr.Geometry(ogr.wkbMultiLineString)
        any_geom = False

        # find line geometries for each strip and add to main_geom
        for chunk in chunks:
            if chunk.shape[0] <= pt_limit:
                if verbose:
                    Opt.cprint('chunk too short size={},'.format(chunk.shape[0]), newline=' ')
                continue
            else:
                if verbose:
                    Opt.cprint('chunk size={},'.format(str(chunk.shape[0])), newline=' ')

                try:
                    resampled_chunk = resample_chunk(chunk, spatial_resolution)
                except Exception as e:
                    if verbose:
                        Opt.cprint('invalid chunk({})'.format(e.args[0]), newline=' ')
                    continue

                part_geom_json = json.dumps({'type': 'LineString',
                                             'coordinates': resampled_chunk.tolist()})
                part_geom = Vector.get_osgeo_geom(part_geom_json, 'json')

                if part_geom.Intersects(bounds_geom):
                    any_geom = True
                    part_geom_intersection = part_geom.Intersection(bounds_geom)

                    # add to main geometry
                    main_geom.AddGeometryDirectly(part_geom_intersection)

        attributes = {'BEAM': beam_id,
                      'FILE': Handler(temp_filename).basename,
                      'YEAR': year,
                      'JDAY': julian_day}

        if any_geom:
            # Opt.cprint(attributes)
            wkt = main_geom.ExportToWkt()
            main_geom = None
        else:
            wkt = None

        feat_list.append((wkt, attributes))

    fs.close()
    Handler(temp_filename).file_delete()

    if len(feat_list) == 0:
        return Handler(filename).basename, err
    else:
        return feat_list, None
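# `group_nearby` and `resample_chunk` are called in get_path but not defined in
# this section; below is a plausible minimal stand-in for group_nearby (an
# assumption, not the author's code), kept commented out so it cannot shadow
# the real helper: return inclusive (start, end) index pairs of runs of
# consecutive shots whose lon/lat gap stays under `gap` degrees.
#
# def group_nearby(pt_arr, gap=0.01):
#     groups = []
#     start = 0
#     for i in range(1, pt_arr.shape[0]):
#         if np.any(np.abs(pt_arr[i] - pt_arr[i - 1]) > gap):
#             groups.append((start, i - 1))
#             start = i
#     groups.append((start, pt_arr.shape[0] - 1))
#     return groups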
if __name__ == '__main__':

    infilename = "D:/Shared/Dropbox/projects/NAU/landsat_deciduous/data/samples/CAN_PSP/" \
                 "CAN_PSPs_Hember-20180207T213138Z-001/CAN_PSPs_Hember/NAFP_L4_SL_ByJur_R16d_ForBrendanRogers1.csv"

    outfilename = "D:/Shared/Dropbox/projects/NAU/landsat_deciduous/data/samples/CAN_PSP/" \
                  "CAN_PSPs_Hember-20180207T213138Z-001/CAN_PSPs_Hember/" \
                  "NAFP_L4_SL_ByJur_R16d_ForBrendanRogers1_lat52_ABoVE.shp"

    bounds = "D:/Shared/Dropbox/projects/NAU/landsat_deciduous/data/STUDY_AREA/ABoVE_Study_Domain_geo.shp"

    bounds_vec = Vector(bounds)
    bounds_geom = bounds_vec.features[0].GetGeometryRef()

    attr = {'ID_Plot': 'str',
            'Lat': 'float',
            'Lon': 'float'}

    samp_data = Handler(infilename).read_from_csv(return_dicts=True)

    wkt_list = list()
    attr_list = list()

    spref_str = '+proj=longlat +datum=WGS84'

    latlon = list()
    count = 0
    for row in samp_data:
        print('Reading elem: {}'.format(str(count + 1)))

        elem = dict()
        for header in list(attr):
            elem[header] = row[header]

        samp_geom = Vector.get_osgeo_geom(
                    continue
                elif 'ServerNotFoundError' in e.args[0] or \
                        'Unable to find the server' in e.args[0] or \
                        'getaddrinfo failed' in e.args[0] or \
                        'connection attempt failed' in e.args[0]:
                    # original message lacked a format placeholder for `wait`
                    log.lprint('Waiting {} secs...'.format(str(wait)))
                    time.sleep(wait)
                    continue
                else:
                    continue

        # write all extracted dictionaries to file
        if not Handler(OUTFILE).file_exists():
            Handler.write_to_csv(temp_dicts, header=True, append=False, outfile=OUTFILE)
        else:
            Handler.write_to_csv(temp_dicts, header=False, append=True, outfile=OUTFILE)

        time2 = datetime.datetime.now()

        log.lprint('Time taken for site {s} ({ii} of {nn}): {t} seconds'.format(
yvar = 'albedo'
zvar = 'treecover'

xlabel = 'deciduous_fraction'
ylabel = 'albedo'
zlabel = 'treecover'

xvar_bands = ['decid2010', 'decid2005', 'decid2000']
zvar_bands = ['tc2010', 'tc2005', 'tc2000']
oz_bands = ['connected_mask_val18', 'land_extent']

print('Reading file : {}'.format(csv_file))

val_dicts = Handler(csv_file).read_from_csv(return_dicts=True,
                                            read_random=True,
                                            line_limit=None)

basename = Handler(csv_file).basename.split('.csv')[0]

plot_file = in_dir + "RF{}_{}_{}_{}_cutoff_{}_deg{}_{}.png".format(
    ylabel, xlabel, zlabel,
    str(bin_limit),
    str(int(z_thresh * scaledown_treecover)),
    deg,
    datetime.now().isoformat().split('.')[0].replace('-', '').replace(':', ''))

print('Plot file: {}'.format(plot_file))

# -------------------- spring -----------------------------------------------------------------------

yvar_bands = [
trn_outfile = outdir + "ABoVE_AK_all_2010_trn_samp_original.csv"
val_outfile = outdir + "ABoVE_AK_all_2010_val_samp_original.csv"

# names to append to samples' header
header = ['site', 'sample']

# bands used as features for cleaning the samples
bandnames = ['NDVI', 'NDVI_1', 'NDVI_2']

# script-----------------------------------------------------------------------------------------------

# get data and names
names, data = Handler(infile).read_csv_as_array()

for name in names[1:-1]:
    header.append(name)

print(header)

site_data = list()

# convert strings like '1_125_3' into sites and samples
for elem in data:
    index = elem[0].split('_')
    if len(index) == 3:
        site_id = int(index[0]) * 10000 + int(index[1])
tile_size = (1024, 1024)
image_bounds = (-130.999, -90.0, 40.0, 50.0)  # xmin, xmax, ymin, ymax
'''

script, file_folder, outdir, startyear, endyear, startdate, enddate, reducer, ver, nthreads = sys.argv

tile_size = (1024, 1024)
image_bounds = (-179.999, -50.0, 30.0, 75.0)  # xmin, xmax, ymin, ymax

startyear = int(startyear)
endyear = int(endyear)
startdate = int(startdate)
enddate = int(enddate)
nthreads = int(nthreads) - 1

all_files = Handler(dirname=file_folder).find_all('*_albedo.tif')

num_list = np.array(list(list(int(elem_) for elem_ in Handler(elem).basename
                              .replace('_albedo.tif', '')
                              .replace('bluesky_albedo_', '')
                              .split('_'))
                         for elem in all_files))

tile_specs = (tile_size[0], tile_size[1], image_bounds, 'crs')

pool = mp.Pool(processes=nthreads)

Opt.cprint((startdate, enddate))
Opt.cprint((startyear, endyear))
Opt.cprint(len(all_files))

outfile = outdir + '/albedo_composite_{}_{}_{}_{}_{}_v{}.tif'.format(reducer, str(startyear),
fig = plt.figure(figsize=(22, 10))
gs = gridspec.GridSpec(10, 22)  # rows, cols

# -----------------------------------------------------------------------------------------------------------

scale = 1e11
ylim = (-5.0, 3.0)

decid_cparr = np.zeros((9,), dtype=np.float64)
decid_cnarr = np.zeros((9,), dtype=np.float64)
decid_ucparr = decid_cparr.copy()
decid_ucnarr = decid_cnarr.copy()

file_dicts = Handler(forc_file).read_from_csv(return_dicts=True)

for file_dict in file_dicts:
    for j, zone in enumerate(zones):
        if file_dict['ZONE_NAME'] == zone[0]:
            # print(file_dict)
            decid_cparr[j] = file_dict[decid_names[0]] / (scale * 1e4)
            decid_cnarr[j] = file_dict[decid_names[1]] / (scale * 1e4)
            decid_ucparr[j] = (file_dict[decid_names[0]] / (scale * 1e4)) * 0.33
            decid_ucnarr[j] = (file_dict[decid_names[1]] / (scale * 1e4)) * 0.25

# zone_names = sorted(list(set(zone_names)))

ax1 = fig.add_subplot(gs[1:9, 1:10])
    # first slice west of 0 deg lon
    first_slice = np.vstack([arr[channel_indx, (arr.shape[1] - row_indx - 1), cut_loc:]
                             for row_indx in range(arr.shape[1])])

    # second slice east of 0 deg lon
    second_slice = np.vstack([arr[channel_indx, (arr.shape[1] - row_indx - 1), :cut_loc]
                              for row_indx in range(arr.shape[1])])

    resliced_arr_list.append(np.hstack([first_slice, second_slice]))

# stack all months
arr = np.stack(resliced_arr_list, 0)

# name output file
outfile = Handler(file1).dirname + Handler().sep + 'ALBEDO_CAM5_{}_KERNEL.tif'.format(var_names[variable])

# define raster object
ras = Raster(outfile,
             array=arr,
             bnames=months,
             dtype=GDAL_FIELD_DEF['double'],
             shape=arr.shape,
             transform=transform,
             crs_string=spref.ExportToWkt())

# define no data value
ras.nodatavalue = data._FillValue

# write raster object
ras.write_to_file()