# load the full mask, which we'll re-project later
mask_full = GeoImg('../southeast_average_corr.tif')

for yr in [2012, 2013]:
    print("Loading {} data files.".format(yr))
    ifsar = GeoImg('{}/seak.ifsar.{}.dem.30m_adj.tif'.format(yr, yr))
    ifsar_srtm = GeoImg('{}/ifsar_srtm_{}_dh.tif'.format(yr, yr))
    srtm = GeoImg('{}/SRTM_SE_Alaska_30m_{}IfSAR_adj.tif'.format(yr, yr))

    valid_area = np.isfinite(ifsar.img)

    glac_shp = '../outlines/01_rgi60_Alaska_GlacierBay_02km_UTM_{}.shp'.format(yr)
    glacier_mask = it.create_mask_from_shapefile(ifsar, glac_shp)

    mask_geo = mask_full.reproject(ifsar_srtm)

    corrs = [35, 50, 70, 80, 90, 95]
    for i, corr in enumerate(corrs):
        # mask out pixels below the current correlation threshold before interpolating
        corr_mask = mask_geo.img < corr

        masked_ifsar = ifsar.copy()
        masked_ifsar.img[corr_mask] = np.nan

        masked_ifsar_srtm = ifsar_srtm.copy()
        masked_ifsar_srtm.img[corr_mask] = np.nan

        print("Linear interpolation of dH")
        ifsar_srtm_lin_interp = dt.fill_holes(masked_ifsar_srtm, dt.linear, valid_area=valid_area)
        ifsar_srtm_lin_interp.write('ifsar_srtm_{}_dHinterp.tif'.format(yr),
                                    out_folder='filled_ddems/{}/void{}'.format(yr, corr))
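# Hedged follow-up sketch (not part of the original script): a quick sanity check on one of
# the filled dDEMs written above, assuming the same GeoImg/numpy imports and that the dDEM
# grids match the IfSAR grid, as the script itself assumes. The 2012/void70 path is only an
# example of the output naming scheme.
filled = GeoImg('filled_ddems/2012/void70/ifsar_srtm_2012_dHinterp.tif')
n_voids = np.count_nonzero(np.isnan(masked_ifsar_srtm.img) & valid_area)
n_left = np.count_nonzero(np.isnan(filled.img) & valid_area)
print('Interpolation filled {} of {} void pixels.'.format(n_voids - n_left, n_voids))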
def main():
    parser = _argparser()
    args = parser.parse_args()

    if args.plot_curves:
        # set font stuff
        font = {'family': 'sans',
                'weight': 'normal',
                'size': 22}
        # legend_font = {'family': 'sans',
        #                'weight': 'normal',
        #                'size': '16'}
        matplotlib.rc('font', **font)

    # load base dem
    print('Loading DEM {}'.format(args.basedem))
    basedem = GeoImg(args.basedem)
    print('DEM loaded.')

    # get glacier masks
    if args.glac_mask is None:
        print('Rasterizing glacier polygons to DEM extent.')
        master_mask, master_glacs = it.rasterize_polygons(basedem, args.glac_outlines, burn_handle='fid')
        master_mask[master_mask < 0] = np.nan
    else:
        print('Loading raster of glacier polygons {}'.format(args.glac_mask))
        master_mask_geo = GeoImg(args.glac_mask)
        master_mask = master_mask_geo.img
        master_glacs = np.unique(master_mask[np.isfinite(master_mask)])
    # master_mask = np.logical_and(master_mask, np.isfinite(basedem.img))

    # get names
    gshp = gpd.read_file(args.glac_outlines)
    print('Glacier masks loaded.')

    # create output folder if it doesn't already exist
    os.system('mkdir -p {}'.format(args.out_folder))

    # create folders to store glacier dH curve figures
    for g in gshp[args.namefield]:
        os.system('mkdir -p {}'.format(os.path.sep.join([args.out_folder, g])))

    print('Getting glacier AADs.')
    # get aad
    aad_bins, aads = area_alt_dist(basedem, master_mask, glacier_inds=master_glacs)

    # initialize pd dataframes for dH_curves
    df_list = [pd.DataFrame(aad_bin, columns=['elevation']) for aad_bin in aad_bins]
    g_list = [str(gshp[args.namefield][gshp['fid'] == glac].values[0]) for glac in master_glacs]
    df_dict = dict(zip(g_list, df_list))

    # turn aad_bins, aads into dicts with RGIId as keys
    bin_dict = dict(zip(g_list, aad_bins))
    aad_dict = dict(zip(g_list, aads))

    for i, df in enumerate(df_list):
        df['area'] = pd.Series(aads[i], index=df.index)

    # now that we have the AADs, make sure we preserve that distribution when we reproject.
    bin_widths = [np.diff(b)[0] for b in aad_bins]

    basedem.img[np.isnan(master_mask)] = np.nan  # remove all elevations outside of the glacier mask
    for i, g in enumerate(master_glacs):
        basedem.img[master_mask == g] = np.floor(basedem.img[master_mask == g] / bin_widths[i]) * bin_widths[i]

    # get a list of all dH
    dH_list = glob('{}/*.tif'.format(args.dH_folder))

    # initialize ur_dataframe
    ur_df = pd.DataFrame([os.path.basename(x) for x in dH_list], columns=['filename'])
    ur_df['dem1'] = [nice_split(x)[0] for x in ur_df['filename']]
    ur_df['dem2'] = [nice_split(x)[1] for x in ur_df['filename']]
    date1 = [parse_filename(x) for x in ur_df['dem1']]
    date2 = [parse_filename(x) for x in ur_df['dem2']]
    ur_df['date1'] = date1
    ur_df['date2'] = date2
    ur_df['delta_t'] = [(x - y).days / 365.2425 for x, y in list(zip(date1, date2))]
    ur_df['centerdate'] = [(y + dt.timedelta((x - y).days / 2)) for x, y in list(zip(date1, date2))]

    print('Found {} files in {}'.format(len(dH_list), args.dH_folder))
    print('Getting dH curves.')

    for i, dHfile in enumerate(dH_list):
        dH = GeoImg(dHfile)
        print('{} ({}/{})'.format(dH.filename, i+1, len(dH_list)))
        if args.glac_mask is None:
            dh_mask, dh_glacs = it.rasterize_polygons(dH, args.glac_outlines, burn_handle='fid')
        else:
            tmp_dh_mask = master_mask_geo.reproject(dH, method=GRA_NearestNeighbour)
            dh_mask = tmp_dh_mask.img
            dh_glacs = np.unique(dh_mask[np.isfinite(dh_mask)])
        tmp_basedem = basedem.reproject(dH, method=GRA_NearestNeighbour)
        deltat = ur_df.loc[i, 'delta_t']
        this_fname = ur_df.loc[i, 'filename']

        for glac in dh_glacs:
            this_name = str(gshp[args.namefield][gshp['fid'] == glac].values[0])
            this_dem = tmp_basedem.img[dh_mask == glac]
            this_ddem = dH.img[dh_mask == glac]
            this_ddem[np.abs(this_ddem) > args.outlier] = np.nan
            if np.count_nonzero(np.isfinite(this_ddem)) / this_ddem.size < 0.25:
                continue
            # these_bins = get_bins(this_dem, dh_mask)
            filtered_ddem = outlier_filter(bin_dict[this_name], this_dem, this_ddem)
            # _, odH_curve = get_dH_curve(this_dem, this_ddem, dh_mask, bins=aad_bins)
            _, fdH_curve, fbin_area = get_dH_curve(this_dem, filtered_ddem, dh_mask, bins=bin_dict[this_name])
            _, fdH_median, _ = get_dH_curve(this_dem, filtered_ddem, dh_mask, bins=bin_dict[this_name], mode='median')

            fbin_area = 100 * fbin_area * np.abs(dH.dx) * np.abs(dH.dy) / aad_dict[this_name]

            if args.plot_curves:
                # use rsplit rather than strip so that only the '.tif' extension is removed
                plot_dH_curve(this_ddem, this_dem, bin_dict[this_name], fdH_curve, fdH_median, fbin_area,
                              dH.filename.rsplit('.tif', 1)[0])
                plt.savefig(os.path.join(args.out_folder, this_name, dH.filename.rsplit('.tif', 1)[0] + '.png'),
                            bbox_inches='tight', dpi=200)
                plt.close()

            # write dH curve in units of dH/dt (so divide by deltat)
            this_fname = this_fname.rsplit('.tif', 1)[0]
            df_dict[this_name][this_fname + '_mean'] = pd.Series(fdH_curve / deltat, index=df_dict[this_name].index)
            df_dict[this_name][this_fname + '_med'] = pd.Series(fdH_median / deltat, index=df_dict[this_name].index)
            df_dict[this_name][this_fname + '_pct'] = pd.Series(fbin_area, index=df_dict[this_name].index)

    print('Writing dH curves to {}'.format(args.out_folder))
    # write all dH_curves
    for g in df_dict.keys():
        print(g)
        df_dict[g].to_csv(os.path.sep.join([args.out_folder, '{}_dH_curves.csv'.format(g)]), index=False)
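# Hedged sketch (not part of main() above): integrating one of the written dH/dt curves over the
# glacier's area-altitude distribution to estimate a volume change rate. The output folder and
# glacier name are hypothetical, the '_mean' column comes from the naming scheme in main(), and
# the per-bin 'area' units are whatever area_alt_dist returns.
curves = pd.read_csv(os.path.sep.join(['dH_curves', 'SomeGlacier_dH_curves.csv']))
mean_cols = [c for c in curves.columns if c.endswith('_mean')]
dvdt = np.nansum(curves[mean_cols[0]] * curves['area'])
print('Approximate volume change rate: {:.4g} (area units x m/yr)'.format(dvdt))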
def create_mmaster_stack(filelist, extent=None, res=None, epsg=None, outfile='mmaster_stack.nc',
                         clobber=False, uncert=False, coreg=False, ref_tiles=None, exc_mask=None,
                         inc_mask=None, outdir='tmp', filt_dem=None, add_ref=False, add_corr=False,
                         latlontile_nodata=None, filt_mm_corr=False, l1a_zipped=False, y0=1900, tmptag=None):
    """
    Given a list of DEM files, create a stacked NetCDF file.

    :param filelist: List of DEM filenames to stack.
    :param extent: Spatial extent of DEMs to limit stack to [xmin, xmax, ymin, ymax].
    :param res: Output spatial resolution of DEMs.
    :param epsg: EPSG code of output CRS.
    :param outfile: Filename for output NetCDF file.
    :param clobber: Clobber existing dataset when creating NetCDF file.
    :param uncert: Include uncertainty variable in the output NetCDF.
    :param coreg: Co-register DEMs to an input DEM (given by a shapefile of tiles).
    :param ref_tiles: Filename of input reference DEM tiles.
    :param exc_mask: Filename of exclusion mask (i.e., glaciers) to use in co-registration.
    :param inc_mask: Filename of inclusion mask (i.e., land) to use in co-registration.
    :param outdir: Output directory for temporary files.
    :param filt_dem: Filename of DEM to filter elevation differences to.
    :param add_ref: Add reference DEM as a stack variable.
    :param add_corr: Add correlation masks as a stack variable.
    :param latlontile_nodata: Apply nodata for a lat/lon tile footprint to avoid overlap and simplify xarray merging.
    :param filt_mm_corr: Filter MMASTER DEMs with the correlation mask from mmaster_tools when stacking (saves disk space).
    :param l1a_zipped: Use if files have been zipped to save on space.
    :param y0: Year 0 to reference NetCDF time variable to.
    :param tmptag: String to append to temporary files.

    :type filelist: array-like
    :type extent: array-like
    :type res: float
    :type epsg: int
    :type outfile: str
    :type clobber: bool
    :type uncert: bool
    :type coreg: bool
    :type ref_tiles: str
    :type exc_mask: str
    :type inc_mask: str
    :type outdir: str
    :type filt_dem: str
    :type add_ref: bool
    :type add_corr: bool
    :type latlontile_nodata: str
    :type filt_mm_corr: bool
    :type l1a_zipped: bool
    :type y0: float
    :type tmptag: str

    :returns nco: NetCDF Dataset of stacked DEMs.
    """
    if extent is not None:
        if type(extent) in [list, tuple]:
            xmin, xmax, ymin, ymax = extent
        elif type(extent) is Polygon:
            x, y = extent.boundary.coords.xy
            xmin, xmax = min(x), max(x)
            ymin, ymax = min(y), max(y)
        else:
            raise ValueError('extent should be a list, tuple, or shapely.Polygon')
    else:
        xmin, xmax, ymin, ymax = get_common_bbox(filelist, epsg)

    print('Searching for intersecting DEMs among the list of ' + str(len(filelist)) + '...')
    # check if each footprint falls within our given extent, and if not, remove it from the list.
    if l1a_zipped:
        # if L1A files are zipped, it takes too long to extract the archives and read the extent
        # from the rasters, so read the metadata instead
        l1a_filelist = [fn for fn in filelist if os.path.basename(fn)[0:3] == 'AST']
        rest_filelist = [fn for fn in filelist if fn not in l1a_filelist]
        l1a_inters = get_footprints_inters_ext(l1a_filelist, [xmin, ymin, xmax, ymax], epsg, use_l1a_met=True)
        rest_inters = get_footprints_inters_ext(rest_filelist, [xmin, ymin, xmax, ymax], epsg)
        filelist = l1a_inters + rest_inters
    else:
        filelist = get_footprints_inters_ext(filelist, [xmin, ymin, xmax, ymax], epsg)
    print('Found ' + str(len(filelist)) + '.')

    if len(filelist) == 0:
        print('Found no DEMs intersecting extent to stack. Skipping...')
        sys.exit()

    datelist = np.array([parse_date(f) for f in filelist])
    sorted_inds = np.argsort(datelist)

    print(filelist[sorted_inds[0]])
    if l1a_zipped and os.path.basename(filelist[sorted_inds[0]])[0:3] == 'AST':
        tmp_zip = filelist[sorted_inds[0]]
        z_name = '_'.join(os.path.basename(tmp_zip).split('_')[0:3]) + '_Z_adj_XAJ_final.tif'
        if tmptag is None:
            fn_tmp = os.path.join(os.path.dirname(tmp_zip), 'tmp_out.tif')
        else:
            fn_tmp = os.path.join(os.path.dirname(tmp_zip), 'tmp_out_' + tmptag + '.tif')
        mt.extract_file_from_zip(tmp_zip, z_name, fn_tmp)
        tmp_img = GeoImg(fn_tmp)
    else:
        tmp_img = GeoImg(filelist[sorted_inds[0]])

    if res is None:
        res = np.round(tmp_img.dx)  # make sure that we have a nice resolution for gdal

    if epsg is None:
        epsg = tmp_img.epsg

    # now, reproject the first image to the extent, resolution, and coordinate system needed.
    dest = gdal.Warp('', tmp_img.gd, format='MEM', dstSRS='EPSG:{}'.format(epsg),
                     xRes=res, yRes=res, outputBounds=(xmin, ymin, xmax, ymax),
                     resampleAlg=gdal.GRA_Bilinear)

    if l1a_zipped and os.path.basename(filelist[sorted_inds[0]])[0:3] == 'AST':
        os.remove(fn_tmp)

    first_img = GeoImg(dest)
    first_img.filename = filelist[sorted_inds[0]]

    # NetCDF assumes that coordinates are the cell center
    if first_img.is_area():
        first_img.to_point()
    # first_img.info()

    nco, to, xo, yo = create_nc(first_img.img, outfile=outfile, clobber=clobber,
                                t0=np.datetime64('{}-01-01'.format(y0)))
    create_crs_variable(first_img.epsg, nco)
    # crso.GeoTransform = ' '.join([str(i) for i in first_img.gd.GetGeoTransform()])

    # maxchar = max([len(f.rsplit('.tif', 1)[0]) for f in args.filelist])
    go = nco.createVariable('dem_names', str, ('time',))
    go.long_name = 'Source DEM Filename'

    zo = nco.createVariable('z', 'f4', ('time', 'y', 'x'), fill_value=-9999, zlib=True,
                            chunksizes=[500, min(150, first_img.npix_y), min(150, first_img.npix_x)])
    zo.units = 'meters'
    zo.long_name = 'Height above WGS84 ellipsoid'
    zo.grid_mapping = 'crs'
    zo.coordinates = 'x y'
    zo.set_auto_mask(True)

    if ref_tiles is not None:
        if ref_tiles.endswith('.shp'):
            master_tiles = gpd.read_file(ref_tiles)
            s = STRtree([f for f in master_tiles['geometry'].values])
            bounds = Polygon([(xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax)])
            ref_vrt = get_tiles(bounds, master_tiles, s, outdir)
        elif ref_tiles.endswith('.vrt') or ref_tiles.endswith('.tif'):
            ref_vrt = ref_tiles
        ref = GeoImg(ref_vrt)

    if filt_dem is not None:
        filt_dem_img = GeoImg(filt_dem)
        filt_dem = filt_dem_img.reproject(first_img)

    # 3 overlapping pixels on each side of the tile in case reprojection is necessary; will be removed when merging
    if latlontile_nodata is not None and epsg is not None:
        mask = binary_dilation(vt.latlontile_nodatamask(first_img, latlontile_nodata), iterations=3)

    if uncert:
        uo = nco.createVariable('uncert', 'f4', ('time',))
        uo.long_name = 'RMSE of stable terrain differences.'
        uo.units = 'meters'

    if add_ref and ref_tiles is not None:
        ro = nco.createVariable('ref_z', 'f4', ('y', 'x'), fill_value=-9999,
                                chunksizes=[min(150, first_img.npix_y), min(150, first_img.npix_x)])
        ro.units = 'meters'
        ro.long_name = 'Height above WGS84 ellipsoid'
        ro.grid_mapping = 'crs'
        ro.coordinates = 'x y'
        ro.set_auto_mask(True)
        ref_img = ref.reproject(first_img).img
        if latlontile_nodata is not None and epsg is not None:
            ref_img[~mask] = np.nan
        ro[:, :] = ref_img

    if add_corr:
        co = nco.createVariable('corr', 'i1', ('time', 'y', 'x'), fill_value=-1, zlib=True,
                                chunksizes=[500, min(150, first_img.npix_y), min(150, first_img.npix_x)])
        co.units = 'percent'
        co.long_name = 'MMASTER correlation'
        co.grid_mapping = 'crs'
        co.coordinates = 'x y'
        co.set_auto_mask(True)

    x, y = first_img.xy(grid=False)
    xo[:] = x
    yo[:] = y

    # trying something else to speed up writing in compressed chunks
    list_img, list_corr, list_uncert, list_dt, list_name = ([] for i in range(5))

    outind = 0
    for ind in sorted_inds[0:]:
        print(filelist[ind])
        # get instrument
        bname = os.path.splitext(os.path.basename(filelist[ind]))[0]
        splitname = bname.split('_')
        instru = splitname[0]
        # special case for MMASTER outputs (for disk usage)
        if instru == 'AST':
            fn_z = '_'.join(splitname[0:3]) + '_Z_adj_XAJ_final.tif'
            fn_corr = '_'.join(splitname[0:3]) + '_CORR_adj_final.tif'
            # to avoid running into issues in parallel
            if tmptag is None:
                fn_z_tmp = os.path.join(os.path.dirname(filelist[ind]), fn_z)
                fn_corr_tmp = os.path.join(os.path.dirname(filelist[ind]), fn_corr)
            else:
                fn_z_tmp = os.path.join(os.path.dirname(filelist[ind]),
                                        os.path.splitext(fn_z)[0] + '_' + tmptag + '.tif')
                fn_corr_tmp = os.path.join(os.path.dirname(filelist[ind]),
                                           os.path.splitext(fn_corr)[0] + '_' + tmptag + '.tif')
            list_fn_rm = [fn_z_tmp, fn_corr_tmp]
            # unzip if needed
            if l1a_zipped:
                mt.extract_file_from_zip(filelist[ind], fn_z, fn_z_tmp)
                if filt_mm_corr or add_corr:
                    mt.extract_file_from_zip(filelist[ind], fn_corr, fn_corr_tmp)
            # open dem, filter with correlation mask if it comes out of MMASTER
            if filt_mm_corr:
                img = corr_filter_aster(fn_z_tmp, fn_corr_tmp, 70)
            else:
                img = GeoImg(fn_z_tmp)
        else:
            img = GeoImg(filelist[ind])

        if img.is_area():  # netCDF assumes coordinates are the cell center
            img.to_point()

        if add_corr:
            if instru == 'AST':
                corr = GeoImg(fn_corr_tmp)
                if corr.is_area():
                    corr.to_point()

        if coreg:
            try:
                NDV = img.NDV
                coreg_outdir = os.path.join(outdir, os.path.basename(filelist[ind]).rsplit('.tif', 1)[0])
                _, img, _, stats_final = dem_coregistration(ref, img, glaciermask=exc_mask, landmask=inc_mask,
                                                            outdir=coreg_outdir, inmem=True)
                dest = gdal.Warp('', img.gd, format='MEM', dstSRS='EPSG:{}'.format(epsg),
                                 xRes=res, yRes=res, outputBounds=(xmin, ymin, xmax, ymax),
                                 resampleAlg=gdal.GRA_Bilinear, srcNodata=NDV, dstNodata=-9999)
                img = GeoImg(dest)
                if add_corr:
                    if instru == 'AST':
                        corr = corr.reproject(img)
                    else:
                        corr = img.copy()
                        corr.img[:] = 100
                    co[outind, :, :] = corr.img.astype(np.int8)
                if filt_dem is not None:
                    valid = np.logical_and(img.img - filt_dem.img > -400,
                                           img.img - filt_dem.img < 1000)
                    img.img[~valid] = np.nan
                if latlontile_nodata is not None and epsg is not None:
                    img.img[~mask] = np.nan
                    if add_corr:
                        corr.img[~mask] = -1
                nvalid = np.count_nonzero(~np.isnan(img.img))
                if nvalid == 0:
                    print('No valid pixel in the stack extent: skipping...')
                    if l1a_zipped and (instru == 'AST'):
                        for fn_rm in list_fn_rm:
                            if os.path.exists(fn_rm):
                                os.remove(fn_rm)
                    continue
                zo[outind, :, :] = img.img
                if uncert:
                    uo[outind] = stats_final[3]
                print('Adding DEM that has ' + str(nvalid) + ' valid pixels in this extent, '
                      'with a global RMSE of ' + str(stats_final[3]))
            except:
                print('Coregistration failed: skipping...')
                if l1a_zipped and (instru == 'AST'):
                    for fn_rm in list_fn_rm:
                        if os.path.exists(fn_rm):
                            os.remove(fn_rm)
                continue
        else:
            img = img.reproject(first_img)
            if add_corr:
                if instru == 'AST':
                    corr = corr.reproject(first_img)
                else:
                    corr = img.copy()
                    corr.img[:] = 100
                # co[outind, :, :] = corr.img.astype(np.int8)
            if filt_dem is not None:
                valid = np.logical_and(img.img - filt_dem.img > -400,
                                       img.img - filt_dem.img < 1000)
                img.img[~valid] = np.nan
            if latlontile_nodata is not None and epsg is not None:
                img.img[~mask] = np.nan
                if add_corr:
                    corr.img[~mask] = -1
            nvalid = np.count_nonzero(~np.isnan(img.img))
            if nvalid == 0:
                print('No valid pixel in the stack extent: skipping...')
                if l1a_zipped and (instru == 'AST'):
                    for fn_rm in list_fn_rm:
                        if os.path.exists(fn_rm):
                            os.remove(fn_rm)
                continue
            # zo[outind, :, :] = img.img
            if uncert:
                try:
                    stats = read_stats(os.path.dirname(filelist[ind]))
                except:
                    stats = None
                # uo[outind] = stats['RMSE']
            # to[outind] = datelist[ind].toordinal() - dt.date(y0, 1, 1).toordinal()
            # go[outind] = os.path.basename(filelist[ind]).rsplit('.tif', 1)[0]
            if stats is None:
                list_uncert.append(5.)
            else:
                try:
                    list_uncert.append(stats['RMSE'])
                except KeyError:
                    print('KeyError for RMSE here: ' + filelist[ind])
                    continue
            list_img.append(img.img)
            list_corr.append(corr.img.astype(np.int8))
            list_dt.append(datelist[ind].toordinal() - dt.date(y0, 1, 1).toordinal())
            list_name.append(os.path.basename(filelist[ind]).rsplit('.tif', 1)[0])

        outind += 1

        if l1a_zipped and (instru == 'AST'):
            for fn_rm in list_fn_rm:
                if os.path.exists(fn_rm):
                    os.remove(fn_rm)

    # then write all at once
    zo[0:outind, :, :] = np.stack(list_img, axis=0)
    co[0:outind, :, :] = np.stack(list_corr, axis=0)
    uo[0:outind] = np.array(list_uncert)
    to[0:outind] = np.array(list_dt)
    go[0:outind] = np.array(list_name)

    return nco
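# Hedged usage sketch for create_mmaster_stack(), using only parameters documented in the
# docstring above. The DEM folder, extent, and EPSG code are hypothetical, and glob is assumed
# to be imported as elsewhere in this codebase.
example_dems = glob('dems/*.tif')
example_stack = create_mmaster_stack(example_dems,
                                     extent=[440000, 470000, 6790000, 6820000],
                                     res=30, epsg=32633,
                                     outfile='example_stack.nc',
                                     clobber=True, uncert=True)
example_stack.close()  # close the netCDF Dataset returned by the function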
odir = '/Volumes/arc_03/vargola/eoss_images/BREWSTER/'
bedrx_poly_mask = odir + 'bedrx_poly.shp'

# read in DEMs and project to same extents
dem1 = GeoImg(args.dem_1)
dem2 = GeoImg(args.dem_2)
x_res = dem1.dx  # had to add this for calculating dv at the end, mask seems to distort
y_res = dem1.dy

# coregister
if coregFlag:
    coreg.dem_coregistration(dem1, dem2, glaciermask=None, landmask=bedrx_poly_mask, outdir=odir, pts=False)
    dem2 = GeoImg(args.dem_2[0:-4] + '_adj.tif')

# dh_dem
dem2_reproj = dem2.reproject(dem1)  # reproject dem2 onto the dem1 grid (extent, cell size)
dh_dem = dem2.copy(new_raster=dem2_reproj.img - dem1.img)
dh_bedrx = dem2.copy(new_raster=dem2_reproj.img - dem1.img)

# remove outliers beyond |mean| + 5 standard deviations
dem_rem = np.absolute(np.nanmean(dh_dem.img)) + (np.nanstd(dh_dem.img) * 5)
np.seterr(all='ignore')
dh_dem.img[dh_dem.img > dem_rem] = np.nan
dh_dem.img[dh_dem.img < (dem_rem * -1)] = np.nan
dh_bedrx.img[dh_bedrx.img > dem_rem] = np.nan
dh_bedrx.img[dh_bedrx.img < (dem_rem * -1)] = np.nan

# test co-registered bedrock error
bedrx_ma = imtool.create_mask_from_shapefile(dem1, bedrx_poly_mask, buffer=None)
bedrx_mask = np.invert(bedrx_ma)  # invert so that everything outside the bedrock polygons is masked
dh_bedrx.img = np.ma.masked_where(bedrx_mask, dh_bedrx.img)  # keep only bedrock differences
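# Hedged sketch (not in the original script): summarize the bedrock (stable-terrain) differences
# as a simple co-registration error estimate, using the masked array built just above.
bedrx_vals = dh_bedrx.img.filled(np.nan)  # masked cells -> NaN, then use nan-aware stats
print('Bedrock dh: mean = {:.2f} m, std = {:.2f} m'.format(np.nanmean(bedrx_vals), np.nanstd(bedrx_vals)))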