def transform_aspect(aspect_raster, nodata, transform='both', out_dir=None):
    """Write cosine-transformed copies of an aspect raster.

    Each output is ``100 * (cos(radians(aspect - azimuth)) + 1)`` stored as
    unsigned 8-bit data, with 255 wherever the input equals ``nodata``. The
    rasters are written with the ENVI driver and a metadata record is created
    for each one with ``createMetadata``.

    Parameters
    ----------
    aspect_raster : str
        Path of the aspect raster to read.
    nodata : number
        Value in ``aspect_raster`` whose cells are left at 255 in the output.
    transform : str, optional
        ``'both'`` (default) writes both outputs; ``'nesw'`` writes only the
        225-degree (northeast/southwest) transform; any other value writes
        only the 135-degree (northwest/southeast) transform.
    out_dir : str, optional
        Directory that receives the outputs. Defaults to the directory of
        ``aspect_raster``.

    Notes
    -----
    Output files are named ``<input name>_nesw<ext>`` / ``<input name>_nwse<ext>``
    inside ``out_dir``, so the input file is never overwritten regardless of
    its extension.
    """
    ds = gdal.Open(aspect_raster)
    ar = ds.ReadAsArray()
    tx = ds.GetGeoTransform()
    prj = ds.GetProjection()
    ds = None
    driver = gdal.GetDriverByName('envi')

    if not out_dir:
        out_dir = os.path.dirname(aspect_raster)

    # suffix -> (azimuth subtracted from aspect, label used in the metadata)
    transforms = {
        'nesw': (225, 'northeast/southwest'),
        'nwse': (135, 'northwest/southeast'),
    }
    if transform == 'both':
        selected = ('nesw', 'nwse')
    elif transform == 'nesw':
        selected = ('nesw',)
    else:
        # Any unrecognised value falls back to the NW/SE transform.
        selected = ('nwse',)

    base, ext = os.path.splitext(os.path.basename(aspect_raster))
    mask = ar != nodata
    # Work in floating point so the subtraction cannot wrap around when the
    # raster is stored as an unsigned integer type.
    valid_aspect = ar[mask].astype(np.float64)

    for suffix in selected:
        azimuth, label = transforms[suffix]
        ar_out = np.full(ar.shape, 255, dtype=np.uint8)
        ar_out[mask] = 100 * (np.cos(np.radians(valid_aspect - azimuth)) + 1)

        out_path = os.path.join(out_dir, '%s_%s%s' % (base, suffix, ext))
        array_to_raster(ar_out, tx, prj, driver, out_path,
                        dtype=gdal.GDT_Byte, silent=True)

        desc = (
            'Cosine transformed aspect representing {0}-ness.\n'
            '\tCreated with the expression: '
            '100 * (np.cos(np.radians(ar - {1})) + 1)\n'
            '\tInput aspect raster: {2}'
        ).format(label, azimuth, aspect_raster)
        createMetadata(sys.argv, out_path, description=desc)
def scale_raster(in_raster, nodata, scale):
    """Multiply the valid cells of a raster by ``scale`` and rewrite it in place.

    Cells equal to ``nodata`` are left untouched. The result is written back
    to ``in_raster`` with the dataset's own driver as 16-bit integers, and a
    metadata record is created with ``createMetadata``.

    Parameters
    ----------
    in_raster : str
        Path of the raster to read and then overwrite.
    nodata : number
        Value marking cells that must not be scaled.
    scale : number
        Factor applied to every other cell.
    """
    dataset = gdal.Open(in_raster)
    geotransform = dataset.GetGeoTransform()
    projection = dataset.GetProjection()
    out_driver = dataset.GetDriver()
    values = dataset.ReadAsArray()
    # Drop the dataset reference before the same path is written again.
    dataset = None

    valid = values != nodata
    values[valid] = values[valid] * scale

    array_to_raster(values, geotransform, projection, out_driver, in_raster,
                    dtype=gdal.GDT_Int16, silent=True)
    createMetadata(sys.argv, in_raster,
                   description='%s scaled by %s.\n' % (in_raster, scale))
def main(in_raster, snap_raster, in_nodata, out_nodata, out_path=None, mask_val=None, overwrite=False):
    """Snap ``in_raster`` onto the grid and extent of ``snap_raster``.

    The input array is copied into an array shaped like the snap raster at
    the offset computed from the two geotransforms; cells not covered by the
    input, and input cells equal to ``in_nodata``, are set to ``out_nodata``.

    Parameters
    ----------
    in_raster : str
        Path of the raster to snap.
    snap_raster : str
        Path of the raster that defines the output shape, geotransform and
        projection.
    in_nodata, out_nodata : int or str
        Nodata values of the input and output; converted with ``int()``.
    out_path : str, optional
        Where to write the result. When omitted the array is returned instead.
    mask_val : int or str, optional
        If given, cells where the snap raster equals ``int(mask_val)`` are set
        to ``out_nodata``. ``None`` or an empty string disables masking; ``0``
        is a valid mask value.
    overwrite : bool, optional
        Allow replacing an existing ``out_path``.

    Returns
    -------
    numpy.ndarray or None
        The snapped array when ``out_path`` is not given, otherwise ``None``.

    Raises
    ------
    SystemExit
        If ``out_path`` exists and ``overwrite`` is false.
    """
    t0 = time.time()
    in_nodata = int(in_nodata)
    out_nodata = int(out_nodata)

    # Decide once whether masking is requested so that a mask value of 0 is
    # honoured both when masking and when describing the output.
    use_mask = mask_val is not None and mask_val != ''
    if use_mask:
        mask_val = int(mask_val)

    # Fail before doing any expensive work.
    if out_path and os.path.exists(out_path) and not overwrite:
        sys.exit('out_path already exists')

    print('\nOpening datasets... ')
    t1 = time.time()
    ds_in = gdal.Open(in_raster)
    ar_in = ds_in.ReadAsArray()
    tx_in = ds_in.GetGeoTransform()
    ds_in = None

    ds_snap = gdal.Open(snap_raster)
    ar_snap = ds_snap.ReadAsArray()
    tx_snap = ds_snap.GetGeoTransform()
    prj = ds_snap.GetProjection()
    ds_snap = None
    print('%.1f seconds\n' % (time.time() - t1))

    print('Snapping input raster...')
    t1 = time.time()
    offset = calc_offset((tx_snap[0], tx_snap[3]), tx_in)
    snap_inds, in_inds = get_offset_array_indices(ar_snap.shape, ar_in.shape, offset)
    ar = np.full(ar_snap.shape, out_nodata, dtype=ar_in.dtype)
    ar_in[ar_in == in_nodata] = out_nodata
    ar[snap_inds[0]:snap_inds[1], snap_inds[2]:snap_inds[3]] = \
        ar_in[in_inds[0]:in_inds[1], in_inds[2]:in_inds[3]]
    if use_mask:
        ar[ar_snap == mask_val] = out_nodata
    print('%.1f seconds\n' % (time.time() - t1))

    if not out_path:
        return ar

    # Byte output when the values allow it, 16-bit integers otherwise.
    if ar.max() <= 255 and ar.min() >= 0:
        gdal_dtype = gdal.GDT_Byte
    else:
        gdal_dtype = gdal.GDT_Int16

    driver = get_gdal_driver(out_path)
    array_to_raster(ar, tx_snap, prj, driver, out_path, gdal_dtype, out_nodata)

    # Write metadata
    desc = ('Input raster %s snapped to the extent of %s.') % (in_raster, snap_raster)
    if use_mask:
        desc += ' Data were masked from snap raster with value %s.' % mask_val
    createMetadata(sys.argv, out_path, description=desc)

    print('\nTotal time to snap raster: %.1f seconds\n' % (time.time() - t0))
def main(raster, nodata, psu_shp, out_dir):
    """Split a raster into a test sample table and a training raster.

    For every primary sampling unit (PSU) in ``psu_shp`` the non-nodata
    raster cells inside the PSU window are collected into a table with their
    array row/col, x/y coordinates, value and PSU name. The table is written
    to ``<raster name>_test.txt`` (tab delimited) in ``out_dir``. The same
    cells are then set to ``nodata`` in a copy of the raster that is written
    to ``<raster name>_train.tif``, along with a metadata record.

    Parameters
    ----------
    raster : str
        Path of the raster to sample.
    nodata : int or str
        Nodata value of the raster; converted with ``int()``.
    psu_shp : str
        Path of the PSU vector file. Its attribute table must provide the
        fields ``ul_x``, ``ul_y``, ``xmin``, ``xmax``, ``ymin``, ``ymax`` and
        ``name``.
    out_dir : str
        Directory that receives the two outputs.
    """
    nodata = int(nodata)
    psus = attributes_to_df(psu_shp)
    ds = gdal.Open(raster)
    # Read-only virtual-memory view: enough for extracting the samples.
    ar = ds.GetVirtualMemArray()
    tx = ds.GetGeoTransform()
    prj = ds.GetProjection()
    driver = gdal.GetDriverByName('gtiff')

    # Just extract the test sample first
    test_sample_dfs = []
    print('\nGetting test samples for PSUs...')
    for _, psu in psus.iterrows():
        # Array offsets and size of this PSU's window
        row_off, col_off = calc_offset((tx[0], tx[3]), psu[['ul_x', 'ul_y']], tx)
        n_rows = abs(int((psu.ymax - psu.ymin) / tx[5]))
        n_cols = abs(int((psu.xmax - psu.xmin) / tx[1]))

        # Values inside the window, keeping only cells that have data
        test_data = ar[row_off:row_off + n_rows, col_off:col_off + n_cols].ravel()
        mask = test_data != nodata
        test_data = test_data[mask]

        # Window-relative indices of the kept cells
        row_inds, col_inds = np.indices((n_rows, n_cols), dtype=np.uint32)
        row_inds = row_inds.ravel()[mask]
        col_inds = col_inds.ravel()[mask]

        test_sample_dfs.append(pd.DataFrame({
            'row': row_inds + row_off,
            'col': col_inds + col_off,
            'y': row_inds * tx[5] + psu.ul_y,
            'x': col_inds * tx[1] + psu.ul_x,
            'value': test_data,
            'tile_id': psu['name'],
        }))
    test_sample = pd.concat(test_sample_dfs, ignore_index=True)

    # Build output names from the file stem so any extension length works.
    base = os.path.splitext(os.path.basename(raster))[0]
    out_txt = os.path.join(out_dir, base + '_test.txt')
    test_sample.to_csv(out_txt, sep='\t', index=False)

    # Read the raster as a write-able array and set all test samples to nodata
    print('\nAssigning nodata val to PSUs in training raster...\n')
    ar = ds.ReadAsArray()
    ar[test_sample.row.values, test_sample.col.values] = nodata
    out_raster = os.path.join(out_dir, base + '_train.tif')
    array_to_raster(ar, tx, prj, driver, out_raster, nodata=nodata)

    desc = 'Training raster and test sample (text file with the same name but "_test" at the end) for making and evaluating STEM CONUS maps. Primary sampling units (PSUs) reserved for testing are assigned nodata.'
    desc += '\n\tInput raster: %s' % os.path.abspath(raster)
    desc += '\n\tNodata value: %s' % nodata
    desc += '\n\tPSU shapefile: %s' % os.path.abspath(psu_shp)
    desc += '\n\tOutput directory: %s\n' % os.path.abspath(out_dir)
    createMetadata(sys.argv, out_raster, description=desc)

    ds = None
def main(in_raster, snap_raster, in_nodata, out_nodata, out_path=None, mask_val=None, overwrite=False, n_tiles=None):
    """Snap ``in_raster`` onto the grid of ``snap_raster``, optionally by tile.

    With ``n_tiles`` the work is delegated to ``snap_by_tile``, which leaves
    one pickled array per tile in a temporary ``temp_tiles`` directory next
    to ``out_path``; the tiles are then assembled into one array and the
    directory is removed. Without ``n_tiles`` both rasters are read whole and
    passed to ``snap_array``.

    Parameters
    ----------
    in_raster : str
        Path of the raster to snap.
    snap_raster : str
        Path of the raster that defines the output shape, geotransform and
        projection.
    in_nodata, out_nodata : int or str
        Nodata values of the input and output; converted with ``int()``.
    out_path : str, optional
        Where to write the result. Required when ``n_tiles`` is given. When
        omitted the array is returned instead.
    mask_val : optional
        Passed through to ``snap_by_tile`` / ``snap_array``; mentioned in the
        output metadata when truthy.
    overwrite : bool, optional
        Allow replacing an existing ``out_path``.
    n_tiles : optional
        Tiling specification handed to ``make_tiles``.

    Returns
    -------
    numpy.ndarray or None
        The snapped array when ``out_path`` is not given, otherwise ``None``.

    Raises
    ------
    IOError
        If ``n_tiles`` is given without ``out_path``.
    SystemExit
        If ``out_path`` exists and ``overwrite`` is false.
    """
    t0 = time.time()
    in_nodata = int(in_nodata)
    out_nodata = int(out_nodata)

    # Fail before doing any expensive work.
    if out_path and os.path.exists(out_path) and not overwrite:
        sys.exit('out_path already exists')

    print('\nOpening datasets... ')
    t1 = time.time()
    ds_in = gdal.Open(in_raster)
    tx_in = ds_in.GetGeoTransform()
    ds_snap = gdal.Open(snap_raster)
    snap_size = ds_snap.RasterYSize, ds_snap.RasterXSize
    tx_snap = ds_snap.GetGeoTransform()
    prj = ds_snap.GetProjection()
    print('%.1f seconds\n' % (time.time() - t1))

    if n_tiles:
        if not out_path:
            raise IOError('n_tiles was given, but no out_path specified')

        tiles = make_tiles(n_tiles, ds_snap)
        temp_dir = os.path.join(os.path.dirname(out_path), 'temp_tiles')
        if not os.path.exists(temp_dir):
            os.mkdir(temp_dir)
        np_dtype = snap_by_tile(ds_in, ds_snap, tiles, tx_snap, tx_in,
                                in_nodata, out_nodata, temp_dir, mask_val)

        # Assemble the per-tile arrays; tiles without a file stay nodata.
        ar = np.full(snap_size, out_nodata, dtype=np_dtype)
        for _, coords in tiles.dropna(subset=['file']).iterrows():
            # These pickles are the temporary files written under temp_dir
            # for this run, not external input.
            with open(coords.file, 'rb') as f:
                ar_tile = pickle.load(f)
            t_ysize, t_xsize = ar_tile.shape
            ul_r = coords.ul_r
            ul_c = coords.ul_c
            ar[ul_r:ul_r + t_ysize, ul_c:ul_c + t_xsize] = ar_tile
        shutil.rmtree(temp_dir)
    else:
        ar_in = ds_in.ReadAsArray()
        ar_snap = ds_snap.ReadAsArray()
        ar = snap_array(ar_in, ar_snap, tx_in, tx_snap, in_nodata, out_nodata, mask_val)
    print('%.1f minutes\n' % ((time.time() - t1) / 60))

    if not out_path:
        return ar

    # Byte output when the values allow it, 16-bit integers otherwise.
    if ar.max() <= 255 and ar.min() >= 0:
        gdal_dtype = gdal.GDT_Byte
    else:
        gdal_dtype = gdal.GDT_Int16

    driver = get_gdal_driver(out_path)
    array_to_raster(ar, tx_snap, prj, driver, out_path, gdal_dtype, out_nodata)

    # Write metadata
    desc = ('Input raster %s snapped to the extent of %s.') % (in_raster, snap_raster)
    if mask_val:
        desc += ' Data were masked from snap raster with value %s.' % mask_val
    createMetadata(sys.argv, out_path, description=desc)

    print('\nTotal time to snap raster: %.1f seconds\n' % (time.time() - t0))
def main(model_dir, n_tiles, **kwargs):
    """Make per-variable importance maps for a STEM model.

    For each variable, every non-empty tile of the model's vote raster is
    filled with the mean of ``feature_importances_[variable] * 100`` over the
    decision-tree models whose support sets overlap the tile. One GeoTIFF per
    variable is written to ``<model_dir>/importance_maps``.

    Parameters
    ----------
    model_dir : str
        Model directory. It must contain ``decisiontree_models/
        <model>_support_sets.txt``, ``predict_stem_*params.txt``,
        ``train_stem_*params.txt`` and ``<model>_vote.bsq`` (or ``.tif``).
    n_tiles : str
        Comma-separated integers handed to ``stem.get_tiles``.
    **kwargs
        Optional ``variable_name=variable_id`` pairs selecting the variables
        to map. ``variable_id`` is converted with ``int()`` and used as the
        index into each model's ``feature_importances_``. When omitted, the
        unique values of the ``max_importance`` column of the support-set
        table are used.

    Raises
    ------
    IOError
        If ``model_dir`` is not an existing directory.
    """
    t0 = time.time()

    n_tiles = [int(n) for n in n_tiles.split(',')]
    if not os.path.isdir(model_dir):
        raise IOError(
            'model directory given does not exist or is not a directory: %s'
            % model_dir)

    model = os.path.basename(model_dir)
    dt_dir = os.path.join(model_dir, 'decisiontree_models')
    set_txt = os.path.join(dt_dir, '%s_support_sets.txt' % model)
    df_sets = pd.read_csv(set_txt, sep='\t', index_col='set_id')

    pred_param_path = glob(os.path.join(model_dir, 'predict_stem_*params.txt'))[0]
    predict_params, df_var = stem.read_params(pred_param_path)
    train_param_path = glob(os.path.join(model_dir, 'train_stem_*params.txt'))[0]
    train_params, _ = stem.read_params(train_param_path)
    df_var.sort_index(inplace=True)

    nodata = int(predict_params['nodata'].replace('"', ''))
    if len(kwargs) == 0:
        var_ids = df_sets.max_importance.unique()
        # NOTE(review): .ix is the legacy pandas indexer; kept because its
        # label/position fallback depends on df_var's index type.
        var_names = df_var.ix[var_ids].index
        variables = list(zip(var_ids, var_names))
    else:
        # kwargs maps variable_name -> variable_id; sort for a stable order.
        variables = [(int(variable_id), variable_name)
                     for variable_name, variable_id in sorted(kwargs.items())]

    mask_path = os.path.join(model_dir, '%s_vote.bsq' % model)
    if not os.path.exists(mask_path):
        mask_path = mask_path.replace('.bsq', '.tif')
    mask_ds = gdal.Open(mask_path)
    mask_tx = mask_ds.GetGeoTransform()
    xsize = mask_ds.RasterXSize
    ysize = mask_ds.RasterYSize
    prj = mask_ds.GetProjection()
    df_tiles, df_tiles_rc, tile_size = stem.get_tiles(n_tiles, xsize, ysize, mask_tx)
    total_tiles = len(df_tiles)
    df_tiles['tile'] = df_tiles.index

    # Find the tiles that have only nodata values
    t1 = time.time()
    print('\nFinding empty tiles...')
    mask = mask_ds.ReadAsArray() == nodata
    empty_tiles = stem.find_empty_tiles(df_tiles, ~mask, mask_tx)
    mask_ds = None
    print('%s empty tiles found of %s total tiles\n%.1f minutes\n' %
          (len(empty_tiles), total_tiles, (time.time() - t1) / 60))

    # Select only tiles that are not empty
    # NOTE(review): DataFrame.select is a legacy pandas API; kept as-is.
    df_tiles = df_tiles.select(lambda x: x not in empty_tiles)
    total_tiles = len(df_tiles)

    support_size = [
        int(s) for s in train_params['support_size'].replace('"', '').split(',')
    ]
    set_size = [int(abs(s / mask_tx[1])) for s in support_size]

    out_dir = os.path.join(model_dir, 'importance_maps')
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    print(variables)
    for vi, (v_id, v_name) in enumerate(variables):
        t1 = time.time()
        print('Making map for %s: %s of %s variables\n' %
              (v_name, vi + 1, len(variables)))

        ar = np.full((ysize, xsize), nodata, dtype=np.uint8)
        for i, (t_ind, t_row) in enumerate(df_tiles.iterrows()):
            t2 = time.time()
            print('Aggregating for %s of %s tiles' % (i + 1, total_tiles))

            # Calculate the size of this tile in case it's at the edge where
            # the tile size will be slightly different
            this_size = abs(t_row.lr_y - t_row.ul_y), abs(t_row.lr_x - t_row.ul_x)
            df_these_sets = stem.get_overlapping_sets(df_sets, t_row, this_size,
                                                      support_size)

            # From here on this_size is the tile size in rows/cols.
            rc = df_tiles_rc.ix[t_ind]
            this_size = rc.lr_r - rc.ul_r, rc.lr_c - rc.ul_c
            n_sets = len(df_these_sets)

            # Load overlapping models from disk and build one band per set
            tile_ul = t_row[['ul_x', 'ul_y']]
            print('%s  Overlapping sets' % n_sets)
            importance_bands = []
            importance_values = []
            for _, s_row in df_these_sets.iterrows():
                # Calculate offset and array/tile indices
                offset = stem.calc_offset(tile_ul, (s_row.ul_x, s_row.ul_y), mask_tx)
                tile_inds, _a_inds = mosaic.get_offset_array_indices(
                    tile_size, set_size, offset)

                # Fill the part of the tile covered by this set with the
                # variable's importance. Best effort: a set whose model
                # cannot be read or applied is reported and skipped.
                try:
                    # NOTE: pickle.load executes code from the file; only
                    # point this at model directories you trust.
                    with open(s_row.dt_file, 'rb') as f:
                        dt_model = pickle.load(f)
                    importance_value = int(
                        dt_model.feature_importances_[v_id] * 100)
                    import_band = np.full(this_size, np.nan, dtype=np.float16)
                    import_band[tile_inds[0]:tile_inds[1],
                                tile_inds[2]:tile_inds[3]] = importance_value
                except Exception as e:
                    print(e)
                    continue
                importance_values.append(importance_value)
                importance_bands.append(import_band)

            if not importance_bands:
                # Nothing to aggregate (no overlapping sets, or none could
                # be read): the tile keeps its nodata fill.
                print('No importance values for this tile; leaving it as nodata\n')
                continue

            print('Average importance for this tile: %.1f' %
                  np.mean(importance_values))

            # Aggregate
            importance_stack = np.dstack(importance_bands)
            importance_tile = np.nanmean(importance_stack, axis=2)
            tile_mask = (mask[rc.ul_r:rc.lr_r, rc.ul_c:rc.lr_c] |
                         np.isnan(importance_tile))
            importance_tile[tile_mask] = nodata
            ar[rc.ul_r:rc.lr_r, rc.ul_c:rc.lr_c] = \
                np.round(importance_tile).astype(np.uint8)
            print('Aggregation time for this tile: %.1f minutes\n' %
                  ((time.time() - t2) / 60))

        out_path = os.path.join(out_dir, '%s_importance_%s.tif' % (model, v_name))
        try:
            array_to_raster(ar, mask_tx, prj, gdal.GetDriverByName('gtiff'),
                            out_path, gdal.GDT_Byte, nodata)
        except Exception as e:
            # Report and move on to the next variable rather than dropping
            # into an interactive debugger, which would stall a batch run.
            print('Could not write %s: %s' % (out_path, e))
        print('Time for this variable: %.1f minutes\n' %
              ((time.time() - t1) / 60))

    print('\nTotal time for %s variables: %.1f hours\n' %
          (len(variables), (time.time() - t0) / 3600))
def main(region_path, tile_path, reference_path, out_dir, id_field='region_id', ref_basename='nlcd'):
    """Create one working directory per region with tile and reference data.

    For every unique value of ``id_field`` in the region layer a directory
    ``region_<id>`` is created in ``out_dir`` and filled with:

    * ``tile_<id><ext>``: the tiles whose ``id_field`` matches the region
      (written with ``coords_to_shp`` if it does not exist yet);
    * ``<ref_basename>_<year>_<id>.tif``: the reference raster clipped to the
      region envelope, with 255 as nodata;
    * ``tile_<id>.tif``: a rasterised version of the tile layer clipped to
      the same envelope (written if it does not exist yet).

    Parameters
    ----------
    region_path : str
        Path of the region vector file.
    tile_path : str
        Path of the tile vector file. Its attribute table must provide
        ``xmin``, ``xmax``, ``ymin``, ``ymax`` and ``id_field``.
    reference_path : str
        Path of the reference raster. The first run of four digits in the
        path is used as the year in the output name.
    out_dir : str
        Existing directory that receives the region directories.
    id_field : str, optional
        Attribute holding the region id in both vector files.
    ref_basename : str, optional
        Prefix of the clipped reference raster's file name.

    Raises
    ------
    ValueError
        If ``reference_path`` contains no four-digit year.
    IOError
        If the tile raster needed to mask the reference map cannot be opened.
    """
    df = attributes_to_df(region_path)
    tile_info = attributes_to_df(tile_path)
    tile_info['ul_x'] = tile_info.xmin
    tile_info['lr_x'] = tile_info.xmax
    tile_info['ul_y'] = tile_info.ymax
    tile_info['lr_y'] = tile_info.ymin

    _, vector_ext = os.path.splitext(region_path)
    region_ids = df[id_field].unique()
    n_regions = len(region_ids)

    # The year does not depend on the region, so resolve it once up front.
    # Takes the first four-digit run in the path (potentially buggy).
    year_match = re.search(r'\d\d\d\d', reference_path)
    if year_match is None:
        raise ValueError(
            'no four-digit year found in reference_path: %s' % reference_path)
    nlcd_year = year_match.group()

    region_ds = ogr.Open(region_path)
    region_lyr = region_ds.GetLayer()

    for i, r_id in enumerate(region_ids):
        print('Making region dir for %s (%s of %s)' % (r_id, i + 1, n_regions))

        # Copy so the coordinate columns are not assigned on a view of df.
        df_r = df[df[id_field] == r_id].copy()
        id_str = ('0' + str(r_id))[-2:]
        fid = df_r.index[0]
        region_feature = region_lyr.GetFeature(fid)
        xmin, xmax, ymin, ymax = region_feature.GetGeometryRef().GetEnvelope()
        region_feature.Destroy()
        df_r['ul_x'] = xmin
        df_r['lr_x'] = xmax
        df_r['ul_y'] = ymax
        df_r['lr_y'] = ymin
        clip_coords = df_r.loc[fid, ['ul_x', 'lr_x', 'ul_y', 'lr_y']]

        region_dir = os.path.join(out_dir, 'region_%s' % id_str)
        if not os.path.exists(region_dir):
            os.mkdir(region_dir)

        # Make a shapefile of the tiles
        out_vector = os.path.join(region_dir,
                                  'tile_{0}{1}'.format(id_str, vector_ext))
        if not os.path.exists(out_vector):
            # TODO: switch to selection by min/max of coords
            region_tiles = tile_info[tile_info[id_field] == r_id]
            coords_to_shp(region_tiles, region_path, out_vector)

        # Make a map of reference NLCD
        out_raster = out_vector.replace(vector_ext, '.tif')
        # NOTE(review): the tile raster is read here, before the step below
        # that creates it, so it must already exist from an earlier run.
        ds = gdal.Open(out_raster)
        if ds is None:
            raise IOError(
                'could not open tile raster needed to mask the reference '
                'map: %s' % out_raster)
        mask = ds.ReadAsArray() == 255
        ds = None

        out_ref_map = os.path.join(
            region_dir, '%s_%s_%s.tif' % (ref_basename, nlcd_year, id_str))

        # The reference map is always (re)made; `ar` is also needed below.
        ref_ds = gdal.Open(reference_path)
        ref_tx = ref_ds.GetGeoTransform()
        ref_shape = ref_ds.RasterYSize, ref_ds.RasterXSize

        col_off = (ref_tx[0] - clip_coords.ul_x) / ref_tx[1]
        row_off = (ref_tx[3] - clip_coords.ul_y) / ref_tx[5]
        # Array dimensions must be integers; rows use the y pixel size.
        n_cols = int(abs((clip_coords.ul_x - clip_coords.lr_x) / ref_tx[1]))
        n_rows = int(abs((clip_coords.ul_y - clip_coords.lr_y) / ref_tx[5]))

        # Index tuples are (row_start, row_end, col_start, col_end).
        ar_inds, ref_inds = get_offset_array_indices(
            (n_rows, n_cols), ref_shape, (row_off, col_off))
        ref_n_rows = ref_inds[1] - ref_inds[0]
        ref_n_cols = ref_inds[3] - ref_inds[2]
        # ReadAsArray takes (xoff, yoff, xsize, ysize).
        ar_ref = ref_ds.ReadAsArray(ref_inds[2], ref_inds[0],
                                    ref_n_cols, ref_n_rows)

        ar = np.full((n_rows, n_cols), 255)
        ar[ar_inds[0]:ar_inds[1], ar_inds[2]:ar_inds[3]] = ar_ref
        ar[mask] = 255

        # NOTE(review): the 30-unit pixel size is hard-coded here and below
        # rather than taken from ref_tx — confirm this is always correct.
        tx = clip_coords.ul_x, 30, 0, clip_coords.ul_y, 0, -30
        prj = ref_ds.GetProjection()
        driver = gdal.GetDriverByName('gtiff')
        array_to_raster(ar, tx, prj, driver, out_ref_map, nodata=255)

        # Make a clipped raster of the tiles
        if not os.path.exists(out_raster):
            tiles = ogr.Open(tile_path)
            tile_lyr = tiles.GetLayer()
            tx = clip_coords.ul_x, 30, 0, clip_coords.ul_y, 0, -30
            tile_array, _ = kernel_from_shp(tile_lyr, clip_coords, tx, 255,
                                            val_field='name')
            tile_array[ar == 255] = 255
            driver = gdal.GetDriverByName('gtiff')
            prj = tile_lyr.GetSpatialRef().ExportToWkt()
            array_to_raster(tile_array, tx, prj, driver, out_raster, nodata=255)
            tiles.Destroy()