def interp_na( self ):
    '''
    interpolate across NA holes (convex hull) in the data -- output dtype is np.float32

    method = [str] one of 'cubic', 'near', 'linear'

    builds a list of dicts to pass to xyz_to_grid in parallel, updates self.ds in place, returns 1
    '''
    from copy import copy
    import pandas as pd
    import numpy as np
    from pathos.mp_map import mp_map

    # suppress the darn scientific notation in printed output
    np.set_printoptions( suppress=True )
    output_dtype = np.float32

    # if 0-360 leave it alone
    if ( self.ds.lon > 200.0 ).any():
        dat, lons = np.array( self.ds.data ), np.array( self.ds.lon )
        self._lonpc = lons
    else:
        # greenwich-centered rotate to 0-360 for interpolation across pacific
        dat, lons = self.utils.rotate( np.array( self.ds.values ), np.array( self.ds.lon ), to_pacific=True )
        self._rotated = True # update the rotated attribute
        self._lonpc = lons

    # mesh the lons and lats and unravel them to 1-D
    xi, yi = np.meshgrid( self._lonpc, self.ds.lat.data )
    lo, la = [ i.ravel() for i in (xi, yi) ]

    # setup args for multiprocessing
    df_list = [ pd.DataFrame({ 'x':lo, 'y':la, 'z':d.ravel() }).dropna( axis=0, how='any' ) for d in dat ]
    args = [ {'x':np.array(df['x']), 'y':np.array(df['y']), 'z':np.array(df['z']),
                'grid':(xi, yi), 'method':self.historical.method, 'output_dtype':output_dtype } for df in df_list ]

    print( 'processing interpolation to convex hull in parallel using {} cpus.'.format( self.ncpus ) )
    dat_list = mp_map( self.wrap, args, nproc=self.ncpus )
    dat_list = [ np.array( i ) for i in dat_list ] # drop the output mask
    dat = np.array( dat_list )

    lons = self._lonpc
    if self._rotated: # rotate it back
        dat, lons = self.utils.rotate( dat, lons, to_pacific=False )

    # place back into a new xarray.Dataset object for further processing
    # self.ds = self.ds.update( { self.historical.variable:( ['time','lat','lon'], dat ) } )
    self.ds.data = dat
    print( 'ds interpolated updated into self.ds' )
    return 1
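# hypothetical sketch of the self.wrap worker mapped over above -- the module-level
# variant further down in this collection hands each args dict to utils.xyz_to_grid,
# so a method version would look roughly like:
def wrap( self, d ):
    ''' simple wrapper around utils.xyz_to_grid for mp_map (sketch) '''
    import numpy as np
    x, y, z = np.array( d['x'] ), np.array( d['y'] ), np.array( d['z'] )
    xi, yi = d['grid']
    return utils.xyz_to_grid( x, y, z, (xi, yi), interp=d['method'] )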
def make_decadal_seasonal( base_path, output_path, variable, model, scenario, decade, ncpus, agg_metric ):
    '''
    calculate and output mean seasonal monthly data across a decade

    ARGUMENTS:
    ----------
    base_path = [str] path to the monthly GeoTiffs to aggregate
    output_path = [str] path to write the output GeoTiffs
    variable = [str] variable name
    model = [str] model name
    scenario = [str] scenario name
    decade = [tuple] (begin_year, end_year) of the decade
    ncpus = [int] number of cpus to use in parallel
    agg_metric = [str] aggregation metric name used in the filenames

    RETURNS
    -------
    args list if successful; side effect: newly produced GeoTiffs in output_path.
    '''
    decade_begin, decade_end = decade

    # modeled data
    files = glob.glob( os.path.join( base_path, model, scenario, variable, '*' + agg_metric + '*.tif' ) )
    files = only_years( files, begin=decade_begin, end=decade_end, split_on='_', elem_year=-1 )
    # season_names = [ get_month_seaon( fn ) for fn in files ]
    years = [ int( get_year( fn ) ) for fn in files ]

    # min / max years
    start_year = str( min( years ) )
    end_year = str( max( years ) )

    seasons = [ get_season( fn ) for fn in files ]

    # drop data for start_year JF and end_year D -- this is useful for annuals, but not really decadals
    # files = [ fn for fn in files if not '_'.join([ '01',start_year ]) in fn if not '_'.join([ '02',start_year ]) in fn if not '_'.join([ '12',end_year ]) in fn ]

    files = pd.Series( files )
    grouped_seasons = files.groupby( seasons )
    args = [ ( season_name, file_group.tolist(), output_path, agg_metric ) for season_name, file_group in grouped_seasons ]
    _ = mp_map( wrap, args, nproc=ncpus )
    return args
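# hypothetical sketches of the filename parsers used above -- not the project's
# originals, just the behavior implied by split_on='_' / elem_year=-1 naming,
# assuming filenames ending in ..._<month>_<year>.tif:
import os

def get_year( fn ):
    ''' pull the trailing year element from an underscore-delimited filename '''
    return os.path.basename( fn ).split( '.' )[0].split( '_' )[-1]

def get_season( fn ):
    ''' map the month element of the filename to a meteorological season name '''
    month = int( os.path.basename( fn ).split( '.' )[0].split( '_' )[-2] )
    seasons = { 12:'DJF', 1:'DJF', 2:'DJF', 3:'MAM', 4:'MAM', 5:'MAM',
                6:'JJA', 7:'JJA', 8:'JJA', 9:'SON', 10:'SON', 11:'SON' }
    return seasons[ month ]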
MMSI_grouped_keep = MMSI_grouped_keep.groupby( new_groups ).apply( fix_voyage_id )
# MMSI_grouped_keep.loc[ :, 'Voyage' ] = MMSI_grouped_keep.loc[ :, 'clusters' ]
# voyage_group_names = grouped.groups.keys()

# run the voyage cleaner function on the grouped voyage data frames
# gdf_3338 = MMSI_grouped_keep.groupby( 'Voyage' ).apply( clean_grouped_voyages( df ) )

# parallelize it
MMSI_grouped_voyages = pd.Series([ j.copy() for i,j in MMSI_grouped_keep.groupby( 'Voyage' ) ])
del MMSI_grouped_keep, df # cleanup

print( ' running voyage cleaner' )
if len( MMSI_grouped_voyages ) >= 2000:
    splitter = np.array_split( range( len( MMSI_grouped_voyages ) ), int( len( MMSI_grouped_voyages ) / 1000 ) )
    out = [ mp_map( clean_grouped_voyages, sequence=MMSI_grouped_voyages[ i ], nproc=ncpus ) for i in splitter ]
    # unlist
    out = [ j for i in out for j in i ]
else:
    out = mp_map( clean_grouped_voyages, sequence=MMSI_grouped_voyages, nproc=ncpus )

df = pd.concat( ( i for i in out if i.shape[0] > 0 ) )
del MMSI_grouped_voyages # cleanup

# run the intersect testing
MMSI_grouped_goodbad = pd.Series([ j.copy() for i,j in df.groupby( 'Voyage' ) ])
break_goodbad_partial = partial( break_goodbad, land=land ) # partial function build
if len( MMSI_grouped_goodbad ) >= 2000:
    splitter = np.array_split( range( len( MMSI_grouped_goodbad ) ), int( len( MMSI_grouped_goodbad ) / 1000 ) )
from pathos.mp_map import mp_map

# read in the args
base_path = '/workspace/Shared/Tech_Projects/ESGF_Data_Access/project_data/tem_data_sep2016'
cru_path = '/Data/Base_Data/Climate/World/CRU_grids/CRU_TS323/'

args = []
for model in ['CCSM4', 'GFDL-CM3', 'IPSL-CM5A-LR', 'MRI-CGCM3', 'GISS-E2-R']:
    for scenario in ['historical', 'rcp26', 'rcp45', 'rcp60', 'rcp85']:
        tas_list = sorted( glob.glob( os.path.join( base_path, 'downscaled', model, scenario, 'tas', '*.tif' ) ) )
        hur_list = sorted( glob.glob( os.path.join( base_path, 'downscaled', model, scenario, 'hur', '*.tif' ) ) )
        # make args to pass to the run function -- list() so concatenation works under Python 3
        args = args + list( zip( tas_list, hur_list ) )

# run in parallel
out = mp_map( run, args, nproc=32 )

# # # CONVERT CL20 2km to vap
tas_list = sorted( glob.glob( os.path.join( base_path, 'cru', 'cru_cl20', 'tas', '*.tif' ) ) )
hur_list = sorted( glob.glob( os.path.join( base_path, 'cru', 'cru_cl20', 'hur', '*.tif' ) ) )
args = list( zip( tas_list, hur_list ) )
out = mp_map( run, args, nproc=12 )

# # # CONVERT CRU TS323 vap/tas to hur --> output to a non CF-compliant NetCDF that will be read back in with xarray
tas = xr.open_dataset( '/Data/Base_Data/Climate/World/CRU_grids/CRU_TS323/cru_ts3.23.1901.2014.tmp.dat.nc' )
vap = xr.open_dataset( '/Data/Base_Data/Climate/World/CRU_grids/CRU_TS323/cru_ts3.23.1901.2014.vap.dat.nc' )
hur = convert_to_hur( tas.tmp, vap.vap )
hur_ds = hur.to_dataset( name='hur' )
hur_ds.to_netcdf( '/Data/Base_Data/Climate/World/CRU_grids/CRU_TS323/cru_ts3.23.1901.2014.hur.SNAP_derived.dat.nc' )
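# hypothetical sketch of convert_to_hur -- relative humidity from air temperature
# (deg C) and vapor pressure (hPa) via an August-Roche-Magnus saturation vapor
# pressure curve; the project's own conversion may differ in constants or clamping:
def convert_to_hur( tas, vap ):
    ''' hur [%] = vap / esat(tas) * 100 (sketch; works elementwise on arrays) '''
    import numpy as np
    esat = 6.112 * np.exp( 17.62 * tas / (243.12 + tas) ) # saturation vapor pressure [hPa]
    return ( vap / esat ) * 100.0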
if scenario == 'historical':
    old_dir = '/Data/Base_Data/Climate/AK_CAN_2km/historical/AR5_CMIP5_models'
    begin = 1950
    end = 1965
else:
    old_dir = '/Data/Base_Data/Climate/AK_CAN_2km/projected/AR5_CMIP5_models'
    begin = 2060
    end = 2070

figsize = (16, 9)
out = {}
for v in variables:
    path = os.path.join( base_dir, 'downscaled', m, scenario, v )
    files = glob.glob( os.path.join( path, '*.tif' ) )
    files = sort_files( only_years( files, begin=begin, end=end, split_on='_', elem_year=-1 ) )
    out[ v ] = mp_map( masked_mean, files, nproc=4 )

    if v == 'tas' or v == 'pr':
        if m == 'CRU_TS323':
            path = os.path.join( old_dir, v )
        else:
            path = os.path.join( old_dir, scenario, m, v )
        files = glob.glob( os.path.join( path, '*.tif' ) )
        files = sort_files( only_years( files, begin=begin, end=end, split_on='_', elem_year=-1 ) )
        out[ v+'_old' ] = mp_map( masked_mean, files, nproc=4 )

plot_df = pd.DataFrame( out )
plot_df.index = pd.date_range( start=str(begin), end=str(end+1), freq='M' )

# sort the columns for output plotting cleanliness:
if 'tas' in variables:
    col_list = ['tasmax', 'tas_old', 'tas', 'tasmin']
def downscale_cru_ts( self, *args, **kwargs ):
    ''' run the CRU downscaling using the monthly climatology files given '''
    from pathos.mp_map import mp_map
    import glob, affine, rasterio

    nc_varname = self._get_varname_cru( )
    # handle cases where the desired varname is not the same as the one parsed from file.
    if self.variable is None:
        variable = nc_varname
    else:
        variable = self.variable

    # build output dirs
    anomalies_path = os.path.join( self.base_path, variable, 'anom' )
    if not os.path.exists( anomalies_path ):
        os.makedirs( anomalies_path )

    downscaled_path = os.path.join( self.base_path, variable, 'downscaled' )
    if not os.path.exists( downscaled_path ):
        os.makedirs( downscaled_path )

    # template setup
    template_raster = rasterio.open( self.template_raster_fn )
    template_meta = template_raster.meta
    template_meta.update( crs={'init':'epsg:3338'} )

    # make a mask with values of 0=nodata and 1=data
    template_raster_mask = template_raster.read_masks( 1 ) # mask of band 1 is all we need
    template_raster_mask[ template_raster_mask == 255 ] = 1

    anomalies = self.utils.calc_anomalies( self.cru_ts, variable, absolute=self.absolute )
    anomalies_pcll, lons_pcll = self.utils.shiftgrid( 0., anomalies, anomalies.lon.data ) # grabs lons from the xray ds

    # mesh the lons and lats and unravel them to 1-D
    lo, la = [ i.ravel() for i in np.meshgrid( lons_pcll, anomalies.lat ) ]

    # convert into pandas.DataFrame and drop all the NaNs -- land-only dataset
    anom_df_list = [ pd.DataFrame({ 'anom':i.ravel(), 'lat':la, 'lon':lo }).dropna( axis=0, how='any' ) for i in anomalies_pcll ]
    xi, yi = np.meshgrid( lons_pcll, anomalies.lat.data )

    # argument setup -- HARDWIRED
    src_transform = affine.Affine( 0.5, 0.0, -180.0, 0.0, -0.5, 90.0 )
    src_nodata = -9999.0

    # output_filenames setup
    years = np.unique( self._get_years_cru( self.cru_ts ) )
    cru_ts_version = self._get_version_cru( self.cru_ts ) # works if naming convention stays same
    months = [ i if len(i)==2 else '0'+i for i in np.arange( 1, 12+1, 1 ).astype( str ).tolist() ]
    month_year = [ (month, year) for year in years for month in months ]
    output_filenames = [ os.path.join( anomalies_path, '_'.join([ variable, self.metric, cru_ts_version, 'anom', month, str(year) ])+'.tif' )
                            for month, year in month_year ]

    # make a list of args to pass to the interpolation function
    args_list = [ {'anom_df':anom_df, 'meshgrid_tuple':(xi, yi), 'template_raster_fn':self.template_raster_fn,
                    'lons_pcll':lons_pcll, 'src_transform':src_transform, 'src_crs':self.src_crs,
                    'src_nodata':src_nodata, 'output_filename':fn }
                        for anom_df, fn in zip( anom_df_list, output_filenames ) ]

    anomalies = mp_map( lambda args: self.utils.interpolate_anomalies( **args ), args_list, nproc=self.ncores )

    # read in the pre-processed 12-month climatology
    clim_list = sorted( glob.glob( os.path.join( self.clim_path, '*.tif' ) ) ) # this could catch you.
    clim_dict = { month:rasterio.open( fn ).read( 1 ) for month, fn in zip( months, clim_list ) }

    # group the anomalies output filenames by months
    out = pd.Series( anomalies )
    out_months = out.apply( lambda x: DownscaleCRU._fn_month_grouper( fn=x ) )
    months_grouped = out.groupby( out_months )
    mg = [ (name, fn, fn.replace( 'anom', 'downscaled' )) for name, group in months_grouped for fn in group.tolist() ]

    # output metadata
    meta = rasterio.open( mg[0][1] ).meta
    meta.update( compress='lzw' )

    # set downscaling_operation based on self.absolute boolean
    if self.absolute == True:
        downscaling_operation = 'add'
    elif self.absolute == False:
        downscaling_operation = 'mult'
    else:
        raise AttributeError( 'downscaling operation: self.absolute must be boolean' )

    # make an args tuple to pass to the function
    args_list = [ { 'anom_arr':rasterio.open( fn ).read( 1 ), 'baseline_arr':clim_dict[i],
                    'output_filename':out_fn, 'downscaling_operation':downscaling_operation,
                    'meta':meta, 'post_downscale_function':self.post_downscale_function }
                        for i, fn, out_fn in mg ]

    # downscale / write to disk
    out = mp_map( lambda args: self.utils.downscale( **args ), args_list, nproc=self.ncores )
    return 'downscaling complete. files output at: %s' % self.base_path
# more prep
scenario, model, variable = path_slicer_ar5( input_path )
template_raster_mask = rasterio.open( template_raster_mask_fn )
output_path = os.path.join( output_base_path, scenario, model, variable )

try:
    if not os.path.exists( output_path ):
        os.makedirs( output_path )
except:
    pass

file_list = glob.glob( os.path.join( input_path, "*.tif" ) )
output_filenames = [ generate_output_fn( input_fn, output_path, group ) for input_fn in file_list ]
args = list( zip( file_list, output_filenames, itertools.repeat( template_raster_mask_fn, len( file_list ) ) ) )
_ = mp_map( resample_to_1km, args, nproc=ncores )

# # # RUN THE ABOVE # # # # #
# import os, glob
# import numpy as np
# # list the data we want
# input_path = '/Data/Base_Data/Climate/AK_CAN_2km/projected/AR5_CMIP5_models'
# out = [ root for root, subs, files in os.walk( input_path ) \
#           if len( glob.glob( os.path.join( root, '*.tif' ) ) ) > 0 and not 'derived' in root ]
# input_paths = np.unique( out ).tolist()
# model = 'CCSM4' # [ 'CCSM4', 'CNRM-CM5', 'GFDL-CM3', 'GISS-E2-R', 'IPSL-CM5A-LR', 'MPI-ESM-LR', 'MRI-CGCM3', '5modelAvg' ]
# scenarios = [ 'rcp26', 'rcp45', 'rcp60', 'rcp85' ]
if __name__ == '__main__':
    import os, glob, subprocess
    from pathos.mp_map import mp_map

    base_paths = [ '/Data/Base_Data/Climate/AK_CAN_2km/projected/AR5_CMIP5_models',
                    '/Data/Base_Data/Climate/AK_CAN_2km/historical/AR5_CMIP5_models' ]

    out = []
    for base_path in base_paths:
        for root, subs, files in os.walk( base_path ):
            out = out + [ os.path.join( root, fn ) for fn in files if 'CCSM4' in fn ]

    out_files = [ fn.replace( 'CCSM4', 'NCAR-CCSM4' ) for fn in out ]
    args = list( zip( out, out_files ) )
    _ = mp_map( wrap, args, nproc=32 )

# * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
# # # Here is how I renamed the 5modelAvg to 5ModelAvg, which is better
# * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
def rename_file( in_fn, out_fn, *args, **kwargs ):
    import os, shutil
    dirname = os.path.dirname( out_fn )
    try:
        if not os.path.exists( dirname ):
            os.makedirs( dirname )
    except:
        pass
    return shutil.copy( in_fn, out_fn )
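# hypothetical: the `wrap` mapped over in the CCSM4 rename block above is not shown
# in this excerpt; it is assumed to simply unpack an (in_fn, out_fn) pair into a
# copy/rename helper equivalent to rename_file:
def wrap( x ):
    ''' unpack a 2-tuple of (in_fn, out_fn) for mp_map (sketch) '''
    return rename_file( *x )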
        out.write_band( 1, arr )
    return fn.replace( '.tif', '_fix.tif' )

if __name__ == '__main__':
    import rasterio, glob, os
    import numpy as np
    import pandas as pd
    from pathos.mp_map import mp_map
    from functools import partial

    mask = '/workspace/Shared/Tech_Projects/ALFRESCO_Inputs/project_data/TEM_Data/extents/IEM_Mask_1km.tif'
    mask = rasterio.open( mask ).read( 1 )
    l = glob.glob( '/workspace/Shared/Tech_Projects/ALFRESCO_Inputs/project_data/TEM_Data/girr_radiation_cmip3_process/IEM/*.tif' )
    args = [ (i, mask) for i in l ]
    done = mp_map( lambda x: run_replace( *x ), args, nproc=32 )

    # for fn in l:
    #     rst = rasterio.open( fn )
    #     meta = rst.meta
    #     meta.update( compress='lzw' )
    #     arr = rst.read( 1 )
    #     ind = np.where( (mask == 1) & (arr > -3) )
    #     ind_zip = zip( *ind )
    #     # a little neighborhood math for the queens case
# some pathing
input_path = '/Data/malindgren/cru_november_final/IEM/clouds/ar5'
output_path = '/Data/malindgren/cru_november_final/final/IEM/ar5'

# make all combinations of the output variables
combinations = itertools.product( models, scenarios, variables )

for model, scenario, variable in combinations:
    print( ' '.join([ 'running: ', model, scenario ]) )
    l = glob.glob( os.path.join( input_path, model, variable, 'downscaled', scenario, '*.tif' ) )
    out = os.path.join( output_path, model, variable, scenario )

    # remove them if they exist with the pct in the name
    ol = glob.glob( os.path.join( out, '*rsds_*_pct_*.tif' ) )
    if len( ol ) > 0:
        _ = mp_map( lambda x: os.unlink( x ), ol, nproc=32 )

    f = partial( shutil.move, dst=out )
    _ = mp_map( f, l, nproc=32 )

# # # # CHANGE METRIC NAME # # # # # # # # # #
# # a tool to move the files to the needed locations in the final directory # # # #
import os, glob, itertools, shutil
from pathos.mp_map import mp_map
from functools import partial
climatology = ds_hist.tasmax.sel( time=slice( str(climbegin), str(climend) ) )
climatology = climatology.groupby( 'time.month' ).apply( lambda x: np.mean( x, axis=0 ) )
anomalies = ds.tasmax.groupby( 'time.month' ) - climatology
anomalies[ 'mask' ] = rasterize( shapes, anomalies.coords, longitude='lon', latitude='lat', fill=0 )
anom_mean = anomalies.sel( time=slice( str(begin), str(end) ) ).where( anomalies.mask == 1 ).mean( axis=(1,2) )

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# now lets do the same thing with the rasterio-downscaled data
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
path = '/Users/malindgren/Documents/downscale_epscor/august_fix/EPSCOR_SE_DELIVERY_AUG2016/downscaled/5ModelAvg/rcp45/tasmax'
# path = '/Users/malindgren/Documents/downscale_epscor/august_fix/EPSCOR_SC_DELIVERY_AUG2016/derived/grids/annual_seasonals/5ModelAvg/rcp45/tasmax'
files = glob.glob( os.path.join( path, '*.tif' ) )
files = sort_files( only_years( files, begin=begin, end=end, split_on='_', elem_year=-1 ) )

# get the means across space for each timestep
down_mean = mp_map( masked_mean, files, nproc=4 )

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# NOW LETS SEE THIS WITH THE LINEAR INTERPOLATION USED... as a TEST.
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# path = '/Users/malindgren/Documents/downscale_epscor/august_fix/CCSM4_clip/tasmax'
# files = glob.glob( os.path.join( path, '*.tif' ) )
# files = sort_files( only_years( files, begin=begin, end=end, split_on='_', elem_year=-1 ) )
# # get the means across space for each timestep
# down_mean_linear = mp_map( masked_mean, files, nproc=4 )

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# now lets do the same thing with the PRISM data
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# clipped data with the below with the epscor crop/clip code:
# base_path = '/Users/malindgren/Documents/downscale_epscor/tasmax'
        shp_fn, rst_fn, out_fn ])
    return out_fn

def wrap( x ):
    ''' wrapper for clean multiprocessing call to pool.map '''
    return crop_clip( *x )

if __name__ == '__main__':
    import os, glob, itertools, rasterio
    import xarray as xr
    import numpy as np
    import pandas as pd
    from pathos.mp_map import mp_map

    # setup args
    # base_path = '/workspace/Shared/Tech_Projects/EPSCoR_Southcentral/project_data/downscaled'
    base_path = '/workspace/Shared/Tech_Projects/EPSCoR_Southcentral/project_data/derived_grids'
    # output_path = '/workspace/Shared/Tech_Projects/EPSCoR_Southcentral/project_data/EPSCOR_SC_DELIVERY_SEP2016/downscaled'
    output_path = '/workspace/Shared/Tech_Projects/EPSCoR_Southcentral/project_data/EPSCOR_SC_DELIVERY_SEP2016/derived/grids'
    ncpus = 32
    subdomain_fn = '/workspace/Shared/Tech_Projects/EPSCoR_Southcentral/project_data/SCTC_studyarea/Kenai_StudyArea.shp'

    # list up all the args we want to run through the multicore clipping
    args_list = []
    for root, subs, files in os.walk( base_path ):
        tif_files = [ fn for fn in files if fn.endswith( '.tif' ) ]
        if len( tif_files ) > 0:
            args_list = args_list + [ ( subdomain_fn, os.path.join( root, fn ), os.path.join( root, fn ).replace( base_path, output_path ) ) for fn in tif_files ]

    out = mp_map( wrap, args_list, nproc=ncpus )
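# hypothetical reconstruction of the crop_clip helper whose tail appears above --
# assuming it shells out to gdalwarp with the shapefile as a cutline, which is
# consistent with the trailing ( shp_fn, rst_fn, out_fn ) argument list:
def crop_clip( shp_fn, rst_fn, out_fn ):
    ''' clip rst_fn to the shapefile cutline and write out_fn (sketch) '''
    import subprocess
    subprocess.call([ 'gdalwarp', '-q', '-overwrite', '-crop_to_cutline', '-cutline',
                        shp_fn, rst_fn, out_fn ])
    return out_fn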
# print the input to screen
x = np.arange( N * nodes, dtype=np.float64 )
print( "Input: %s\n" % x )

# run sin2 in series, then print to screen
print( "Running serial python ..." )
y = list( map( sin2, x ) )
print( "Output: %s\n" % np.asarray( y ) )

# map sin2 to the workers, then print to screen
print( "Running mpi4py on %d cores..." % nodes )
y = mpi_map( sin2, x, nnodes=nodes )
print( "Output: %s\n" % np.asarray( y ) )

# map sin2 to the workers, then print to screen
print( "Running multiprocessing on %d processors..." % nodes )
y = mp_map( sin2, x, nproc=nodes )
print( "Output: %s\n" % np.asarray( y ) )

# map sin2 to the workers, then print to screen
print( "Running parallelpython on %d cpus..." % nodes )
y = pp_map( sin2, x, ncpus=nodes, servers=('mycpu.mydomain.com',) )
print( "Output: %s\n" % np.asarray( y ) )

# EOF
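# assumed setup for the pathos demo above -- sin2, N, and nodes are not shown in
# this excerpt; a minimal definition consistent with the calls would be:
import numpy as np

def sin2( x ):
    ''' elementwise sin squared -- a trivially parallelizable test function '''
    return np.sin( x )**2

N = 10      # points per node (hypothetical value)
nodes = 4   # worker count (hypothetical value)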
# set bounds to interpolate over
# xmin, ymin, xmax, ymax = (0, -90, 360, 90)
xmin, ymin, xmax, ymax = (160, 0, 300, 90)

# grid dimensions: 60 cells per degree of extent (arcminute spacing)
rows = 60 * ( ymax - ymin )
cols = 60 * ( xmax - xmin )

# build the output grid
x = np.linspace( xmin, xmax, cols )
y = np.linspace( ymin, ymax, rows )
xi, yi = np.meshgrid( x, y )

args_list = [ {'x':np.array(cru_df['lon']), 'y':np.array(cru_df['lat']), 'z':np.array(cru_df[month]), 'xi':xi, 'yi':yi} for month in months ]

# run interpolation in parallel
interped_grids = mp_map( regrid, args_list, nproc=12 )

# stack and give a proper nodata value
arr = np.array([ i.data for i in interped_grids ])
arr[ np.isnan( arr ) ] = -3.4e+38

pcll_affine = transform_from_latlon( y, x )
meta = { 'affine': pcll_affine,
        'count': 1,
        'crs': {'init':'epsg:4326'},
        'driver': u'GTiff',
        'dtype': 'float32',
        'height': rows,
        'nodata': -3.4e+38,
        'width': cols,
        'compress': 'lzw' }
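# hypothetical sketch of the regrid worker mapped over above -- the .data attribute
# read from its return value suggests a masked array; a scipy-based stand-in under
# that assumption:
def regrid( d ):
    ''' interpolate scattered x,y,z points onto the xi,yi grid (sketch) '''
    import numpy as np
    from scipy.interpolate import griddata
    out = griddata( (d['x'], d['y']), d['z'], (d['xi'], d['yi']), method='linear' )
    return np.ma.masked_invalid( out ) # mask the NaNs outside the convex hull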
    return out_fn

def wrap( x ):
    ''' wrapper for clean multiprocessing call to pool.map '''
    return crop_clip( *x )

if __name__ == '__main__':
    import os, glob, itertools, rasterio
    import xarray as xr
    import numpy as np
    import pandas as pd
    from pathos.mp_map import mp_map

    # setup args
    base_path = '/Data/Base_Data/Climate/AK_CAN_2km/projected/AR5_CMIP5_models'
    output_path = '/workspace/Shared/Tech_Projects/EPSCoR_Southcentral/project_data/EPSCOR_SC_DELIVERY_SEP2016/derived/grids/monthly_decadals'
    ncpus = 32
    subdomain_fn = '/workspace/Shared/Tech_Projects/EPSCoR_Southcentral/project_data/SCTC_studyarea/Kenai_StudyArea.shp'
    models = [ 'IPSL-CM5A-LR', 'MRI-CGCM3', 'GISS-E2-R', 'GFDL-CM3', 'NCAR-CCSM4', '5ModelAvg' ]

    # list up all the args we want to run through the multicore clipping
    fn_list = []
    for root, subs, files in os.walk( base_path ):
        if any( model in root for model in models ):
            if 'derived' in root:
                if len( [ fn for fn in files if fn.endswith( '.tif' ) ] ) > 0:
                    fn_list = fn_list + glob.glob( os.path.join( root, '*.tif' ) )

    args_list = [ make_args( rst_fn, subdomain_fn, output_path ) for rst_fn in fn_list if 'dof' in rst_fn or 'dot' in rst_fn or 'logs' in rst_fn ]
    out = mp_map( wrap, args_list, nproc=ncpus )
out_dir_lookup = { 'dot':'decadal_dot', 'dof':'decadal_dof', 'logs':'decadal_logs' }

# get all files
file_list = []
for root, subs, files in os.walk( in_path ):
    if len( files ) > 0:
        file_list = file_list + [ os.path.join( root, fn ) for fn in files ]

# make some arguments for passing to shutil.copy
args = []
for fn in file_list:
    folder_name = out_dir_lookup[ os.path.basename( fn ).split( '_' )[0] ]
    out_path = os.path.join( output_path, folder_name )
    new_fn = fn.replace( in_path, out_path )
    args = args + [ (fn, new_fn) ]

def copy_it( x ):
    fn, new_fn = x
    out_path = os.path.dirname( new_fn )
    try:
        if not os.path.exists( out_path ):
            os.makedirs( out_path )
    except:
        pass
    # out_path.replace( '/dof', '' )
    return shutil.copy( fn, new_fn )

_ = mp_map( copy_it, args, nproc=16 )
# # # # TESTING STUFF # # # #
# # forget the above for testing, lets use Stephs radians
# latr = rasterio.open( '/workspace/Shared/Tech_Projects/ESGF_Data_Access/project_data/tem_data_sep2016/radiance/radians.txt' )
# latr = latr.read( 1 )
# # # # # # # # # # # # # # #

# calc ordinal days to compute
ordinal_days = range( 1, 365+1, 1 )
# make a monthly grouper of ordinal days
ordinal_to_months = [ str( datetime.date.fromordinal( i ).month ) for i in ordinal_days ]
# convert those months to zero-padded strings
ordinal_to_months = [ ('0'+month if len( month ) < 2 else month) for month in ordinal_to_months ]

# calc girr
f = partial( calc_ra, lat=lat_rad )
Ra = mp_map( f, ordinal_days, nproc=32 )
Ra_monthlies = pd.Series( Ra ).groupby( ordinal_to_months ).apply( lambda x: np.array( x.tolist() ).mean( axis=0 ) )

# iteratively put them back in the indexed locations we took them from
meta = rst.meta
meta.pop( 'transform' )
meta.update( compress='lzw', count=1, dtype='float32' )

for month in Ra_monthlies.index:
    arr = rst.read( 1 )
    arr[ data_ind ] = Ra_monthlies.loc[ month ].tolist()
    output_filename = os.path.join( output_path, 'girr_w-m2_{}_.tif'.format( str( month ) ) )
    with rasterio.open( output_filename, 'w', **meta ) as out:
        out.write( arr.astype( np.float32 ), 1 )
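# hypothetical sketch of calc_ra -- daily top-of-atmosphere (extraterrestrial)
# radiation from an ordinal day and a latitude grid in radians. This follows the
# standard FAO-56 formulation and is NOT necessarily the project's implementation;
# note it yields MJ m-2 day-1, not the W m-2 implied by the output filenames above.
def calc_ra( ordinal_day, lat ):
    ''' FAO-56 extraterrestrial radiation for a day-of-year over a latitude array (radians) '''
    import numpy as np
    Gsc = 0.0820 # solar constant [ MJ m-2 min-1 ]
    dr = 1 + 0.033 * np.cos( (2*np.pi/365.) * ordinal_day ) # inverse relative earth-sun distance
    delta = 0.409 * np.sin( (2*np.pi/365.) * ordinal_day - 1.39 ) # solar declination
    # sunset hour angle -- clip for polar day/night where |tan*tan| > 1
    ws = np.arccos( np.clip( -np.tan( lat ) * np.tan( delta ), -1.0, 1.0 ) )
    return (24*60/np.pi) * Gsc * dr * ( ws*np.sin(lat)*np.sin(delta) + np.cos(lat)*np.cos(delta)*np.sin(ws) )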
def downscale( self, output_dir, prefix=None ):
    import affine
    from affine import Affine
    import itertools
    from functools import partial
    from pathos.mp_map import mp_map

    operation_switch = { 'add':self.add, 'mult':self.mult }

    def two_digit_month( x ):
        ''' make a 1-digit month a standard 2-digit month for output filenames '''
        month = str( x )
        if len( month ) == 1:
            month = '0' + month
        return month

    time_suffix = [ '_'.join([ two_digit_month( t.month ), str( t.year ) ]) for t in self.anomalies.time.to_pandas() ]

    # handle missing variable / model names
    if self.varname is not None:
        variable = self.varname
    elif self.historical.variable is not None:
        variable = self.historical.variable
    else:
        variable = 'variable'

    if self.modelname is not None:
        model = self.modelname
    elif self.historical.model is not None:
        model = self.historical.model
    else:
        model = 'model'

    output_filenames = [ os.path.join( output_dir, '_'.join([ variable, self.historical.metric, self.historical.units,
                            self.historical.project, model, self.historical.scenario, ts ]) + '.tif' ) for ts in time_suffix ]

    # if there is a specific name prefix, use it
    if prefix is not None:
        output_filenames = [ os.path.join( output_dir, '_'.join([ prefix, ts ]) + '.tif' ) for ts in time_suffix ]

    # rotate to pacific-centered if needed
    if ( self.anomalies.lon.data > 200.0 ).any():
        dat, lons = ( self.anomalies, self.anomalies.lon )
        self.anomalies_rot = dat
        src_transform = self.historical.transform_from_latlon( self.historical.ds.lat, lons )
        print( 'anomalies NOT rotated!' )
    else:
        dat, lons = utils.shiftgrid( 0., self.anomalies, self.anomalies.lon )
        self.anomalies_rot = dat
        src_transform = self.historical.transform_from_latlon( self.historical.ds.lat, lons )
        print( src_transform )
        print( 'anomalies rotated!' )

    # run and output
    # integer-divide so the list-repeat count stays an int under Python 3
    rstlist = self.baseline.filelist * ( self.anomalies_rot.shape[0] // 12 )

    if isinstance( self.anomalies_rot, xr.Dataset ):
        self.anomalies_rot = self.anomalies_rot[ self.historical.variable ].data
    elif isinstance( self.anomalies_rot, xr.DataArray ):
        self.anomalies_rot = self.anomalies_rot.data

    args = zip( self.anomalies_rot, rstlist, output_filenames )
    args = [ {'anom':i, 'base':j, 'output_filename':k,
                'downscaling_operation':self.downscaling_operation,
                'post_downscale_function':self.post_downscale_function,
                'mask':self.mask, 'mask_value':self.mask_value } for i,j,k in args ]

    # partial and wrapper
    f = partial( self.interp_ds, src_crs=self.src_crs, src_nodata=self.src_nodata,
                    dst_nodata=self.dst_nodata, src_transform=src_transform, resample_type=self.resample_type )
    run = partial( self._run_ds, f=f, operation_switch=operation_switch, anom=self.anom, mask_value=self.mask_value )

    # run it
    out = mp_map( run, args, nproc=self.ncpus )
    return output_dir
def downscale_ar5_ts( self, *args, **kwargs ):
    from pathos.mp_map import mp_map

    # build output dirs
    # template setup

    # calc the anomalies
    anomalies = self._calc_anomalies()
    anomalies_pcll, lons_pcll = self.utils.shiftgrid( 0., anomalies, anomalies.lon.data ) # grabs lons from the xray ds

    # mesh the lons and lats and unravel them to 1-D
    lo, la = [ i.ravel() for i in np.meshgrid( lons_pcll, anomalies.lat ) ]

    # convert into pandas.DataFrame and drop all the NaNs -- land-only dataset
    anom_df_list = [ pd.DataFrame({ 'anom':i.ravel(), 'lat':la, 'lon':lo }).dropna( axis=0, how='any' ) for i in anomalies_pcll ]
    xi, yi = np.meshgrid( lons_pcll, anomalies.lat.data )

    # argument setup -- HARDWIRED
    # src_transform = affine.Affine( 0.5, 0.0, -180.0, 0.0, -0.5, 90.0 )
    # src_nodata = -9999.0
    # [!] THE ABOVE ARE INCORRECT FOR THE MODELED DATA

    # output_filenames setup
    dates = ds.time.to_pandas()
    years = dates.apply( lambda x: x.year ).tolist()
    months = [ i if len(i)==2 else '0'+i for i in np.arange( 1, 12+1, 1 ).astype( str ).tolist() ]
    month_year = [ (month, year) for year in years for month in months ]

    # read in the pre-processed 12-month climatology
    clim_list = sorted( glob.glob( os.path.join( self.clim_path, '*.tif' ) ) ) # this could catch you.
    clim_dict = { month:rasterio.open( fn ).read( 1 ) for month, fn in zip( months, clim_list ) }

    # [!] THIS BELOW NEEDS RE-WORKING FOR THE AR5 MODELED DATA
    output_filenames = [ os.path.join( downscaled_path, '_'.join([ variable, self.metric, cru_ts_version, 'downscaled', month, str(year) ])+'.tif' )
                            for month, year in month_year ]

    # set downscaling_operation based on self.absolute boolean
    if self.absolute == True:
        downscaling_operation = 'add'
    elif self.absolute == False:
        downscaling_operation = 'mult'
    else:
        raise AttributeError( 'downscaling operation: self.absolute must be boolean' )

    args_list = [ { 'anom_df':anom_df, 'meshgrid_tuple':(xi, yi), 'template_raster_fn':template_raster_fn,
                    'lons_pcll':lons_pcll, 'src_transform':src_transform, 'src_crs':self.src_crs,
                    'src_nodata':src_nodata, 'output_filename':out_fn,
                    'baseline_arr':clim_dict[ self._fn_month_grouper( out_fn ) ],
                    'downscaling_operation':downscaling_operation,
                    'post_downscale_function':self.post_downscale_function,
                    'write_anomalies':self.write_anomalies }
                        for anom_df, out_fn in zip( anom_df_list, output_filenames ) ]

    # run anomalies interpolation and downscaling in a single go.
    # ( anom_df, meshgrid_tuple, template_raster_fn, lons_pcll, src_transform, src_crs, src_nodata, output_filename, write_anomalies )
    out = mp_map( lambda args: self._interp_downscale_wrapper( args_dict=args ), args_list, nproc=self.ncores )
    return 'downscaling complete. files output at: %s' % base_path
# files to folders:
import os, itertools, glob, shutil
from pathos.mp_map import mp_map
from functools import partial

models = [ 'MRI-CGCM3', 'GFDL-CM3', 'CCSM4', 'IPSL-CM5A-LR' ]
base_path = '/atlas_scratch/apbennett/Calibration/HighCalib/FMO_Calibrated/'
scenarios = [ 'NoFMO', 'AltFMO', '' ]

for model in models:
    for scenario in scenarios:
        if scenario == '':
            maps_path = os.path.join( base_path, '_'.join([ model, 'rcp85' ]), 'Maps' )
        else:
            maps_path = os.path.join( base_path, '_'.join([ model, 'rcp85', scenario ]), 'Maps' )
        print( maps_path )

        variables = [ 'Age', 'Veg', 'FireScar', 'BasalArea', 'BurnSeverity' ] # more can be added if they are needed.
        out = [ i for i in itertools.product( [maps_path], variables ) ]

        # run
        for i in out:
            l = glob.glob( os.path.join( *i ) + '*.tif' )
            # run it in parallel:
            f = partial( move_files, output_path=maps_path )
            out_filenames = mp_map( f, sequence=l, nproc=32 )
def downscale_cru_ts( self, *args, **kwargs ):
    ''' run the CRU downscaling using the monthly climatology files given '''
    from pathos.mp_map import mp_map
    import glob, affine, rasterio

    nc_varname = self._get_varname_cru( )
    # handle cases where the desired varname != one parsed from file.
    # set it to self -- DangerTown™
    if self.variable is None:
        self.variable = nc_varname

    # build output dirs
    anomalies_path = os.path.join( self.base_path, self.variable, 'anom' )
    if not os.path.exists( anomalies_path ):
        os.makedirs( anomalies_path )

    downscaled_path = os.path.join( self.base_path, self.variable, 'downscaled' )
    if not os.path.exists( downscaled_path ):
        os.makedirs( downscaled_path )

    # template setup
    template_raster = rasterio.open( self.template_raster_fn )
    template_meta = template_raster.meta
    template_meta.update( crs={'init':'epsg:3338'} )

    # make a mask with values of 0=nodata and 1=data
    template_raster_mask = template_raster.read_masks( 1 ) # mask of band 1 is all we need
    template_raster_mask[ template_raster_mask == 255 ] = 1

    anomalies = self._calc_anomalies( self.cru_ts, absolute=self.absolute )
    anomalies_pcll, lons_pcll = self.utils.shiftgrid( 0., anomalies, anomalies.lon.data ) # grabs lons from the xray ds

    # mesh the lons and lats and unravel them to 1-D
    lo, la = [ i.ravel() for i in np.meshgrid( lons_pcll, anomalies.lat ) ]

    # convert into pandas.DataFrame and drop all the NaNs -- land-only dataset
    anom_df_list = [ pd.DataFrame({ 'anom':i.ravel(), 'lat':la, 'lon':lo }).dropna( axis=0, how='any' ) for i in anomalies_pcll ]
    xi, yi = np.meshgrid( lons_pcll, anomalies.lat.data )

    # argument setup -- HARDWIRED
    src_transform = affine.Affine( 0.5, 0.0, -180.0, 0.0, -0.5, 90.0 )
    src_nodata = -9999.0

    # output_filenames setup
    dates = anomalies.time.to_pandas()
    years = np.unique( dates.apply( lambda x: x.year ) ).tolist()
    # years = np.unique( self._get_years_cru( self.cru_ts ) ) # CHANGED!
    cru_ts_version = self._get_version_cru( self.cru_ts ) # works if naming convention stays same
    months = [ i if len(i)==2 else '0'+i for i in np.arange( 1, 12+1, 1 ).astype( str ).tolist() ]
    month_year = [ (month, year) for year in years for month in months ]

    # read in the pre-processed 12-month climatology
    clim_list = sorted( glob.glob( os.path.join( self.clim_path, '*.tif' ) ) ) # this could catch you.
    clim_dict = { month:rasterio.open( fn ).read( 1 ) for month, fn in zip( months, clim_list ) }

    output_filenames = [ os.path.join( downscaled_path, '_'.join([ self.variable, self.metric, cru_ts_version, 'downscaled', month, str(year) ])+'.tif' )
                            for month, year in month_year ]

    # set downscaling_operation based on self.absolute boolean
    if self.absolute == True:
        downscaling_operation = 'add'
    elif self.absolute == False:
        downscaling_operation = 'mult'
    else:
        raise AttributeError( 'downscaling operation: self.absolute must be boolean' )

    args_list = [ { 'anom_df':anom_df, 'meshgrid_tuple':(xi, yi), 'template_raster_fn':self.template_raster_fn,
                    'lons_pcll':lons_pcll, 'src_transform':src_transform, 'src_crs':self.src_crs,
                    'src_nodata':src_nodata, 'output_filename':out_fn,
                    'baseline_arr':clim_dict[ self._fn_month_grouper( out_fn ) ],
                    'downscaling_operation':downscaling_operation,
                    'post_downscale_function':self.post_downscale_function,
                    'write_anomalies':self.write_anomalies }
                        for anom_df, out_fn in zip( anom_df_list, output_filenames ) ]

    # run anomalies interpolation and downscaling in a single go.
    out = mp_map( lambda args: self._interp_downscale_wrapper( args_dict=args ), args_list, nproc=self.ncores )
    return 'downscaling complete. files output at: %s' % self.base_path
    except:
        pass
    return shutil.copy( fn, out_fn )

if __name__ == "__main__":
    import os, glob
    from pathos.mp_map import mp_map
    from functools import partial

    # base_dir = '/workspace/Shared/Tech_Projects/ESGF_Data_Access/project_data/tem_data_sep2016/raw/cmip5'
    # /output1/NASA-GISS/GISS-E2-R/historical/mon/atmos/Amon/r1i1p1/v20121015/hur
    base_dir = "/workspace/Shared/Tech_Projects/EPSCoR_Southcentral/project_data/cmip5/prepped"
    output_dir = "/workspace/Shared/Tech_Projects/ESGF_Data_Access/project_data/tem_data_sep2016/cmip5/prepped"

    filelist = []
    for root, subs, files in os.walk( base_dir ):
        if len( files ) > 0:
            # parenthesized to fix the original `and "tas_" or "pr_" in fn` precedence bug
            filelist = filelist + [ os.path.join( root, fn ) for fn in files
                                    if fn.endswith( ".nc" ) and ( "tas_" in fn or "pr_" in fn ) ]

    f = partial( copy_fn, output_dir=output_dir )
    done = mp_map( f, filelist, nproc=32 )

# REACCESS WITH SYNDA ALL THE FILES WE NEED, THIS WAY WE CAN AUTOPURGE THE OLD VERSIONS WE HAVE HERE.
# project=CMIP5
# model=MRI-CGCM3 GISS-E2-R GFDL-CM3 IPSL-CM5A-LR CCSM4
# experiment=rcp26
# ensemble=r1i1p1
# variable[atmos][mon]=clt
# timeslice=1800-2150
if __name__ == '__main__':
    import os, glob, subprocess, itertools
    from pathos.mp_map import mp_map

    output_path = '/workspace/Shared/Tech_Projects/EPSCoR_Southcentral/project_data/downscaled'
    ncpus = 32
    project = 'cmip5'
    variables = [ 'tas', 'pr' ]
    models = [ 'IPSL-CM5A-LR', 'MRI-CGCM3', 'GISS-E2-R', 'GFDL-CM3', 'NCAR-CCSM4', '5ModelAvg' ]
    scenarios = [ 'historical', 'rcp26', 'rcp45', 'rcp60', 'rcp85' ]

    commands = []
    for variable, model, scenario in itertools.product( variables, models, scenarios ):
        if scenario == 'historical':
            base_path = '/Data/Base_Data/Climate/AK_CAN_2km/historical/AR5_CMIP5_models'
        else:
            base_path = '/Data/Base_Data/Climate/AK_CAN_2km/projected/AR5_CMIP5_models'

        # print( '{} {} {}'.format( variable, model, scenario ) )
        base = os.path.join( base_path, scenario, model, variable )
        out = os.path.join( output_path, model, scenario )
        if not os.path.exists( out ):
            os.makedirs( out )

        # symlink them to their new directory structure
        commands = commands + [ ' '.join([ 'cp', '-rs', base, out ]) ]
        # _ = subprocess.call([ 'cp', '-rs', base, out ])

    final = mp_map( run, commands, nproc=ncpus )
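# hypothetical sketch of the `run` worker mapped over above -- the commands are
# plain shell-command strings (see the commented subprocess.call variant), so a
# minimal version just splits and executes them:
def run( command ):
    ''' execute a single shell-command string (sketch) '''
    import subprocess
    return subprocess.call( command.split() )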
def downscale_ar5_ts( self, *args, **kwargs ):
    # * * * * * * * * * *
    # template setup
    from pathos.mp_map import mp_map
    import glob, affine, rasterio

    nc_varname = self._get_varname_ar5()
    # handle cases where the desired varname != one parsed from file.
    if self.variable is None:
        variable = nc_varname
    else:
        variable = self.variable
    print( variable )

    # build output dirs
    anomalies_path = os.path.join( self.base_path, variable, 'anom' )
    if not os.path.exists( anomalies_path ):
        os.makedirs( anomalies_path )

    downscaled_path = os.path.join( self.base_path, variable, 'downscaled' )
    if not os.path.exists( downscaled_path ):
        os.makedirs( downscaled_path )

    # * * * * * * * * * *
    # calc the anomalies
    anomalies = self._calc_anomalies()
    anomalies_pcll, lons_pcll = self.utils.shiftgrid( 0., anomalies, anomalies.lon.data ) # grabs lons from the xray ds

    # mesh the lons and lats and unravel them to 1-D
    lo, la = [ i.ravel() for i in np.meshgrid( lons_pcll, anomalies.lat ) ]

    # convert into pandas.DataFrame and drop all the NaNs -- land-only dataset
    anom_df_list = [ pd.DataFrame({ 'anom':i.ravel(), 'lat':la, 'lon':lo }).dropna( axis=0, how='any' ) for i in anomalies_pcll ]
    xi, yi = np.meshgrid( lons_pcll, anomalies.lat.data )

    # some metadata
    src_transform = self._calc_ar5_affine()

    # argument setup -- HARDWIRED
    src_nodata = None # DangerTown
    # src_crs = {'init':'epsg:4326'} # DangerTown

    # output_filenames setup
    dates = anomalies.time.to_pandas()
    years = np.unique( dates.apply( lambda x: x.year ) ).tolist()
    months = [ i if len(i)==2 else '0'+i for i in np.arange( 1, 12+1, 1 ).astype( str ).tolist() ]
    month_year = [ (month, year) for year in years for month in months ]

    # read in the pre-processed 12-month climatology
    clim_list = sorted( glob.glob( os.path.join( self.clim_path, '*.tif' ) ) ) # this could catch you.
    clim_dict = { month:rasterio.open( fn ).read( 1 ) for month, fn in zip( months, clim_list ) }

    # [!] THIS BELOW NEEDS RE-WORKING FOR THE AR5 MODELED DATA
    output_filenames = [ os.path.join( downscaled_path, '_'.join([ variable, self.metric, 'ar5', 'downscaled', month, str(year) ])+'.tif' )
                            for month, year in month_year ]

    # set downscaling_operation based on self.absolute boolean
    if self.absolute == True:
        downscaling_operation = 'add'
    elif self.absolute == False:
        downscaling_operation = 'mult'
    else:
        raise AttributeError( 'downscaling operation: self.absolute must be boolean' )

    args_list = [ { 'anom_df':anom_df, 'meshgrid_tuple':(xi, yi), 'template_raster_fn':self.template_raster_fn,
                    'lons_pcll':lons_pcll, 'src_transform':src_transform, 'src_crs':self.src_crs,
                    'src_nodata':src_nodata, 'output_filename':out_fn,
                    'baseline_arr':clim_dict[ self._fn_month_grouper( out_fn ) ],
                    'downscaling_operation':downscaling_operation,
                    'post_downscale_function':self.post_downscale_function,
                    'write_anomalies':self.write_anomalies }
                        for anom_df, out_fn in zip( anom_df_list, output_filenames ) ]

    # run anomalies interpolation and downscaling in a single go.
    # ( anom_df, meshgrid_tuple, template_raster_fn, lons_pcll, src_transform, src_crs, src_nodata, output_filename, write_anomalies )
    out = mp_map( lambda args: self._interp_downscale_wrapper( args_dict=args ), args_list, nproc=self.ncores )
    return 'downscaling complete. files output at: %s' % self.base_path
for m in models:
    out = {}
    for v in variables:
        # new delta version
        path = os.path.join( base_dir, 'downscaled_minmax', m, scenario, v )
        files = glob.glob( os.path.join( path, '*.tif' ) )
        files = sort_files( only_years( files, begin=begin, end=end, split_on='_', elem_year=-1 ) )

        # make a mask
        rst = rasterio.open( files[0] )
        # mask_arr = np.empty_like( rst.read(1) )
        shapes = ( (geom, value) for geom, value in zip( shp.geometry, [0] ) )
        burned = features.rasterize( shapes=shapes, out_shape=rst.shape, fill=1, transform=rst.affine )

        f = partial( masked_mean, mask=burned, bounds=None )
        out[ v ] = mp_map( f, files, nproc=4 )

        # standard delta version
        path = os.path.join( base_dir, 'downscaled', m, scenario, v )
        files = glob.glob( os.path.join( path, '*.tif' ) )
        files = sort_files( only_years( files, begin=begin, end=end, split_on='_', elem_year=-1 ) )

        # make a mask
        rst = rasterio.open( files[0] )
        # mask_arr = np.empty_like( rst.read(1) )
        shapes = ( (geom, value) for geom, value in zip( shp.geometry, [0] ) )
        burned = features.rasterize( shapes=shapes, out_shape=rst.shape, fill=1, transform=rst.affine )

        f = partial( masked_mean, mask=burned, bounds=None )
        out[ v+'_old' ] = mp_map( f, files, nproc=4 )
# unpack
variable = args.variable
base_dir = args.base_dir

# # # # TESTING
# base_dir = '/workspace/Shared/Tech_Projects/EPSCoR_Southcentral/project_data'
# variable = 'tas'
# # # # # # # #

# some setup args
base_dir = os.path.join( base_dir, 'downscaled' )
variables = [ variable ] # ['pr','tas','tasmax','tasmin']
scenarios = [ 'historical', 'rcp26', 'rcp45', 'rcp60', 'rcp85' ]
models = [ 'IPSL-CM5A-LR', 'MRI-CGCM3', 'GISS-E2-R', 'GFDL-CM3', 'NCAR-CCSM4' ]

for variable, scenario in itertools.product( variables, scenarios ):
    if scenario == 'historical':
        begin = 1900
        end = 2005
    else:
        begin = 2006
        end = 2100

    # list the files we want
    input_files = [ list_files( os.path.join( base_dir, model, scenario, variable ), begin, end ) for model in models ]
    grouped = list( zip( *input_files ) )

    # run it in parallel
    output_filenames = mp_map( generate, grouped, nproc=32 )
def _main( x, *args, **kwargs ):
    ''' run the CRU downscaling using the monthly climatology files given '''
    from pathos.mp_map import mp_map
    import glob, affine

    nc_varname = get_varname_cru( nc_fn )
    # handle cases where the desired varname is not the same as the one parsed from file.
    if variable is None:
        variable = nc_varname

    # build output dirs
    anomalies_path = os.path.join( base_path, variable, 'anom' )
    if not os.path.exists( anomalies_path ):
        os.makedirs( anomalies_path )

    downscaled_path = os.path.join( base_path, variable, 'downscaled' )
    if not os.path.exists( downscaled_path ):
        os.makedirs( downscaled_path )

    # template setup
    template_raster = rasterio.open( template_raster_fn )
    template_meta = template_raster.meta
    template_meta.update( crs={'init':'epsg:3338'} )

    # make a mask with values of 0=nodata and 1=data
    template_raster_mask = template_raster.read_masks( 1 ) # mask of band 1 is all we need
    template_raster_mask[ template_raster_mask == 255 ] = 1

    anomalies = calc_anomalies( nc_fn, nc_varname, climatology_begin, climatology_end, absolute ) # the absolute calculation needs some thought
    anomalies_pcll, lons_pcll = shiftgrid( 0., anomalies, anomalies.lon.data ) # grabs lons from the xray ds

    # mesh the lons and lats and unravel them to 1-D
    lo, la = [ i.ravel() for i in np.meshgrid( lons_pcll, anomalies.lat ) ]

    # convert into pandas.DataFrame and drop all the NaNs -- land-only dataset
    anom_df_list = [ pd.DataFrame({ 'anom':i.ravel(), 'lat':la, 'lon':lo }).dropna( axis=0, how='any' ) for i in anomalies_pcll ]
    xi, yi = np.meshgrid( lons_pcll, anomalies.lat.data )

    # argument setup -- HARDWIRED
    src_transform = affine.Affine( 0.5, 0.0, -180.0, 0.0, -0.5, 90.0 )
    src_crs = {'init':'epsg:4326'}
    src_nodata = -9999.0

    # output_filenames setup
    years = get_years_cru( nc_fn )
    cru_ts_version = get_version_cru( nc_fn ) # works if naming convention stays same
    months = [ i if len(i)==2 else '0'+i for i in np.arange( 1, 12+1, 1 ).astype( str ).tolist() ]
    month_year = [ (month, year) for year in years for month in months ]
    output_filenames = [ os.path.join( anomalies_path, '_'.join([ variable, metric, 'cru_ts'+str(cru_ts_version), 'anom', month, year ])+'.tif' )
                            for month, year in month_year ]

    # make a list of args to pass to the interpolation function
    args_list = [ {'anomalies':anom_df, 'meshgrid_tuple':(xi, yi), 'lons_pcll':lons_pcll,
                    'template_raster_fn':template_raster_fn, 'src_transform':src_transform,
                    'src_crs':src_crs, 'src_nodata':src_nodata, 'output_filename':fn }
                        for anom_df, fn in zip( anom_df_list, output_filenames ) ]

    # dict args, so unpack with ** rather than the original *
    anomalies = mp_map( lambda x: interpolate_anomalies( **x ), args_list, nproc=ncores )

    # read in the pre-processed 12-month climatology
    l = sorted( glob.glob( os.path.join( cl20_path, '*.tif' ) ) ) # this could catch you.
    clim_dict = { month:rasterio.open( fn ).read( 1 ) for month, fn in zip( months, l ) }

    # group the interpolated anomaly filenames by months
    out = pd.Series( anomalies )
    out_months = out.apply( fn_month_grouper )
    months_grouped = out.groupby( out_months )

    # unpack groups for parallelization
    mg = [ (i,j) for i,j in months_grouped ]

    # make an args tuple to pass to the function
    args_list = [ ( i[1], clim_dict[i[0]], downscaled_path, absolute ) for i in mg ]

    # downscale / write to disk
    out = mp_map( lambda args: downscale_cru_historical( *args ), args_list, nproc=ncores )
    return 'downscaling complete. files output at: %s' % base_path
model = '5ModelAvg'
scenario = 'rcp45'
begin = 2010
end = 2015
variables = ['tasmax', 'tas', 'tasmin']

out = {}
for v in variables:
    path = os.path.join( base_dir, 'EPSCOR_SC_DELIVERY_AUG2016', 'downscaled', model, scenario, v )
    # for testing with new downscaler
    if v == 'tas':
        path = os.path.join( base_dir, 'downscaled_tas_pr_epscor_sc', model, scenario, v )

    files = glob.glob( os.path.join( path, '*.tif' ) )
    files = sort_files( only_years( files, begin=begin, end=end, split_on='_', elem_year=-1 ) )
    out[ v ] = mp_map( masked_mean, files, nproc=4 )

plot_df = pd.DataFrame( out )
plot_df.index = pd.date_range( start=str(begin), end=str(end+1), freq='M' )
plot_df = plot_df[ ['tasmax', 'tas', 'tasmin'] ] # get em in the order for plotting

# now plot the dataframe
if begin == end:
    title = 'EPSCoR SC AOI Temp Metrics {} {} {}'.format( model, scenario, begin )
else:
    title = 'EPSCoR SC AOI Temp Metrics {} {} {} - {}'.format( model, scenario, begin, end )

figsize = (13, 9)
colors = [ 'red', 'black', 'blue' ]
ax = plot_df.plot( kind='line', title=title, figsize=figsize, color=colors )
#     begin = 1950
#     end = 1965
# else:
#     old_dir = '/Data/Base_Data/Climate/AK_CAN_2km/projected/AR5_CMIP5_models'
#     begin = 2060
#     end = 2070

figsize = (16, 9)
out_raw = {}
out_anom = {}
for v in variables:
    # raw
    path = os.path.join( base_dir, m, scenario, v )
    files = glob.glob( os.path.join( path, '*.tif' ) )
    files = sort_files( only_years( files, begin=begin, end=end, split_on='_', elem_year=-1 ) )
    out_raw[ v ] = mp_map( masked_mean, files, nproc=4 )

    # anom
    path = os.path.join( base_dir, m, scenario, v, 'anom' )
    files = glob.glob( os.path.join( path, '*.tif' ) )
    files = sort_files( only_years( files, begin=begin, end=end, split_on='_', elem_year=-2 ), elem_month=-3, elem_year=-2 )
    out_anom[ v ] = mp_map( masked_mean, files, nproc=4 )

    # if v == 'tas' or v == 'pr':
    #     if m == 'CRU_TS323':
    #         path = os.path.join( old_dir, v )
    #     else:
    #         path = os.path.join( old_dir, scenario, m, v )
    #     files = glob.glob( os.path.join( path, '*.tif' ) )
    #     files = sort_files( only_years( files, begin=begin, end=end, split_on='_', elem_year=-1 ) )
    #     out[ v+'_old' ] = mp_map( masked_mean, files, nproc=4 )
def interp_na( self ):
    '''
    interpolate across NA holes in the data -- output dtype is np.float32

    method = [str] one of 'cubic', 'near', 'linear'

    builds a list of dicts to pass to xyz_to_grid in parallel, updates self.ds in place, returns 1
    '''
    from copy import copy
    import pandas as pd
    import numpy as np
    from pathos.mp_map import mp_map

    # suppress the darn scientific notation in printed output
    np.set_printoptions( suppress=True )
    output_dtype = np.float32

    # if 0-360 leave it alone
    if ( self.ds.lon > 200.0 ).any():
        dat, lons = self.ds[ self.variable ].data, self.ds.lon
        self._lonpc = lons
    else:
        # greenwich-centered rotate to 0-360 for interpolation across pacific
        dat, lons = self.rotate( self.ds[ self.variable ].values, self.ds.lon, to_pacific=True )
        self._rotated = True # update the rotated attribute
        self._lonpc = lons

    # mesh the lons and lats and unravel them to 1-D
    xi, yi = np.meshgrid( self._lonpc, self.ds.lat.data )
    lo, la = [ i.ravel() for i in (xi, yi) ]

    # setup args for multiprocessing
    df_list = [ pd.DataFrame({ 'x':lo, 'y':la, 'z':d.ravel() }).dropna( axis=0, how='any' ) for d in dat ]
    args = [ {'x':np.array(df['x']), 'y':np.array(df['y']), 'z':np.array(df['z']),
                'grid':(xi, yi), 'method':self.method, 'output_dtype':output_dtype } for df in df_list ]

    # # # # USE MLAB's griddata which we _can_ parallelize
    def wrap( d ):
        ''' simple wrapper around utils.xyz_to_grid for mp_map '''
        x = np.array( d['x'] )
        y = np.array( d['y'] )
        z = np.array( d['z'] )
        xi, yi = d['grid']
        return utils.xyz_to_grid( x, y, z, (xi, yi), interp='linear' )
    # # # #

    try:
        print( 'processing interpolation to convex hull in parallel using {} cpus.'.format( self.ncpus ) )
        dat_list = mp_map( wrap, args, nproc=self.ncpus )
        dat_list = [ i.data for i in dat_list ] # drop the output mask
        dat = np.array( dat_list )
    except:
        print( 'processing cru re-gridding in serial due to multiprocessing issues...' )
        dat = np.array([ wrap( i ) for i in args ]) # wrap takes the dict whole, not unpacked

    lons = self._lonpc
    if self._rotated: # rotate it back
        dat, lons = self.rotate( dat, lons, to_pacific=False )

    # place back into a new xarray.Dataset object for further processing
    self.ds = self.ds.update( { self.variable:( ['time','lat','lon'], dat ) } )
    print( 'ds interpolated updated into self.ds' )
    return 1
# path_list = [ os.path.join( input_path, model, variable, 'downscaled', experiment, '*.tif' ) for model in models for variable in variables for experiment in experiments ]
# for path in path_list:
#     print( 'running: %s ' % path )
#     # path = '/Data/malindgren/cru_november_final/IEM/ar5/MRI-CGCM3/cld/downscaled/*rcp26*.tif'
#     cld = pd.Series( glob.glob( path ) )
#     print( 'file count: %d' % len( cld ) )
#     output_filenames = cld.apply( lambda x: x.replace( 'cld', 'rsds' ).replace( '_pct_', '_MJ-m2-d1_' ) ).tolist()
#     month_grouper = cld.apply( lambda x: os.path.basename( x ).split( '.' )[0].split( '_' )[-2] )
#     args_list = [ ( cld, girr[ month ], out ) for cld, out, month in zip( cld.tolist(), output_filenames, month_grouper ) ]
#     # run it in parallel
#     out = mp_map( lambda x: generate_nirr( *x ), args_list, nproc=ncores )

# # # CRU TS 3.23 Historical DATA VERSION
# list the cloud files for a series
path = '/workspace/Shared/Tech_Projects/ALFRESCO_Inputs/project_data/TEM_Data/cru_october_final/IEM/cru_ts31/cld/downscaled/*.tif'
print( 'running: %s ' % path )
cld = pd.Series( glob.glob( path ) )
print( 'file count: %d' % len( cld ) )
output_filenames = cld.apply( lambda x: x.replace( 'cld', 'rsds' ).replace( '_pct_', '_MJ-m2-d1_' ) ).tolist()
month_grouper = cld.apply( lambda x: os.path.basename( x ).split( '.' )[0].split( '_' )[-2] )
args_list = [ ( cld, girr[ month ], out ) for cld, out, month in zip( cld.tolist(), output_filenames, month_grouper ) ]

# run it in parallel
out = mp_map( lambda x: generate_nirr( *x ), args_list, nproc=ncores )