Example #1
    def interp_na(self):
        '''
        interpolate across NaNs (to the convex hull) in parallel and update
        self.ds in place; worker args are dicts passed to xyz_to_grid.

        method = [str] one of 'cubic', 'near', 'linear'
        output values are cast to np.float32
        '''
        from copy import copy
        import pandas as pd
        import numpy as np
        from pathos.mp_map import mp_map

        # remove the darn scientific notation
        np.set_printoptions(suppress=True)
        output_dtype = np.float32

        # if 0-360 leave it alone
        if (self.ds.lon > 200.0).any():
            dat, lons = np.array(self.ds.data), np.array(self.ds.lon)
            self._lonpc = lons
        else:
            # greenwich-centered rotate to 0-360 for interpolation across pacific
            dat, lons = self.utils.rotate(np.array(self.ds.values),
                                          np.array(self.ds.lon),
                                          to_pacific=True)
            self._rotated = True  # update the rotated attribute
            self._lonpc = lons

        # mesh the lons and lats and unravel them to 1-D
        xi, yi = np.meshgrid(self._lonpc, self.ds.lat.data)
        lo, la = [i.ravel() for i in (xi, yi)]

        # setup args for multiprocessing
        df_list = [
            pd.DataFrame({
                'x': lo,
                'y': la,
                'z': d.ravel()
            }).dropna(axis=0, how='any') for d in dat
        ]

        args = [{'x': np.array(df['x']), 'y': np.array(df['y']),
                 'z': np.array(df['z']), 'grid': (xi, yi),
                 'method': self.historical.method,
                 'output_dtype': output_dtype} for df in df_list]

        print(
            'processing interpolation to convex hull in parallel using {} cpus.'
            .format(self.ncpus))
        dat_list = mp_map(self.wrap, args, nproc=self.ncpus)
        dat_list = [np.array(i) for i in dat_list]  # drop the output mask
        dat = np.array(dat_list)

        lons = self._lonpc
        if self._rotated:  # rotate it back
            dat, lons = self.utils.rotate(dat, lons, to_pacific=False)

        # place back into a new xarray.Dataset object for further processing
        # self.ds = self.ds.update( { self.historical.variable:( ['time','lat','lon'], dat ) } )
        self.ds.data = dat
        print('ds interpolated updated into self.ds')
        return 1
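    # `self.wrap` (passed to mp_map above) is not shown in this snippet; a
    # minimal sketch, assuming a `utils.xyz_to_grid` interpolator like the one
    # referenced in Example #30 below (`utils` is an assumed helper module):
    def wrap(self, d):
        '''hypothetical worker: unpack one args dict and grid the xyz points'''
        import numpy as np
        x, y, z = (np.array(d[k]) for k in ('x', 'y', 'z'))
        return utils.xyz_to_grid(x, y, z, d['grid'], interp=d['method'])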
def make_decadal_seasonal( base_path, output_path, variable, model, scenario, decade, ncpus, agg_metric ):
	'''
	calculate and output mean seasonal monthly data across a decade

	ARGUMENTS:
	----------
	base_path = [str] path to the directory containing the input GeoTiffs
	output_path = [str] directory to write the outputs to
	variable = [str] variable name used in the input file paths
	model = [str] model name used in the input file paths
	scenario = [str] scenario name used in the input file paths
	decade = [tuple] (begin_year, end_year) of the decade to process
	ncpus = [int] number of processes to use with mp_map
	agg_metric = [str] aggregation metric substring used to glob the inputs

	RETURNS
	-------
	list of argument tuples passed to the parallel season-aggregation workers;
	GeoTiffs are written to output_path as a side effect.

	'''
	decade_begin, decade_end = decade

	# modeled data
	files = glob.glob( os.path.join( base_path, model, scenario, variable, '*' + agg_metric + '*.tif' ) )
	files = only_years( files, begin=decade_begin, end=decade_end, split_on='_', elem_year=-1 )

	# season_names = [ get_month_seaon( fn ) for fn in files ]
	years = [ int(get_year( fn )) for fn in files ]

	# min / max years
	start_year = str( min(years) )
	end_year = str( max(years) )

	seasons = [ get_season( fn ) for fn in files ]

	# drop data for start_year JF and end_year D -- useful for annuals, but not for decadals
	# files = [ fn for fn in files if not '_'.join([ '01',start_year ]) in fn if not '_'.join([ '02',start_year ]) in fn if not '_'.join([ '12',end_year ]) in fn ]
	files = pd.Series( files )

	grouped_seasons = files.groupby( seasons )

	args = [ ( season_name, file_group.tolist(), output_path, agg_metric ) for season_name, file_group in grouped_seasons ]

	_ = mp_map( wrap, args, nproc=ncpus )
	return args
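# the helpers used above (`only_years`, `get_year`, `get_season`) are not part
# of this snippet; hypothetical implementations, assuming the underscore-
# delimited filename convention implied by split_on='_' / elem_year=-1:
def get_year( fn, split_on='_', elem_year=-1 ):
	''' hypothetical: pull the year token out of an underscore-delimited filename '''
	import os
	return os.path.splitext( os.path.basename( fn ) )[0].split( split_on )[ elem_year ]

def only_years( files, begin, end, split_on='_', elem_year=-1 ):
	''' hypothetical: keep only files whose parsed year falls in [begin, end] '''
	return [ fn for fn in files if begin <= int( get_year( fn, split_on, elem_year ) ) <= end ]

def get_season( fn, elem_month=-2 ):
	''' hypothetical: map the month token of a filename to its season name '''
	import os
	seasons = { '12':'DJF', '01':'DJF', '02':'DJF', '03':'MAM', '04':'MAM', '05':'MAM',
				'06':'JJA', '07':'JJA', '08':'JJA', '09':'SON', '10':'SON', '11':'SON' }
	month = os.path.splitext( os.path.basename( fn ) )[0].split( '_' )[ elem_month ]
	return seasons[ month ]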
		MMSI_grouped_keep = MMSI_grouped_keep.groupby( new_groups ).apply( fix_voyage_id )

		# MMSI_grouped_keep.loc[ :, 'Voyage' ] = MMSI_grouped_keep.loc[ :, 'clusters' ]
		# voyage_group_names = grouped.groups.keys()

		# run the voyage cleaner function on the grouped voyage data frames
		# gdf_3338 = MMSI_grouped_keep.groupby( 'Voyage' ).apply( clean_grouped_voyages( df ) )
		# parallelize it 
		MMSI_grouped_voyages = pd.Series([ j.copy() for i,j in MMSI_grouped_keep.groupby( 'Voyage' ) ])
		
		del MMSI_grouped_keep, df # cleanup

		print( '  running voyage cleaner' )
		if len( MMSI_grouped_voyages ) >= 2000:
			splitter = np.array_split( range( len( MMSI_grouped_voyages ) ), int( len( MMSI_grouped_voyages ) / 1000 ) )
			out = [ mp_map( clean_grouped_voyages, sequence=MMSI_grouped_voyages[ i ], nproc=ncpus ) for i in splitter ]
			# unlist
			out = [ j for i in out for j in i ]
		else:
			out = mp_map( clean_grouped_voyages, sequence=MMSI_grouped_voyages, nproc=ncpus )

		df = pd.concat( ( i for i in out if i.shape[0] > 0 ) )

		del MMSI_grouped_voyages # cleanup

		# run the intersect testing
		MMSI_grouped_goodbad = pd.Series([ j.copy() for i,j in df.groupby( 'Voyage' ) ])
		break_goodbad_partial = partial( break_goodbad, land=land ) # partial function build
		
		if len( MMSI_grouped_goodbad ) >= 2000:
			splitter = np.array_split( range( len( MMSI_grouped_goodbad ) ), int( len( MMSI_grouped_goodbad ) / 1000 ) )
	from pathos.mp_map import mp_map

	# read in the args
	base_path = '/workspace/Shared/Tech_Projects/ESGF_Data_Access/project_data/tem_data_sep2016'
	cru_path = '/Data/Base_Data/Climate/World/CRU_grids/CRU_TS323/'
	args = []
	for model in ['CCSM4', 'GFDL-CM3', 'IPSL-CM5A-LR', 'MRI-CGCM3', 'GISS-E2-R']:
		for scenario in ['historical', 'rcp26','rcp45','rcp60','rcp85']:
			tas_list = sorted( glob.glob( os.path.join( base_path, 'downscaled', model, scenario, 'tas', '*.tif' ) ) )
			hur_list = sorted( glob.glob( os.path.join( base_path, 'downscaled', model, scenario, 'hur', '*.tif' ) ) )

			# make args to pass to the run function
			args = args + list( zip( tas_list, hur_list ) )
	
	# run in parallel
	out = mp_map( run, args, nproc=32 )

	# # # CONVERT CL20 2km to vap
	tas_list = sorted(glob.glob( os.path.join( base_path, 'cru', 'cru_cl20', 'tas', '*.tif' ) ))
	hur_list = sorted(glob.glob( os.path.join( base_path, 'cru', 'cru_cl20', 'hur', '*.tif' ) ))
	args = zip( tas_list, hur_list )
	out = mp_map( run, args, nproc=12 )

	# # # CONVERT CRU TS323 vap/tas to hur --> output to a non CF-compliant NetCDF that will be read back in with xarray
	tas = xr.open_dataset( '/Data/Base_Data/Climate/World/CRU_grids/CRU_TS323/cru_ts3.23.1901.2014.tmp.dat.nc' )
	vap = xr.open_dataset( '/Data/Base_Data/Climate/World/CRU_grids/CRU_TS323/cru_ts3.23.1901.2014.vap.dat.nc' )
	hur = convert_to_hur( tas.tmp, vap.vap )
	hur_ds = hur.to_dataset( name='hur' )
	hur_ds.to_netcdf( '/Data/Base_Data/Climate/World/CRU_grids/CRU_TS323/cru_ts3.23.1901.2014.hur.SNAP_derived.dat.nc' )
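# `convert_to_hur` above is not defined in this snippet; a hedged sketch,
# assuming relative humidity (%) is derived from temperature (deg C) and
# vapour pressure (hPa) via a Magnus-type saturation vapour pressure curve:
def convert_to_hur( tas, vap ):
	import numpy as np
	esat = 6.112 * np.exp( 17.62 * tas / ( 243.12 + tas ) ) # saturation vapour pressure [hPa]
	return ( vap / esat ) * 100.0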

				if scenario == 'historical':
					old_dir = '/Data/Base_Data/Climate/AK_CAN_2km/historical/AR5_CMIP5_models'
					begin = 1950
					end = 1965
				else:
					old_dir = '/Data/Base_Data/Climate/AK_CAN_2km/projected/AR5_CMIP5_models'
					begin = 2060
					end = 2070

			figsize = (16,9)
			out = {}
			for v in variables:
				path = os.path.join( base_dir,'downscaled', m, scenario, v )
				files = glob.glob( os.path.join( path, '*.tif' ) )
				files = sort_files( only_years( files, begin=begin, end=end, split_on='_', elem_year=-1 ) )
				out[ v ] = mp_map( masked_mean, files, nproc=4 )
				if v == 'tas' or v == 'pr':
					if m == 'CRU_TS323':
						path = os.path.join( old_dir, v )
					else:	
						path = os.path.join( old_dir, scenario, m, v )
					files = glob.glob( os.path.join( path, '*.tif' ) )
					files = sort_files( only_years( files, begin=begin, end=end, split_on='_', elem_year=-1 ) )
					out[ v+'_old' ] = mp_map( masked_mean, files, nproc=4 )

			plot_df = pd.DataFrame( out )
			plot_df.index = pd.date_range( start=str(begin), end=str(end+1), freq='M' )
			
			# sort the columns for output plotting cleanliness:
			if 'tas' in variables:
				col_list = ['tasmax', 'tas_old', 'tas', 'tasmin']
	def downscale_cru_ts( self, *args, **kwargs ):
		'''
		run the CRU downscaling using the monthly climatology files given
		'''
		from pathos.mp_map import mp_map
		import glob, affine, rasterio

		nc_varname = self._get_varname_cru( )
		# handle cases where the desired varname is not the same as the one parsed from file.
		if self.variable is None:
			variable = nc_varname
		else:
			variable = self.variable
		
		# build output dirs
		anomalies_path = os.path.join( self.base_path, variable, 'anom' )
		if not os.path.exists( anomalies_path ):
			os.makedirs( anomalies_path )

		downscaled_path = os.path.join( self.base_path, variable, 'downscaled' )
		if not os.path.exists( downscaled_path ):
			os.makedirs( downscaled_path )

		# template setup 
		template_raster = rasterio.open( self.template_raster_fn )
		template_meta = template_raster.meta
		template_meta.update( crs={'init':'epsg:3338'} )

		# make a mask with values of 0=nodata and 1=data
		template_raster_mask = template_raster.read_masks( 1 ) # mask of band 1 is all we need
		template_raster_mask[ template_raster_mask == 255 ] = 1

		anomalies = self.utils.calc_anomalies( self.cru_ts, variable, absolute=self.absolute )
		anomalies_pcll, lons_pcll = self.utils.shiftgrid( 0., anomalies, anomalies.lon.data ) # grabs lons from the xray ds

		# mesh the lons and lats and unravel them to 1-D
		lo, la = [ i.ravel() for i in np.meshgrid( lons_pcll, anomalies.lat ) ]
		
		# convert into pandas.DataFrame and drop all the NaNs -- land-only dataset
		anom_df_list = [ pd.DataFrame({ 'anom':i.ravel(), 'lat':la, 'lon':lo }).dropna( axis=0, how='any' ) for i in anomalies_pcll ]
		xi, yi = np.meshgrid( lons_pcll, anomalies.lat.data )

		# argument setup -- HARDWIRED
		src_transform = affine.Affine( 0.5, 0.0, -180.0, 0.0, -0.5, 90.0 )
		src_nodata = -9999.0
			
		# output_filenames setup
		years = np.unique( self._get_years_cru( self.cru_ts ) )
		cru_ts_version = self._get_version_cru( self.cru_ts ) # works if naming convention stays same
		months = [ i if len(i)==2 else '0'+i for i in np.arange( 1, 12+1, 1 ).astype( str ).tolist() ]
		month_year = [ (month, year) for year in years for month in months ]

		output_filenames = [ os.path.join( anomalies_path, '_'.join([ variable, self.metric, cru_ts_version, 'anom', month, str(year) ])+'.tif' )
								for month, year in month_year ]

		# make a list of args to pass to the interpolation function
		args_list = [ {'anom_df':anom_df, 'meshgrid_tuple':(xi, yi), 'template_raster_fn':self.template_raster_fn,
						'lons_pcll':lons_pcll, 'src_transform':src_transform, 'src_crs':self.src_crs,
						'src_nodata':src_nodata, 'output_filename':fn }
						for anom_df, fn in zip( anom_df_list, output_filenames ) ]

		anomalies = mp_map( lambda args: self.utils.interpolate_anomalies( **args ), args_list, nproc=self.ncores )

		# read in the pre-processed 12-month climatology
		clim_list = sorted( glob.glob( os.path.join( self.clim_path, '*.tif' ) ) ) # this could catch you.
		clim_dict = { month:rasterio.open( fn ).read( 1 ) for month, fn in zip( months, clim_list ) }

		# group the anomalies output filenames by months
		out = pd.Series( anomalies )
		out_months = out.apply( lambda x: DownscaleCRU._fn_month_grouper( fn=x ) )
		months_grouped = out.groupby( out_months )
		mg = [ (name, fn, fn.replace( 'anom', 'downscaled')) for name, group in months_grouped for fn in group.tolist() ]
		
		# output metadata
		meta = rasterio.open( mg[0][1] ).meta
		meta.update( compress='lzw' )
		
		# set downscaling_operation based on self.absolute boolean
		if self.absolute == True:
			downscaling_operation = 'add'
		elif self.absolute == False:
			downscaling_operation = 'mult'
		else:
			raise AttributeError( 'downscaling operation: self.absolute must be boolean' )

		# make an args tuple to pass to the function
		args_list = [ { 'anom_arr':rasterio.open( fn ).read(1), 'baseline_arr':clim_dict[i], \
						'output_filename':out_fn, 'downscaling_operation':downscaling_operation, \
						'meta':meta, 'post_downscale_function':self.post_downscale_function } \
						for i, fn, out_fn in mg ]

		# downscale / write to disk
		out = mp_map( lambda args: self.utils.downscale( **args ), args_list, nproc=self.ncores )
		return 'downscaling complete. files output at: %s' % self.base_path
    # more prep
    scenario, model, variable = path_slicer_ar5(input_path)
    template_raster_mask = rasterio.open(template_raster_mask_fn)
    output_path = os.path.join(output_base_path, scenario, model, variable)

    try:
        if not os.path.exists(output_path):
            os.makedirs(output_path)
    except OSError:  # directory may already exist (race between workers)
        pass

    file_list = glob.glob(os.path.join(input_path, "*.tif"))
    output_filenames = [generate_output_fn(input_fn, output_path, group) for input_fn in file_list]

    args = zip(file_list, output_filenames, itertools.repeat(template_raster_mask_fn, len(file_list)))
    _ = mp_map(resample_to_1km, args, nproc=ncores)
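# `resample_to_1km` is not shown in this snippet; a hedged sketch that warps
# each input GeoTiff onto the template grid with rasterio (argument order
# matches the zip() above; nearest-neighbour resampling is an assumption):
def resample_to_1km(args):
    import rasterio
    from rasterio.warp import reproject, Resampling
    fn, out_fn, template_fn = args
    with rasterio.open(template_fn) as tmpl, rasterio.open(fn) as src:
        meta = tmpl.meta.copy()
        meta.update(compress='lzw', dtype=src.dtypes[0], count=src.count, nodata=src.nodata)
        with rasterio.open(out_fn, 'w', **meta) as dst:
            for band in range(1, src.count + 1):
                reproject(source=rasterio.band(src, band),
                          destination=rasterio.band(dst, band),
                          resampling=Resampling.nearest)
    return out_fn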


# # # RUN THE ABOVE # # # #
# import os, glob
# import numpy as np

# # list the data we want
# input_path = '/Data/Base_Data/Climate/AK_CAN_2km/projected/AR5_CMIP5_models'
# out = [ root for root, subs, files in os.walk( input_path ) \
# 		if len( glob.glob( os.path.join( root, '*.tif' ) ) ) > 0 and not 'derived' in root ]
# input_paths = np.unique( out ).tolist()

# model = 'CCSM4' # [ 'CCSM4', 'CNRM-CM5', 'GFDL-CM3', 'GISS-E2-R', 'IPSL-CM5A-LR', 'MPI-ESM-LR', 'MRI-CGCM3', '5modelAvg' ]
# scenarios = [ 'rcp26', 'rcp45', 'rcp60', 'rcp85' ]
if __name__ == '__main__':
	import os, glob, subprocess
	from pathos.mp_map import mp_map

	base_paths = ['/Data/Base_Data/Climate/AK_CAN_2km/projected/AR5_CMIP5_models', '/Data/Base_Data/Climate/AK_CAN_2km/historical/AR5_CMIP5_models']

	out = []
	for base_path in base_paths:
		for root, subs, files in os.walk( base_path ):
			out = out + [ os.path.join( root, fn ) for fn in files if 'CCSM4' in fn ]

	out_files = [ fn.replace( 'CCSM4', 'NCAR-CCSM4' ) for fn in out ]
	args = zip( out, out_files )

	_ = mp_map( wrap, args, nproc=32 )

# * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 
# # # # Here is how I renamed the 5modelAvg to 5ModelAvg, which is better
# * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 

def rename_file( in_fn, out_fn, *args, **kwargs ):
	import os, shutil
	dirname = os.path.dirname( out_fn )
	try:
		if not os.path.exists( dirname ):
			os.makedirs( dirname )
	except OSError: # directory may already exist (race between workers)
		pass
	# note: this copies rather than moves -- the original file is left in place
	return shutil.copy( in_fn, out_fn )
		out.write_band( 1, arr )
	return fn.replace( '.tif', '_fix.tif' )

if __name__ == '__main__':
	import rasterio, glob, os
	import numpy as np
	import pandas as pd
	from pathos.mp_map import mp_map
	from functools import partial

	mask = '/workspace/Shared/Tech_Projects/ALFRESCO_Inputs/project_data/TEM_Data/extents/IEM_Mask_1km.tif'
	mask = rasterio.open( mask ).read( 1 )

	l = glob.glob( '/workspace/Shared/Tech_Projects/ALFRESCO_Inputs/project_data/TEM_Data/girr_radiation_cmip3_process/IEM/*.tif' )
	args = [ (i, mask) for i in l ]
	done = mp_map( lambda x: run_replace( *x ), args, nproc=32)




	# for fn in l:
	# 	rst = rasterio.open( fn )

	# 	meta = rst.meta
	# 	meta.update( compress='lzw' )

	# 	arr = rst.read( 1 )
	# 	ind = np.where( (mask == 1) & (arr > -3) )
	# 	ind_zip = zip( *ind )

	# 	# a little neighborhood math for the queens case
# some pathing
input_path = '/Data/malindgren/cru_november_final/IEM/clouds/ar5'
output_path = '/Data/malindgren/cru_november_final/final/IEM/ar5'

# make all combinations of the output variables
combinations = itertools.product( models, scenarios, variables )

for model, scenario, variable in combinations:
	print( ' '.join([ 'running:', model, scenario ]) )
	l = glob.glob( os.path.join( input_path, model, variable, 'downscaled', scenario, '*.tif' ) )
	out = os.path.join( output_path, model, variable, scenario )

	# remove them if they exist with the pct in the name
	ol = glob.glob( os.path.join( out, '*rsds_*_pct_*.tif' ) )
	if len( ol ) > 0:
		_ = mp_map( lambda x: os.unlink( x ), ol, nproc=32 )

	f = partial( shutil.move, dst=out )

	_ = mp_map( f, l, nproc=32 )



# # # # CHANGE METRIC NAME # # # # # # 
# # # #
# a tool to move the files to the needed locations in the final directory
# # # #

import os, glob, itertools, shutil
from pathos.mp_map import mp_map
from functools import partial
	climatology = ds_hist.tasmax.sel( time=slice( str(climbegin), str(climend) ) )
	climatology = climatology.groupby( 'time.month' ).apply( lambda x: np.mean( x, axis=0 ) )
	anomalies = ds.tasmax.groupby( 'time.month' ) - climatology
	anomalies[ 'mask' ] = rasterize( shapes, anomalies.coords, longitude='lon', latitude='lat', fill=0 )
	anom_mean = anomalies.sel( time=slice( str(begin), str(end) ) ).where( anomalies.mask == 1 ).mean( axis=(1,2) )

	# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
	# now lets do the same thing with the rasterio-downscaled data
	# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
	path = '/Users/malindgren/Documents/downscale_epscor/august_fix/EPSCOR_SE_DELIVERY_AUG2016/downscaled/5ModelAvg/rcp45/tasmax'
	# path = '/Users/malindgren/Documents/downscale_epscor/august_fix/EPSCOR_SC_DELIVERY_AUG2016/derived/grids/annual_seasonals/5ModelAvg/rcp45/tasmax'
	files = glob.glob( os.path.join( path, '*.tif' ) )
	files = sort_files( only_years( files, begin=begin, end=end, split_on='_', elem_year=-1 ) )
	
	# get the means across space for each timestep
	down_mean = mp_map( masked_mean, files, nproc=4 )
	# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
	# # # # # NOW LETS SEE THIS WITH THE LINEAR INTERPOLATION USED... as a TEST. # # # # # # # #
	# path = '/Users/malindgren/Documents/downscale_epscor/august_fix/CCSM4_clip/tasmax'
	# files = glob.glob( os.path.join( path, '*.tif' ) )
	# files = sort_files( only_years( files, begin=begin, end=end, split_on='_', elem_year=-1 ) )
	
	# # get the means across space for each timestep
	# down_mean_linear = mp_map( masked_mean, files, nproc=4 )
	# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 

	# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
	# now lets do the same thing with the PRISM data
	# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
	# # clipped data with the below with the epscor crop/clip code:
	# base_path = '/Users/malindgren/Documents/downscale_epscor/tasmax'
						shp_fn, rst_fn, out_fn ])
	return out_fn

def wrap( x ):
	''' wrapper for clean multiprocessing call to pool.map '''
	return crop_clip( *x )

if __name__ == '__main__':
	import os, glob, itertools, rasterio
	import xarray as xr
	import numpy as np
	import pandas as pd
	from pathos.mp_map import mp_map

	# setup args
	# base_path = '/workspace/Shared/Tech_Projects/EPSCoR_Southcentral/project_data/downscaled'
	base_path = '/workspace/Shared/Tech_Projects/EPSCoR_Southcentral/project_data/derived_grids'
	# output_path = '/workspace/Shared/Tech_Projects/EPSCoR_Southcentral/project_data/EPSCOR_SC_DELIVERY_SEP2016/downscaled'
	output_path = '/workspace/Shared/Tech_Projects/EPSCoR_Southcentral/project_data/EPSCOR_SC_DELIVERY_SEP2016/derived/grids'
	ncpus = 32
	subdomain_fn = '/workspace/Shared/Tech_Projects/EPSCoR_Southcentral/project_data/SCTC_studyarea/Kenai_StudyArea.shp'

	# list up all the args we want to run through the multicore clipping
	args_list = []
	for root, subs, files in os.walk( base_path ):
		tif_files = [ fn for fn in files if fn.endswith( '.tif' ) ]
		if len( tif_files ) > 0:
			args_list = args_list + [ ( subdomain_fn, os.path.join( root, fn ), os.path.join( root, fn ).replace( base_path, output_path ) ) for fn in tif_files ]
	
	out = mp_map( wrap, args_list, nproc=ncpus )
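# `crop_clip` above is truncated (only its tail survives); a hedged
# reconstruction, assuming a gdalwarp cutline call whose trailing arguments
# match the visible fragment -- the flags are illustrative:
def crop_clip( shp_fn, rst_fn, out_fn ):
	import os, subprocess
	dirname = os.path.dirname( out_fn )
	if not os.path.exists( dirname ):
		os.makedirs( dirname )
	subprocess.call([ 'gdalwarp', '-q', '-overwrite', '-cutline',
						shp_fn, '-crop_to_cutline', rst_fn, out_fn ])
	return out_fn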
# print the input to screen
x = np.arange(N * nodes, dtype=np.float64)
print("Input: %s\n" % x)


# run sin2 in series, then print to screen
print("Running serial python ...")
y = list(map(sin2, x))
print("Output: %s\n" % np.asarray(y))


# map sin2 to the workers, then print to screen
print("Running mpi4py on %d cores..." % nodes)
y = mpi_map(sin2, x, nnodes=nodes)
print("Output: %s\n" % np.asarray(y))


# map sin2 to the workers, then print to screen
print("Running multiprocesing on %d processors..." % nodes)
y = mp_map(sin2, x, nproc=nodes)
print("Output: %s\n" % np.asarray(y))


# map sin2 to the workers, then print to screen
print("Running parallelpython on %d cpus..." % nodes)
y = pp_map(sin2, x, ncpus=nodes, servers=('mycpu.mydomain.com',))
print("Output: %s\n" % np.asarray(y))

# EOF
	# set bounds to interpolate over
	# xmin, ymin, xmax, ymax = (0,-90, 360, 90)
	xmin, ymin, xmax, ymax = (160, 0, 300, 90)

	# grid dimensions: 60 cells per degree of extent
	rows = 60 * ( ymax - ymin )
	cols = 60 * ( xmax - xmin )

	# build the output grid
	x = np.linspace( xmin, xmax, cols )
	y = np.linspace( ymin, ymax, rows )
	xi, yi = np.meshgrid( x, y )
	args_list = [ {'x':np.array(cru_df['lon']), 'y':np.array(cru_df['lat']),
					'z':np.array(cru_df[month]), 'xi':xi, 'yi':yi} for month in months ]

	# run interpolation in parallel
	interped_grids = mp_map( regrid, args_list, nproc=12 )

	# stack and give a proper nodata value
	arr = np.array([ i.data for i in interped_grids ])
	arr[ np.isnan(arr) ] = -3.4e+38
	pcll_affine = transform_from_latlon( y, x )

	meta = {'affine': pcll_affine,
			'count': 1,
			'crs': {'init':'epsg:4326'},
			'driver': u'GTiff',
			'dtype': 'float32',
			'height': rows,
			'nodata': -3.4e+38,
			'width': cols,
			'compress':'lzw'}
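	# the snippet ends before the write step; a minimal sketch of how the
	# stacked grids would be written out with the `meta` above (filenames are
	# illustrative; 'affine' is the rasterio 0.x key, newer versions expect 'transform'):
	import rasterio
	for i, grid in enumerate( arr, start=1 ):
		with rasterio.open( 'interpolated_{:02d}.tif'.format( i ), 'w', **meta ) as dst:
			dst.write( grid.astype( 'float32' ), 1 )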
	return out_fn

def wrap( x ):
	''' wrapper for clean multiprocessing call to pool.map '''
	return crop_clip( *x )

if __name__ == '__main__':
	import os, glob, itertools, rasterio
	import xarray as xr
	import numpy as np
	import pandas as pd
	from pathos.mp_map import mp_map

	# setup args
	base_path = '/Data/Base_Data/Climate/AK_CAN_2km/projected/AR5_CMIP5_models'
	output_path = '/workspace/Shared/Tech_Projects/EPSCoR_Southcentral/project_data/EPSCOR_SC_DELIVERY_SEP2016/derived/grids/monthly_decadals'
	ncpus = 32
	subdomain_fn = '/workspace/Shared/Tech_Projects/EPSCoR_Southcentral/project_data/SCTC_studyarea/Kenai_StudyArea.shp'
	models = [ 'IPSL-CM5A-LR', 'MRI-CGCM3', 'GISS-E2-R', 'GFDL-CM3', 'NCAR-CCSM4', '5ModelAvg' ]

	# list up all the args we want to run through the multicore clipping
	fn_list = []
	for root, subs, files in os.walk( base_path ):
		if any( model in root for model in models ):
			if 'derived' in root:
				if len( [ fn for fn in files if fn.endswith( '.tif' ) ] ) > 0:
					fn_list = fn_list + glob.glob( os.path.join( root, '*.tif' ) )

	args_list = [ make_args( rst_fn, subdomain_fn, output_path ) for rst_fn in fn_list if 'dof' in rst_fn or 'dot' in rst_fn or 'logs' in rst_fn ]
	out = mp_map( wrap, args_list, nproc=32 )
	
out_dir_lookup = {'dot':'decadal_dot','dof':'decadal_dof','logs':'decadal_logs' }

# get all files 
file_list = []
for root, subs, files in os.walk( in_path ):
	if len(files) > 0:
		file_list = file_list + [ os.path.join( root, fn ) for fn in files ]

# make some arguments for passing to shutil.copy
args = []
for fn in file_list:
	folder_name = out_dir_lookup[ os.path.basename( fn ).split( '_' )[0] ]
	out_path = os.path.join( output_path, folder_name )	
	new_fn = fn.replace( in_path, out_path ) 
	args = args + [(fn, new_fn)]

def copy_it( x ):
	fn, new_fn = x
	out_path = os.path.dirname( new_fn )
	try:
		if not os.path.exists( out_path ):
			os.makedirs( out_path )
	except OSError: # directory may already exist (race between workers)
		pass
	# out_path.replace( '/dof', '' )
	return shutil.copy( fn, new_fn )

_ = mp_map( copy_it, args, nproc=16 )


Example #17
	# # # # TESTING STUFF # # # # # # #
	# forget the above for testing, lets use Stephs radians
	# latr = rasterio.open('/workspace/Shared/Tech_Projects/ESGF_Data_Access/project_data/tem_data_sep2016/radiance/radians.txt')
	# latr = latr.read( 1 )
	# # # # # # # # # # # # # # # # # #

	# calc ordinal days to compute
	ordinal_days = range( 1, 365+1, 1 )
	# make a monthly grouper of ordinal days
	ordinal_to_months = [ str(datetime.date.fromordinal( i ).month) for i in ordinal_days ]
	# convert those months to strings
	ordinal_to_months = [ ('0'+month if len( month ) < 2 else month) for month in ordinal_to_months  ]

	# calc girr
	f = partial( calc_ra, lat=lat_rad )
	Ra = mp_map( f, ordinal_days, nproc=32 )
	Ra_monthlies = pd.Series( Ra ).groupby( ordinal_to_months ).apply( lambda x: np.array(x.tolist()).mean( axis=0 ) )

	# iteratively put them back in the indexed locations we took them from
	meta = rst.meta
	meta.pop( 'transform' )
	meta.update( compress='lzw', count=1, dtype='float32' )
	for month in Ra_monthlies.index:
		arr = rst.read( 1 )
		arr[ data_ind ] = Ra_monthlies.loc[ month ].tolist()
		output_filename = os.path.join( output_path, 'girr_w-m2_{}_.tif'.format(str( month ) ) )
		with rasterio.open( output_filename, 'w', **meta ) as out:
			out.write( arr.astype( np.float32 ), 1 )
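# `calc_ra` is not shown in this snippet; a hedged sketch following the
# standard FAO-56 extraterrestrial radiation formula (an assumption about what
# the original computes; `lat` is the latitude grid in radians):
def calc_ra( day, lat ):
	import numpy as np
	Gsc = 0.0820 # solar constant [MJ m-2 min-1]
	dr = 1.0 + 0.033 * np.cos( 2.0 * np.pi * day / 365.0 ) # inverse relative earth-sun distance
	delta = 0.409 * np.sin( 2.0 * np.pi * day / 365.0 - 1.39 ) # solar declination
	ws = np.arccos( np.clip( -np.tan( lat ) * np.tan( delta ), -1.0, 1.0 ) ) # sunset hour angle
	return ( 24.0 * 60.0 / np.pi ) * Gsc * dr * \
		( ws * np.sin( lat ) * np.sin( delta ) + np.cos( lat ) * np.cos( delta ) * np.sin( ws ) )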


Example #18
	def downscale( self, output_dir, prefix=None ):
		import affine
		from affine import Affine
		import itertools
		from functools import partial
		from pathos.mp_map import mp_map

		operation_switch = { 'add':self.add, 'mult':self.mult }

		def two_digit_month( x ):
			''' make 1 digit month a standard 2-digit for output filenames '''
			month = str( x )
			if len(month) == 1:
				month = '0'+month
			return month

		time_suffix = [ '_'.join([two_digit_month( t.month ), str(t.year)]) for t in self.anomalies.time.to_pandas() ]

		# handle missing variable / model names
		if self.varname is not None:
			variable = self.varname
		elif self.historical.variable is not None:
			variable = self.historical.variable
		else:
			variable = 'variable'

		if self.modelname is not None:
			model = self.modelname
		elif self.historical.model is not None:
			model = self.historical.model
		else:
			model = 'model'

		output_filenames = [ os.path.join( output_dir, '_'.join([variable, self.historical.metric, self.historical.units, \
					self.historical.project, model, self.historical.scenario, ts]) + '.tif')  for ts in time_suffix ]

		# if there is a specific name prefix, use it
		if prefix is not None:
			output_filenames = [ os.path.join( output_dir, '_'.join([prefix, ts]) + '.tif' ) for ts in time_suffix ]

		# rotate to pacific-centered
		if ( self.anomalies.lon.data > 200.0 ).any():
			dat, lons = ( self.anomalies, self.anomalies.lon )
			self.anomalies_rot = dat
			src_transform = self.historical.transform_from_latlon( self.historical.ds.lat, lons )
			print( 'anomalies NOT rotated!' )
		else:
			dat, lons = utils.shiftgrid( 0., self.anomalies, self.anomalies.lon )
			self.anomalies_rot = dat
			src_transform = self.historical.transform_from_latlon( self.historical.ds.lat, lons )
			print( src_transform )
			print( 'anomalies rotated!' )

		# run and output
		rstlist = self.baseline.filelist * ( self.anomalies_rot.shape[0] // 12 ) # floor division: integer repeat count
		
		if isinstance( self.anomalies_rot, xr.Dataset ):
			self.anomalies_rot = self.anomalies_rot[ self.historical.variable ].data
		elif isinstance( self.anomalies_rot, xr.DataArray ):
			self.anomalies_rot = self.anomalies_rot.data
		# otherwise it is assumed to already be a bare ndarray

		args = zip( self.anomalies_rot, rstlist, output_filenames )

		args = [{'anom':i, 'base':j, 'output_filename':k,\
				'downscaling_operation':self.downscaling_operation, \
				'post_downscale_function':self.post_downscale_function,\
				'mask':self.mask, 'mask_value':self.mask_value } for i,j,k in args ]

		# partial and wrapper
		f = partial( self.interp_ds, src_crs=self.src_crs, src_nodata=self.src_nodata, \
					dst_nodata=self.dst_nodata, src_transform=src_transform, resample_type=self.resample_type )

		run = partial( self._run_ds, f=f, operation_switch=operation_switch, anom=self.anom, mask_value=self.mask_value )

		# run it
		out = mp_map( run, args, nproc=self.ncpus )
		return output_dir
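	# the add/mult methods dispatched through operation_switch above are not
	# shown; minimal sketches, assuming absolute anomalies are added to the
	# baseline and relative anomalies multiply it:
	def add( self, anom, base ):
		return base + anom

	def mult( self, anom, base ):
		return base * anom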
	def downscale_ar5_ts( self, *args, **kwargs ):
		from pathos.mp_map import mp_map

		# build output dirs

		# template setup

		# calc the anomalies
		anomalies = self._calc_anomalies()
		anomalies_pcll, lons_pcll = self.utils.shiftgrid( 0., anomalies, anomalies.lon.data ) # grabs lons from the xray ds

		# mesh the lons and lats and unravel them to 1-D
		lo, la = [ i.ravel() for i in np.meshgrid( lons_pcll, anomalies.lat ) ]
		
		# convert into pandas.DataFrame and drop all the NaNs -- land-only dataset
		anom_df_list = [ pd.DataFrame({ 'anom':i.ravel(), 'lat':la, 'lon':lo }).dropna( axis=0, how='any' ) for i in anomalies_pcll ]
		xi, yi = np.meshgrid( lons_pcll, anomalies.lat.data )

		# argument setup -- HARDWIRED
		# src_transform = affine.Affine( 0.5, 0.0, -180.0, 0.0, -0.5, 90.0 )
		# src_nodata = -9999.0
		# [!] THE ABOVE ARE INCORRECT FOR THE MODELED DATA


		# output_filenames setup
		dates = anomalies.time.to_pandas()
		years = np.unique( dates.apply( lambda x: x.year ) ).tolist()
		months = [ i if len(i)==2 else '0'+i for i in np.arange( 1, 12+1, 1 ).astype( str ).tolist() ]
		month_year = [ (month, year) for year in years for month in months ]

		# read in the pre-processed 12-month climatology
		clim_list = sorted( glob.glob( os.path.join( self.clim_path, '*.tif' ) ) ) # this could catch you.
		clim_dict = { month:rasterio.open( fn ).read( 1 ) for month, fn in zip( months, clim_list ) }
		# [!] THIS BELOW NEEDS RE-WORKING FOR THE AR5 DATA MODELED DATA
		output_filenames = [ os.path.join( downscaled_path, '_'.join([ variable, self.metric, cru_ts_version, 'downscaled', month, str(year) ])+'.tif' )
								for month, year in month_year ]

		# set downscaling_operation based on self.absolute boolean
		if self.absolute == True:
			downscaling_operation = 'add'
		elif self.absolute == False:
			downscaling_operation = 'mult'
		else:
			raise AttributeError( 'downscaling operation: self.absolute must be boolean' )

		args_list = [ { 'anom_df':anom_df, 
						'meshgrid_tuple':(xi, yi), 
						'template_raster_fn':template_raster_fn, 
						'lons_pcll':lons_pcll, 
						'src_transform':src_transform, 
						'src_crs':self.src_crs, \
						'src_nodata':src_nodata, 
						'output_filename':out_fn,
						'baseline_arr':clim_dict[ self._fn_month_grouper( out_fn ) ],
						'downscaling_operation':downscaling_operation, 
						'post_downscale_function':self.post_downscale_function,
						'write_anomalies':self.write_anomalies }
							for anom_df, out_fn in zip( anom_df_list, output_filenames ) ]

		# run anomalies interpolation and downscaling in a single go.
		# ( anom_df, meshgrid_tuple, template_raster_fn, lons_pcll, src_transform, src_crs, src_nodata, output_filename, write_anomalies ) 	
		out = mp_map( lambda args: self._interp_downscale_wrapper( args_dict=args ), args_list, nproc=self.ncores )
		return 'downscaling complete. files output at: %s' % base_path
Example #20
	# files to folders:
	import os, itertools, glob, shutil
	from pathos.mp_map import mp_map
	from functools import partial

	models = ['MRI-CGCM3' , 'GFDL-CM3', 'CCSM4', 'IPSL-CM5A-LR']
	base_path = '/atlas_scratch/apbennett/Calibration/HighCalib/FMO_Calibrated/'
	scenarios = ['NoFMO','AltFMO','']
	for model in models:
		for scenario in scenarios:

			if scenario == '':
				maps_path = os.path.join( base_path, '_'.join([ model, 'rcp85' ]), 'Maps' )
			else:
				maps_path = os.path.join( base_path, '_'.join([ model, 'rcp85', scenario ]), 'Maps' )
			print( maps_path )



			variables = ['Age', 'Veg', 'FireScar', 'BasalArea', 'BurnSeverity'] # more can be added if they are needed. 
			out = [ i for i in itertools.product( [maps_path], variables ) ]

			# run
			for i in out:
				l = glob.glob( os.path.join( *i ) + '*.tif' )
				# run it in parallel:
				f = partial( move_files, output_path=maps_path )
				out_filenames = mp_map( f, sequence=l, nproc=32 )
Example #21
	def downscale_cru_ts( self, *args, **kwargs ):
		'''
		run the CRU downscaling using the monthly climatology files given
		'''
		from pathos.mp_map import mp_map
		import glob, affine, rasterio

		nc_varname = self._get_varname_cru( )
		
		# handle cases where the desired varname != one parsed from file.
		# set it to self -- DangerTown™
		if self.variable is None:
			self.variable = nc_varname

		# build output dirs
		anomalies_path = os.path.join( self.base_path, self.variable, 'anom' )
		if not os.path.exists( anomalies_path ):
			os.makedirs( anomalies_path )

		downscaled_path = os.path.join( self.base_path, self.variable, 'downscaled' )
		if not os.path.exists( downscaled_path ):
			os.makedirs( downscaled_path )

		# template setup 
		template_raster = rasterio.open( self.template_raster_fn )
		template_meta = template_raster.meta
		template_meta.update( crs={'init':'epsg:3338'} )

		# make a mask with values of 0=nodata and 1=data
		template_raster_mask = template_raster.read_masks( 1 ) # mask of band 1 is all we need
		template_raster_mask[ template_raster_mask == 255 ] = 1

		anomalies = self._calc_anomalies( self.cru_ts, absolute=self.absolute )
		anomalies_pcll, lons_pcll = self.utils.shiftgrid( 0., anomalies, anomalies.lon.data ) # grabs lons from the xray ds

		# mesh the lons and lats and unravel them to 1-D
		lo, la = [ i.ravel() for i in np.meshgrid( lons_pcll, anomalies.lat ) ]
		
		# convert into pandas.DataFrame and drop all the NaNs -- land-only dataset
		anom_df_list = [ pd.DataFrame({ 'anom':i.ravel(), 'lat':la, 'lon':lo }).dropna( axis=0, how='any' ) for i in anomalies_pcll ]
		xi, yi = np.meshgrid( lons_pcll, anomalies.lat.data )

		# argument setup -- HARDWIRED
		src_transform = affine.Affine( 0.5, 0.0, -180.0, 0.0, -0.5, 90.0 )
		src_nodata = -9999.0
			
		# output_filenames setup
		dates = anomalies.time.to_pandas()
		years = np.unique( dates.apply( lambda x: x.year ) ).tolist()
		# years = np.unique( self._get_years_cru( self.cru_ts ) ) # CHANGED!
		cru_ts_version = self._get_version_cru( self.cru_ts ) # works if naming convention stays same
		months = [ i if len(i)==2 else '0'+i for i in np.arange( 1, 12+1, 1 ).astype( str ).tolist() ]
		month_year = [ (month, year) for year in years for month in months ]

		# read in the pre-processed 12-month climatology
		clim_list = sorted( glob.glob( os.path.join( self.clim_path, '*.tif' ) ) ) # this could catch you.
		clim_dict = { month:rasterio.open( fn ).read( 1 ) for month, fn in zip( months, clim_list ) }
		output_filenames = [ os.path.join( downscaled_path, '_'.join([ self.variable, self.metric, cru_ts_version, 'downscaled', month, str(year) ])+'.tif' )
								for month, year in month_year ]

		# set downscaling_operation based on self.absolute boolean
		if self.absolute == True:
			downscaling_operation = 'add'
		elif self.absolute == False:
			downscaling_operation = 'mult'
		else:
			raise AttributeError( 'downscaling operation: self.absolute must be boolean' )

		args_list = [ { 'anom_df':anom_df,
						'meshgrid_tuple':(xi, yi), 
						'template_raster_fn':self.template_raster_fn,
						'lons_pcll':lons_pcll, 
						'src_transform':src_transform, 
						'src_crs':self.src_crs,
						'src_nodata':src_nodata, 
						'output_filename':out_fn,
						'baseline_arr':clim_dict[ self._fn_month_grouper( out_fn ) ],
						'downscaling_operation':downscaling_operation, 
						'post_downscale_function':self.post_downscale_function,
						'write_anomalies':self.write_anomalies }
							for anom_df, out_fn in zip( anom_df_list, output_filenames ) ]

		# run anomalies interpolation and downscaling in a single go.
		out = mp_map( lambda args: self._interp_downscale_wrapper( args_dict=args ), args_list, nproc=self.ncores )
		return 'downscaling complete. files output at: %s' % self.base_path
    except OSError:  # directory may already exist (race between workers)
        pass
    return shutil.copy(fn, out_fn)


if __name__ == "__main__":
    import os, glob
    from pathos.mp_map import mp_map
    from functools import partial

    # base_dir = '/workspace/Shared/Tech_Projects/ESGF_Data_Access/project_data/tem_data_sep2016/raw/cmip5' # /output1/NASA-GISS/GISS-E2-R/historical/mon/atmos/Amon/r1i1p1/v20121015/hur
    base_dir = "/workspace/Shared/Tech_Projects/EPSCoR_Southcentral/project_data/cmip5/prepped"
    output_dir = "/workspace/Shared/Tech_Projects/ESGF_Data_Access/project_data/tem_data_sep2016/cmip5/prepped"

    filelist = []
    for root, subs, files in os.walk(base_dir):
        if len(files) > 0:
            filelist = filelist + [
                os.path.join(root, fn)
                for fn in files
                if fn.endswith(".nc") and ("tas_" in fn or "pr_" in fn)
            ]
    f = partial(copy_fn, output_dir=output_dir)
    done = mp_map(f, filelist, nproc=32)
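# `copy_fn` is truncated at the top of this snippet (only its tail survives);
# a hedged reconstruction consistent with that tail and the partial() call
# above -- in the original it would sit before the __main__ block:
def copy_fn(fn, output_dir):
    import os, shutil
    out_fn = os.path.join(output_dir, os.path.basename(fn))
    try:
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
    except OSError:  # directory may already exist (race between workers)
        pass
    return shutil.copy(fn, out_fn)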

# REACCESS WITH SYNDA ALL THE FILES WE NEED, THIS WAY WE CAN AUTOPURGE THE OLD VERSIONS WE HAVE HERE.
# project=CMIP5
# model=MRI-CGCM3 GISS-E2-R GFDL-CM3 IPSL-CM5A-LR CCSM4
# experiment=rcp26
# ensemble=r1i1p1
# variable[atmos][mon]=clt
# timeslice=1800-2150
if __name__ == '__main__':
	import os, glob, subprocess, itertools
	from pathos.mp_map import mp_map

	output_path = '/workspace/Shared/Tech_Projects/EPSCoR_Southcentral/project_data/downscaled'
	ncpus = 32
	project = 'cmip5'
	variables = [ 'tas', 'pr' ]
	models = [ 'IPSL-CM5A-LR', 'MRI-CGCM3', 'GISS-E2-R', 'GFDL-CM3', 'NCAR-CCSM4', '5ModelAvg' ]
	scenarios = [ 'historical', 'rcp26', 'rcp45', 'rcp60', 'rcp85' ]
	
	commands = []
	for variable, model, scenario in itertools.product( variables, models, scenarios ):
		if scenario == 'historical':
			base_path = '/Data/Base_Data/Climate/AK_CAN_2km/historical/AR5_CMIP5_models'
		else:
			base_path = '/Data/Base_Data/Climate/AK_CAN_2km/projected/AR5_CMIP5_models'
		
		# print( '{} {} {}'.format( variable, model, scenario ) )
		base = os.path.join( base_path, scenario, model, variable )
		out = os.path.join( output_path, model, scenario )

		if not os.path.exists( out ):
			os.makedirs( out )

		# symlink them to their new directory structure
		commands = commands + [ ' '.join([ 'cp', '-rs', base, out ]) ]
		# _ = subprocess.call([ 'cp', '-rs', base, out ])
	
	final = mp_map( run, commands, nproc=ncpus )
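	# `run` is not shown in this snippet; a plausible worker for the shell
	# command strings built above (an assumption):
	def run( cmd ):
		import subprocess
		return subprocess.call( cmd, shell=True )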
	def downscale_ar5_ts( self, *args, **kwargs ):
		#  * * * * * * * * * *
		# template setup
		from pathos.mp_map import mp_map
		import glob, affine, rasterio

		nc_varname = self._get_varname_ar5()
		# handle cases where the desired varname != one parsed from file.
		if self.variable is None:
			variable = nc_varname
		else:
			variable = self.variable
		
		print( variable )

		# build output dirs
		anomalies_path = os.path.join( self.base_path, variable, 'anom' )
		if not os.path.exists( anomalies_path ):
			os.makedirs( anomalies_path )

		downscaled_path = os.path.join( self.base_path, variable, 'downscaled' )
		if not os.path.exists( downscaled_path ):
			os.makedirs( downscaled_path )

		#  * * * * * * * * * *

		# calc the anomalies
		anomalies = self._calc_anomalies()
		anomalies_pcll, lons_pcll = self.utils.shiftgrid( 0., anomalies, anomalies.lon.data ) # grabs lons from the xray ds

		# mesh the lons and lats and unravel them to 1-D
		lo, la = [ i.ravel() for i in np.meshgrid( lons_pcll, anomalies.lat ) ]
		
		# convert into pandas.DataFrame and drop all the NaNs -- land-only dataset
		anom_df_list = [ pd.DataFrame({ 'anom':i.ravel(), 'lat':la, 'lon':lo }).dropna( axis=0, how='any' ) for i in anomalies_pcll ]
		xi, yi = np.meshgrid( lons_pcll, anomalies.lat.data )

		# some metadata
		src_transform = self._calc_ar5_affine()
		# argument setup -- HARDWIRED
		src_nodata = None # DangerTown
		# src_crs = {'init':'epsg:4326'} # DangerTown

		# output_filenames setup
		dates = anomalies.time.to_pandas()
		years = np.unique( dates.apply( lambda x: x.year ) ).tolist()
		months = [ i if len(i)==2 else '0'+i for i in np.arange( 1, 12+1, 1 ).astype( str ).tolist() ]
		month_year = [ (month, year) for year in years for month in months ]

		# read in the pre-processed 12-month climatology
		clim_list = sorted( glob.glob( os.path.join( self.clim_path, '*.tif' ) ) ) # this could catch you.
		clim_dict = { month:rasterio.open( fn ).read( 1 ) for month, fn in zip( months, clim_list ) }
		
		# [!] THIS BELOW NEEDS RE-WORKING FOR THE AR5 DATA MODELED DATA 
		output_filenames = [ os.path.join( downscaled_path, '_'.join([ variable, self.metric, 'ar5', 'downscaled', month, str(year) ])+'.tif' )
								for month, year in month_year ]

		# set downscaling_operation based on self.absolute boolean
		if self.absolute == True:
			downscaling_operation = 'add'
		elif self.absolute == False:
			downscaling_operation = 'mult'
		else:
			raise AttributeError( 'downscaling operation: self.absolute must be boolean' )

		args_list = [ { 'anom_df':anom_df, 
						'meshgrid_tuple':(xi, yi), 
						'template_raster_fn':self.template_raster_fn, 
						'lons_pcll':lons_pcll, 
						'src_transform':src_transform, 
						'src_crs':self.src_crs,
						'src_nodata':src_nodata,
						'output_filename':out_fn,
						'baseline_arr':clim_dict[ self._fn_month_grouper( out_fn ) ],
						'downscaling_operation':downscaling_operation, 
						'post_downscale_function':self.post_downscale_function,
						'write_anomalies':self.write_anomalies }
							for anom_df, out_fn in zip( anom_df_list, output_filenames ) ]

		# run anomalies interpolation and downscaling in a single go.
		# ( anom_df, meshgrid_tuple, template_raster_fn, lons_pcll, src_transform, src_crs, src_nodata, output_filename, write_anomalies ) 	
		out = mp_map( lambda args: self._interp_downscale_wrapper( args_dict=args ), args_list, nproc=self.ncores )
		return 'downscaling complete. files output at: %s' % self.base_path
		for m in models:
			out = {}
			for v in variables:
				# new delta version
				path = os.path.join( base_dir,'downscaled_minmax', m, scenario, v )
				files = glob.glob( os.path.join( path, '*.tif' ) )
				files = sort_files( only_years( files, begin=begin, end=end, split_on='_', elem_year=-1 ) )

				# make a mask
				rst = rasterio.open( files[0] )
				# mask_arr = np.empty_like( rst.read(1) )
				shapes = ( (geom, 0) for geom in shp.geometry ) # burn every geometry with value 0
				burned = features.rasterize(shapes=shapes, out_shape=rst.shape, fill=1, transform=rst.affine )

				f = partial( masked_mean, mask=burned, bounds=None )
				out[ v ] = mp_map( f, files, nproc=4 )
				
				# standard delta version
				path = os.path.join( base_dir, 'downscaled', m, scenario, v )
				files = glob.glob( os.path.join( path, '*.tif' ) )
				files = sort_files( only_years( files, begin=begin, end=end, split_on='_', elem_year=-1 ) )

				# make a mask
				rst = rasterio.open( files[0] )
				# mask_arr = np.empty_like( rst.read(1) )
				shapes = ( (geom, 0) for geom in shp.geometry ) # burn every geometry with value 0
				burned = features.rasterize(shapes=shapes, out_shape=rst.shape, fill=1, transform=rst.affine )

				f = partial( masked_mean, mask=burned, bounds=None )
				out[ v+'_old' ] = mp_map( f, files, nproc=4 )
	
	# unpack
	variable = args.variable
	base_dir = args.base_dir

	# # # # #TESTING
	# base_dir = '/workspace/Shared/Tech_Projects/EPSCoR_Southcentral/project_data'
	# variable = 'tas'
	# # # # # # # # 

	# some setup args
	base_dir = os.path.join( base_dir, 'downscaled' )
	variables = [ variable ] # ['pr','tas','tasmax', 'tasmin' ]
	scenarios = [ 'historical', 'rcp26', 'rcp45', 'rcp60', 'rcp85' ]
	models = [ 'IPSL-CM5A-LR', 'MRI-CGCM3', 'GISS-E2-R', 'GFDL-CM3', 'NCAR-CCSM4' ]

	for variable, scenario in itertools.product( variables, scenarios ):
		if scenario == 'historical':
			begin = 1900
			end = 2005
		else:
			begin = 2006
			end = 2100

		# list the files we want
		input_files = [ list_files( os.path.join( base_dir, model, scenario, variable ), begin, end ) for model in models ]
		grouped = zip( *input_files )

		# run it in parallel
		output_filenames = mp_map( generate, grouped, nproc=32 )
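	# `generate` is not shown; a hedged sketch, assuming each group holds the
	# same timestep across the five models and is averaged into a 5ModelAvg
	# grid (the output naming is illustrative):
	def generate( file_group ):
		import rasterio
		import numpy as np
		arrs = [ rasterio.open( fn ).read( 1 ) for fn in file_group ]
		meta = rasterio.open( file_group[0] ).meta
		meta.update( compress='lzw' )
		out_fn = file_group[0].replace( models[0], '5ModelAvg' )
		with rasterio.open( out_fn, 'w', **meta ) as dst:
			dst.write( np.mean( arrs, axis=0 ).astype( meta['dtype'] ), 1 )
		return out_fn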
	def _main( x, *args, **kwargs ):
		'''
		run the CRU downscaling using the monthly climatology files given
		'''
		from pathos.mp_map import mp_map
		import glob, affine

		nc_varname = get_varname_cru( nc_fn )
		# handle cases where the desired varname is not the same as the one parsed from file.
		if variable is None:
			variable = nc_varname
		
		# build output dirs
		anomalies_path = os.path.join( base_path, variable, 'anom' )
		if not os.path.exists( anomalies_path ):
			os.makedirs( anomalies_path )

		downscaled_path = os.path.join( base_path, variable, 'downscaled' )
		if not os.path.exists( downscaled_path ):
			os.makedirs( downscaled_path )

		# template setup 
		template_raster = rasterio.open( template_raster_fn )
		template_meta = template_raster.meta
		template_meta.update( crs={'init':'epsg:3338'} )

		# make a mask with values of 0=nodata and 1=data
		template_raster_mask = template_raster.read_masks( 1 ) # mask of band 1 is all we need
		template_raster_mask[ template_raster_mask == 255 ] = 1

		anomalies = calc_anomalies( nc_fn, nc_varname, climatology_begin, climatology_end, absolute ) # the absolute calculation needs some thought
		anomalies_pcll, lons_pcll = shiftgrid( 0., anomalies, anomalies.lon.data ) # grabs lons from the xray ds

		# mesh the lons and lats and unravel them to 1-D
		lo, la = [ i.ravel() for i in np.meshgrid( lons_pcll, anomalies.lat ) ]
		
		# convert into pandas.DataFrame and drop all the NaNs -- land-only dataset
		anom_df_list = [ pd.DataFrame({ 'anom':i.ravel(), 'lat':la, 'lon':lo }).dropna( axis=0, how='any' ) for i in anomalies_pcll ]
		xi, yi = np.meshgrid( lons_pcll, anomalies.lat.data )

		# argument setup -- HARDWIRED
		src_transform = affine.Affine( 0.5, 0.0, -180.0, 0.0, -0.5, 90.0 )
		src_crs = {'init':'epsg:4326'}
		src_nodata = -9999.0
			
		# output_filenames setup
		years = get_years_cru( nc_fn )
		cru_ts_version = get_version_cru( nc_fn ) # works if naming convention stays same
		months = [ i if len(i)==2 else '0'+i for i in np.arange( 1, 12+1, 1 ).astype( str ).tolist() ]
		month_year = [ (month, year) for year in years for month in months ]

		output_filenames = [ os.path.join( anomalies_path, '_'.join([ variable, metric, 'cru_ts'+str(cru_ts_version), 'anom', month, str(year) ])+'.tif' )
								for month, year in month_year ]

		# make a list of args to pass to the interpolation function
		args_list = [ {'anomalies':anom_df, 'meshgrid_tuple':(xi, yi), 'lons_pcll':lons_pcll, \
					'template_raster_fn':template_raster_fn, 'src_transform':src_transform, \
					'src_crs':src_crs, 'src_nodata':src_nodata, 'output_filename':fn } \
						for anom_df, fn in zip( anom_df_list, output_filenames ) ]
		
		anomalies = mp_map( lambda x: interpolate_anomalies( **x ), args_list, nproc=ncores )

		# read in the pre-processed 12-month climatology
		l = sorted( glob.glob( os.path.join( cl20_path, '*.tif' ) ) ) # this could catch you.
		clim_dict = { month:rasterio.open( fn ).read( 1 ) for month, fn in zip( months, l ) }

		# group the data by months
		out = pd.Series( anomalies )
		out_months = out.apply( fn_month_grouper )
		months_grouped = out.groupby( out_months )

		# unpack groups for parallelization
		mg = [(i,j) for i,j in months_grouped ]
		# make an args tuple to pass to the function
		args_list = [ ( i[1], clim_dict[i[0]], downscaled_path, absolute ) for i in mg ]

		# downscale / write to disk
		out = mp_map( lambda args: downscale_cru_historical( *args ), args_list, nproc=ncores )
		return 'downscaling complete. files output at: %s' % base_path
	model = '5ModelAvg'
	scenario = 'rcp45'
	begin = 2010
	end = 2015

	variables = ['tasmax', 'tas', 'tasmin']
	out = {}
	for v in variables:
		path = os.path.join( base_dir,'EPSCOR_SC_DELIVERY_AUG2016','downscaled', model, scenario, v )
		# for testing with new downscaler
		if v == 'tas':
			path = os.path.join( base_dir,'downscaled_tas_pr_epscor_sc', model, scenario, v )
		
		files = glob.glob( os.path.join( path, '*.tif' ) )
		files = sort_files( only_years( files, begin=begin, end=end, split_on='_', elem_year=-1 ) )
		out[ v ] = mp_map( masked_mean, files, nproc=4 )

	plot_df = pd.DataFrame( out )
	plot_df.index = pd.date_range( start=str(begin), end=str(end+1), freq='M' )
	plot_df = plot_df[['tasmax', 'tas', 'tasmin']] # get em in the order for plotting

	# now plot the dataframe
	if begin == end:
		title = 'EPSCoR SC AOI Temp Metrics {} {} {}'.format( model, scenario, begin )
	else:
		title = 'EPSCoR SC AOI Temp Metrics {} {} {} - {}'.format( model, scenario, begin, end )

	figsize = (13,9)
	colors = ['red', 'black', 'blue' ]

	ax = plot_df.plot( kind='line', title=title, figsize=figsize, color=colors )
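	# the snippet ends at the plot call; presumably the figure is written out
	# next, along the lines of (filename illustrative):
	import matplotlib.pyplot as plt
	plt.savefig( 'epscor_sc_temp_metrics_{}_{}_{}-{}.png'.format( model, scenario, begin, end ), dpi=150 )
	plt.close()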
			# 		begin = 1950
			# 		end = 1965
			# 	else:
			# 		old_dir = '/Data/Base_Data/Climate/AK_CAN_2km/projected/AR5_CMIP5_models'
			# 		begin = 2060
			# 		end = 2070

			figsize = (16,9)
			out_raw = {}
			out_anom = {}
			for v in variables:
				# raw
				path = os.path.join( base_dir, m, scenario, v )
				files = glob.glob( os.path.join( path, '*.tif' ) )
				files = sort_files( only_years( files, begin=begin, end=end, split_on='_', elem_year=-1 ) )
				out_raw[ v ] = mp_map( masked_mean, files, nproc=4 )
				# anom
				path = os.path.join( base_dir, m, scenario, v, 'anom' )
				files = glob.glob( os.path.join( path, '*.tif' ) )
				files = sort_files( only_years( files, begin=begin, end=end, split_on='_', elem_year=-2 ), elem_month=-3, elem_year=-2 )
				out_anom[ v ] = mp_map( masked_mean, files, nproc=4 )


				# if v == 'tas' or v == 'pr':
				# 	if m == 'CRU_TS323':
				# 		path = os.path.join( old_dir, v )
				# 	else:	
				#		path = os.path.join( old_dir, scenario, m, v )
				# 	files = glob.glob( os.path.join( path, '*.tif' ) )
				# 	files = sort_files( only_years( files, begin=begin, end=end, split_on='_', elem_year=-1 ) )
				# 	out[ v+'_old' ] = mp_map( masked_mean, files, nproc=4 )
Example #30
	def interp_na( self ):
		'''
		interpolate across NaNs (to the convex hull) in parallel and update
		self.ds in place; worker args are dicts passed to xyz_to_grid.

		method = [str] one of 'cubic', 'near', 'linear'
		output values are cast to np.float32
		'''
		from copy import copy
		import pandas as pd
		import numpy as np
		from pathos.mp_map import mp_map

		# remove the darn scientific notation
		np.set_printoptions( suppress=True )
		output_dtype = np.float32
		
		# if 0-360 leave it alone
		if ( self.ds.lon > 200.0 ).any():
			dat, lons = self.ds[ self.variable ].data, self.ds.lon
			self._lonpc = lons
		else:
			# greenwich-centered rotate to 0-360 for interpolation across pacific
			dat, lons = self.rotate( self.ds[ self.variable ].values, self.ds.lon, to_pacific=True )
			self._rotated = True # update the rotated attribute
			self._lonpc = lons

		# mesh the lons and lats and unravel them to 1-D
		xi, yi = np.meshgrid( self._lonpc, self.ds.lat.data )
		lo, la = [ i.ravel() for i in (xi,yi) ]

		# setup args for multiprocessing
		df_list = [ pd.DataFrame({ 'x':lo, 'y':la, 'z':d.ravel() }).dropna( axis=0, how='any' ) for d in dat ]

		args = [ {'x':np.array(df['x']), 'y':np.array(df['y']), 'z':np.array(df['z']), \
				'grid':(xi,yi), 'method':self.method, 'output_dtype':output_dtype } for df in df_list ]
		
		# # # # USE MLAB's griddata which we _can_ parallelize
		def wrap( d ):
			''' simple wrapper around utils.xyz_to_grid for mp_map'''
			x = np.array( d['x'] )
			y = np.array( d['y'] )
			z = np.array( d['z'] )
			xi, yi = d['grid']
			return utils.xyz_to_grid( x, y, z, (xi,yi), interp=d['method'] )
		# # # # 

		try:
			print( 'processing interpolation to convex hull in parallel using {} cpus.'.format( self.ncpus ) )
			dat_list = mp_map( wrap, args, nproc=self.ncpus )
			dat_list = [ i.data for i in dat_list ] # drop the output mask
			dat = np.array( dat_list )
		except Exception:
			print( 'processing cru re-gridding in serial due to multiprocessing issues...' )
			dat = np.array([ wrap( i ) for i in args ]) # wrap takes the whole args dict

		lons = self._lonpc
		if self._rotated: # rotate it back
			dat, lons = self.rotate( dat, lons, to_pacific=False )
				
		# place back into a new xarray.Dataset object for further processing
		self.ds = self.ds.update( { self.variable:( ['time','lat','lon'], dat ) } )
		print( 'ds interpolated updated into self.ds' )
		return 1
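# `utils.xyz_to_grid` (see the wrap helper above) is not shown; a hedged
# sketch built on scipy.interpolate.griddata rather than the matplotlib.mlab
# griddata mentioned in the comments, which was removed from matplotlib:
def xyz_to_grid( x, y, z, grid, interp='linear' ):
	import numpy as np
	from scipy.interpolate import griddata
	xi, yi = grid
	# note: scipy expects 'nearest' where this codebase says 'near'
	method = 'nearest' if interp == 'near' else interp
	return griddata( (np.array(x), np.array(y)), np.array(z), (xi, yi), method=method )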
	# path_list = [ os.path.join( input_path, model, variable, 'downscaled', experiment, '*.tif' ) for model in models for variable in variables for experiment in experiments ]

	# for path in path_list:
	# 	print( 'running: %s ' % path )
	# 	# path='/Data/malindgren/cru_november_final/IEM/ar5/MRI-CGCM3/cld/downscaled/*rcp26*.tif'
	# 	cld = pd.Series( glob.glob( path ) )
	# 	print( 'file count: %d' % len( cld ) )

	# 	output_filenames = cld.apply( lambda x: x.replace('cld', 'rsds').replace( '_pct_', '_MJ-m2-d1_' ) ).tolist()
	# 	month_grouper = cld.apply( lambda x: os.path.basename( x ).split( '.' )[0].split( '_' )[-2] )
	# 	args_list = [ ( cld, girr[ month ], out ) for cld, out, month in zip( cld.tolist(), output_filenames, month_grouper ) ]

	# 	# run it in parallel
	# 	out = mp_map( lambda x: generate_nirr( *x ), args_list, nproc=ncores )


	# # # CRU TS 3.23 Historical DATA VERSION
	# list the cloud files for a series
	path = '/workspace/Shared/Tech_Projects/ALFRESCO_Inputs/project_data/TEM_Data/cru_october_final/IEM/cru_ts31/cld/downscaled/*.tif'
	print( 'running: %s ' % path )
	cld = pd.Series( glob.glob( path ) )
	print( 'file count: %d' % len( cld ) )

	output_filenames = cld.apply( lambda x: x.replace('cld', 'rsds').replace( '_pct_', '_MJ-m2-d1_' ) ).tolist()
	month_grouper = cld.apply( lambda x: os.path.basename( x ).split( '.' )[0].split( '_' )[-2] )
	args_list = [ ( cld, girr[ month ], out ) for cld, out, month in zip( cld.tolist(), output_filenames, month_grouper ) ]

	# run it in parallel
	out = mp_map( lambda x: generate_nirr( *x ), args_list, nproc=ncores )
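	# `generate_nirr` is not shown; a hedged sketch, assuming incoming radiation
	# is estimated from girr and cloudiness via an Angstrom-Prescott-style
	# relation (the 0.25/0.50 coefficients are textbook defaults, not confirmed):
	def generate_nirr( cld_fn, girr_arr, out_fn ):
		import rasterio
		import numpy as np
		with rasterio.open( cld_fn ) as rst:
			meta = rst.meta
			cld = rst.read( 1 )
		meta.update( compress='lzw' )
		sunshine_frac = 1.0 - ( cld / 100.0 ) # crude clear-sky fraction from cloud percent
		nirr = girr_arr * ( 0.25 + 0.50 * sunshine_frac )
		with rasterio.open( out_fn, 'w', **meta ) as out:
			out.write( nirr.astype( np.float32 ), 1 )
		return out_fn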