def gen_config(config, task=0, runid=0, file="", list=[], reaction="sidis"):
    args = Namespace(config=config, task=task, runid=runid,
                     file=file, list=list, reaction=reaction)
    conf = load_config(args.config)
    conf["args"] = args
    return conf
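# Usage sketch (illustration only): assumes gen_config above sits next to
# load_config and argparse.Namespace, and that "input.py" is a hypothetical
# config file path understood by load_config.
def _example_gen_config():
    conf = gen_config("input.py", task=1, runid=7, reaction="sidis")
    # The run arguments stay attached to the returned config
    print(conf["args"].runid, conf["args"].reaction)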
def setup(self):
    self.config = tools.load_config()
    self.output_folder = self.config['data_folder'] + 'historic_observations/'

    begin_date = tools.string_to_date(str(self.config['historic_years_begin']) + '0101', h=False)
    end_date = tools.string_to_date(str(self.config['historic_years_end']) + '1231', h=False)

    prism = prism_tools.prism_ftp_info()

    date_range_daily = pd.date_range(begin_date, end_date, freq='1D').to_pydatetime()

    # All dates for PRISM should be available, but check just to make sure
    date_range_daily = [d for d in date_range_daily if prism.date_available(d)]

    self.job_list = []
    for d in date_range_daily:
        url = prism.get_download_url(d)
        day_status = prism.get_date_status(d)
        self.job_list.append({'date': d,
                              'download_url': url,
                              'day_status': day_status,
                              'varname': 'tmean'})

    self.total_jobs = len(self.job_list)

    prism.close()
def setup(self):
    self.config = tools.load_config()

    # Collect information on available CFS forecasts
    # TODO: extend this for the full years
    begin_date = tools.string_to_date(str(1995) + '0101', h=False)
    end_date = tools.string_to_date(str(2015) + '1201', h=False)

    # CFS reanalysis are monthly
    date_range_monthly = pd.date_range(begin_date, end_date, freq='MS').to_pydatetime()

    cfs = cfs_tools.cfs_ftp_info()

    self.job_list = []
    for d in date_range_monthly:
        download_url = cfs.reanalysis_url_from_timestamp(reanalysis_time=d,
                                                         protocal='http')
        self.job_list.append({'download_url': download_url, 'date': d})

    self.total_jobs = len(self.job_list)

    cfs.close()
def setup(self):
    self.config = tools.load_config()
    self.output_folder = self.config['historic_observations_folder']
def get_forecasts_from_date(forecast_date, destination_folder, lead_time=36,
                            forecast_ensemble_size=5, current_season_observed=None):
    """Download and process forecasts from a specific date.

    In daily forecasting the date will be "today", but in hindcasting it will
    be a date in the past. In that case it will obtain n forecasts, starting
    at the 18:00 hour of the forecast date and working backward in time,
    with n equal to forecast_ensemble_size. e.g. forecast_date = '20180215'
    will get forecasts at ['2018021518', '2018021512', '2018021506',
    '2018021500', '2018021418'], or earlier ones if fewer than 5 are available.

    current_season_observed: xarray object like the one produced by
    download_latest_observations
    """
    config = tools.load_config()

    if not current_season_observed:
        current_season_observed = xr.open_dataset(config['current_season_observations_file'])
        current_season_observed = current_season_observed.drop('status')

    land_mask = xr.open_dataset(config['mask_file'])

    tmean_names = config['variables_to_use']['tmean']

    most_recent_observed_day = pd.Timestamp(current_season_observed.time.values[-1]).to_pydatetime()
    first_forecast_day = most_recent_observed_day + datetime.timedelta(days=1)

    #today = pd.Timestamp.today().date()
    last_forecast_day = forecast_date + pd.offsets.Week(lead_time)

    # Get info for more forecasts than needed in case some fail
    # during processing. 4 forecasts are issued every day, so 10
    # extra is about 2 days worth.
    cfs = cfs_tools.cfs_ftp_info()
    most_recent_forecasts = cfs.last_n_forecasts(n=forecast_ensemble_size + 20,
                                                 from_date=forecast_date)
    cfs.close()

    # Arrange the downscale model to easily do array math with the
    # forecast arrays. And chunk it so it doesn't consume a large
    # memory footprint (but takes a few minutes longer)
    downscale_model = xr.open_dataset(config['downscaling_model_coefficients_file'])
    downscale_model.load()
    downscale_model = broadcast_downscale_model(downscale_model,
                                                start_date=first_forecast_day,
                                                end_date=last_forecast_day)
    downscale_model = downscale_model.chunk({'lat': 200, 'lon': 200})

    num_forecasts_added = 0
    print(len(most_recent_forecasts))
    for forecast_info in most_recent_forecasts:
        if num_forecasts_added == forecast_ensemble_size:
            break

        local_filename = config['tmp_folder'] + os.path.basename(forecast_info['download_url'])
        initial_time = tools.string_to_date(forecast_info['initial_time'], h=True)

        print('\n\n\n')
        print('Attempting to process climate forecast {i} of {n} with initial time {t}'.format(
            i=num_forecasts_added, n=forecast_ensemble_size, t=initial_time))
        print('download URL: ' + str(forecast_info['download_url']))

        # If the observed data is late in updating and the forecast
        # is very recent there will be gaps.
        if initial_time.date() > first_forecast_day.date():
            print('''Forecast skipped
                     Gap between forecast and observed dates
                     forecast initial time: {f_time}
                     latest observed time: {o_time}
                  '''.format(f_time=initial_time, o_time=first_forecast_day))
            continue

        try:
            tools.download_file(forecast_info['download_url'], local_filename)
            forecast_obj = cfs_tools.convert_cfs_grib_forecast(local_filename,
                                                               add_initial_time_dim=False,
                                                               date=initial_time)
        except:
            print('processing error in download/converting')
            continue

        # If the last day of the CFS forecast is off by a bunch from the
        # last day we're shooting for, skip it. This happens occasionally,
        # I'm assuming because of errors on NOAA's end.
        if forecast_obj.forecast_time[-1].values < np.datetime64(last_forecast_day):
            print('Skipping due to bad forecast end date.\nends on {d}'.format(
                d=forecast_obj.forecast_time[-1].values))
            continue

        # ~1.0 deg cfs grid to 4km prism grid.
        # TODO: use distance_weighted method with k:2
        try:
            forecast_obj = cfs_tools.spatial_downscale(ds=forecast_obj,
                                                       method='distance_weighted',
                                                       downscale_args={'k': 2},
                                                       data_var='tmean',
                                                       target_array=land_mask.to_array()[0])
        except:
            print('processing error in spatial downscale')
            continue

        # Limit to the lead time.
        try:
            dates_GE_first_day = forecast_obj.forecast_time.values >= np.datetime64(first_forecast_day)
            dates_LE_last_day = forecast_obj.forecast_time.values <= np.datetime64(last_forecast_day)
            times_to_keep = np.logical_and(dates_GE_first_day, dates_LE_last_day)
            forecast_obj = forecast_obj.isel(forecast_time=times_to_keep)

            # Apply downscaling model
            forecast_obj = forecast_obj.rename({'forecast_time': 'time'})
            forecast_obj = forecast_obj.chunk({'lat': 200, 'lon': 200})
            forecast_obj = forecast_obj['tmean'] * downscale_model.slope + downscale_model.intercept
            forecast_obj = forecast_obj.to_dataset(name='tmean')
        except:
            print('processing error in downscaling')
            continue

        # Add in observed observations
        # rounding errors can make it so lat/lon don't line up exactly
        # copying lat and lon fixes this.
        try:
            forecast_obj['lat'] = current_season_observed['lat']
            forecast_obj['lon'] = current_season_observed['lon']
            forecast_obj = xr.merge([forecast_obj, current_season_observed])
        except:
            print('processing error in merging with observed data')
            continue

        # TODO: add provenance metadata
        try:
            processed_filename = destination_folder + 'cfsv2_' + forecast_info['initial_time'] + '.nc'
            forecast_obj.to_netcdf(processed_filename)
        except:
            print('processing error in saving file')
            continue

        print('Successfully processed forecast from initial time: ' + str(initial_time))
        num_forecasts_added += 1

    assert num_forecasts_added == forecast_ensemble_size, 'not enough forecasts added. {added} of {needed}'.format(
        added=num_forecasts_added, needed=forecast_ensemble_size)
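# Hindcast usage sketch (illustration only): assumes get_forecasts_from_date
# above is importable and that the destination folder is a hypothetical path.
# Per the docstring, the call works backward from the 18Z cycle of the given
# date until forecast_ensemble_size forecasts are successfully processed.
def _example_hindcast():
    hindcast_date = datetime.datetime(2018, 2, 15)
    get_forecasts_from_date(forecast_date=hindcast_date,
                            destination_folder='/tmp/cfsv2_hindcasts/',
                            lead_time=36,
                            forecast_ensemble_size=5)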
def run(climate_forecast_folder=None, phenology_forecast_folder=None, species_list=None):
    """Build phenology forecasts
    """
    divider = '#' * 90

    config = tools.load_config()

    current_season = tools.current_growing_season(config)
    current_season_doy_0 = str(int(current_season)) + '0101'
    current_season_doy_0 = tools.string_to_date(current_season_doy_0, h=False).date()

    today = datetime.datetime.today().date()
    current_doy = today.timetuple().tm_yday

    season_first_date = str(int(current_season) - 1) + config['season_month_begin'] + config['season_day_begin']
    season_first_date = tools.string_to_date(season_first_date, h=False).date()

    # if the season for spring forecasts has started. Nov 1
    if today >= season_first_date:
        # adjust the current doy to potentially be negative to reflect the doy
        # for the following calendar year.
        if today < current_season_doy_0:
            current_doy -= 365

    print(divider)
    print('Applying phenology models - ' + str(today))

    range_masks = xr.open_dataset(config['species_range_file'])

    doy_0 = np.datetime64(current_season_doy_0)

    # Default location of climate forecasts
    if not climate_forecast_folder:
        climate_forecast_folder = config['current_forecast_folder']
    current_climate_forecast_files = glob.glob(climate_forecast_folder + '*.nc')

    print(str(len(current_climate_forecast_files)) + ' current climate forecast files: \n' +
          str(current_climate_forecast_files))

    # Load default species list if no special one was passed
    # (a DataFrame cannot be truth-tested directly, so compare with None)
    if species_list is None:
        species_list = pd.read_csv(config['species_list_file'])
    species_list = species_list[['species', 'Phenophase_ID', 'current_forecast_version',
                                 'season_start_doy', 'season_end_doy']]

    # Only forecast species and phenophases in the current season
    species_list = species_list[(current_doy >= species_list.season_start_doy) &
                                (current_doy <= species_list.season_end_doy)]

    if len(species_list) == 0:
        raise RuntimeError('No species currently in season, which is roughly Dec. 1 - Nov. 1')

    phenology_model_metadata = pd.read_csv(config['phenology_model_metadata_file'])

    forecast_metadata = species_list.merge(phenology_model_metadata,
                                           left_on=['species', 'Phenophase_ID', 'current_forecast_version'],
                                           right_on=['species', 'Phenophase_ID', 'forecast_version'],
                                           how='left')

    # Default location to write phenology forecasts
    if not phenology_forecast_folder:
        phenology_forecast_folder = config['phenology_forecast_folder']

    print(divider)

    # Load the climate forecasts
    #current_climate_forecasts = [xr.open_dataset(f) for f in current_climate_forecast_files]

    num_species_processed = 0
    for i, forecast_info in enumerate(forecast_metadata.to_dict('records')):
        species = forecast_info['species']
        phenophase = forecast_info['Phenophase_ID']
        model_file = config['phenology_model_folder'] + forecast_info['model_file']
        model = utils.load_saved_model(model_file)

        print(divider)
        if species not in range_masks.species.values:
            print('Skipping {s} {p}, no range mask'.format(s=species, p=phenophase))
            continue
        else:
            print('Apply model for {s} {p}'.format(s=species, p=phenophase))
            print('forecast attempt {i} of {n} potential species. {n2} processed successfully so far.'.format(
                i=i, n=len(forecast_metadata), n2=num_species_processed))

        species_range = range_masks.sel(species=species)

        prediction, prediction_sd = predict_phenology_from_climate(model,
                                                                   current_climate_forecast_files,
                                                                   post_process='automated',
                                                                   doy_0=doy_0,
                                                                   species_range=species_range,
                                                                   n_jobs=config['n_jobs'])

        species_forecast = xr.Dataset(data_vars={'doy_prediction': (('species', 'phenophase', 'lat', 'lon'), prediction),
                                                 'doy_sd': (('species', 'phenophase', 'lat', 'lon'), prediction_sd)},
                                      coords={'species': [species],
                                              'phenophase': [phenophase],
                                              'lat': species_range.lat,
                                              'lon': species_range.lon})

        if i == 0:
            all_species_forecasts = species_forecast
            num_species_processed += 1
        else:
            merge_start_time = time.time()
            all_species_forecasts = xr.merge([all_species_forecasts, species_forecast])
            print('merge time {s} sec'.format(s=round(time.time() - merge_start_time, 0)))
            num_species_processed += 1

        # Merging these files over and over slows things down more and more.
        # Saving every few iterations seems to speed things up.
        if num_species_processed % 5 == 0:
            all_species_forecasts.to_netcdf(config['tmp_folder'] + 'forecast_tmp.nc')
            all_species_forecasts = xr.open_dataset(config['tmp_folder'] + 'forecast_tmp.nc')
            all_species_forecasts.load()
            all_species_forecasts.close()

    print(divider)
    print('phenology forecast final processing')
    #all_species_forecasts = xr.merge(all_species_forecasts)

    current_season = tools.current_growing_season(config)

    provenance_note = \
        """Forecasts for plant phenology of select species flowering and/or leaf out times
        for the {s} season. Made on {t} from NOAA CFSv2 forecasts downscaled using
        PRISM climate data. Plant phenology models made using National Phenology Network data.
        """.format(s=current_season, t=today)

    all_species_forecasts.attrs['note'] = provenance_note
    all_species_forecasts.attrs['issue_date'] = str(today)
    all_species_forecasts.attrs['crs'] = '+init=epsg:4269'
    # TODO: add some more metadata
    # common names?
    #all_species_forecasts['forecast_date']=str(today)

    forecast_filename = config['phenology_forecast_folder'] + 'phenology_forecast_' + str(today) + '.nc'

    all_species_forecasts = all_species_forecasts.chunk({'lat': 50, 'lon': 50})

    all_species_forecasts.to_netcdf(forecast_filename,
                                    encoding={'doy_prediction': {'zlib': True,
                                                                 'complevel': 4,
                                                                 'dtype': 'int32',
                                                                 'scale_factor': 0.001,
                                                                 '_FillValue': -9999},
                                              'doy_sd': {'zlib': True,
                                                         'complevel': 4,
                                                         'dtype': 'int32',
                                                         'scale_factor': 0.001,
                                                         '_FillValue': -9999}})

    # Return filename of final forecast file for use by primary script
    return forecast_filename
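# Usage sketch (illustration only): assumes the run() above is importable; the
# folder paths are hypothetical. Both folders fall back to the locations in the
# config file when left as None.
def _example_phenology_run():
    forecast_file = run(climate_forecast_folder='/tmp/climate_forecasts/',
                        phenology_forecast_folder='/tmp/phenology_forecasts/')
    print('phenology forecast written to ' + forecast_file)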
def setup(self):
    self.config = tools.load_config()
    self.land_mask = xr.open_dataset(self.config['mask_file'])
        ax.hist(data[k], label=k)
        ax.legend()
        py.tight_layout()
        py.savefig('plot.pdf')

    def test2(self):
        for i in range(100):
            try:
                R = np.random.rand(7)
                x, y, z, Q2, phi_h, phi_S, pT, Sperp, Spar, lini, lfin, P, ph = self._gen_event(R)
            except:
                continue
            le = self.conf['le']
            target = self.conf['target']
            hadron = self.conf['hadron']
            xsec = self.sidis.get_xsec(x, z, y, Q2, pT, phi_h, phi_S,
                                       Sperp, Spar, le, target, hadron)
            print(xsec)


if __name__ == '__main__':
    conf = load_config('input.py')
    conf['aux'] = AUX()
    mceg = MCEG(conf)
    mceg.test2()
def run():
    config = tools.load_config()
    current_season = tools.current_growing_season(config)

    image_metadata = pd.read_csv(config['phenology_forecast_figure_metadata_file'])

    # Sort by issue date and then species so they are displayed correctly in the dropdown
    image_metadata['issue_date_object'] = pd.DatetimeIndex(image_metadata.forecast_issue_date)
    image_metadata.sort_values(['issue_date_object', 'species'], inplace=True)

    ###############################
    # Keep only images from the current growing season
    image_metadata = image_metadata[image_metadata.forecast_season == int(current_season)]

    ###############################
    # Create menu items with a pretty display name and an internal code

    ####################
    most_recent_date = datetime.datetime.strptime('2000-01-01', '%Y-%m-%d')
    available_issue_dates = []
    for d in image_metadata.forecast_issue_date.unique().tolist():
        d_object = datetime.datetime.strptime(d, '%Y-%m-%d')
        available_issue_dates.append({'display_text': d_object.strftime('%b %d, %Y'),
                                      'value': d})
        if d_object > most_recent_date:
            most_recent_date = d_object

    # Default menu item is the most recent issue date
    for date_metadata in available_issue_dates:
        if date_metadata['value'] == most_recent_date.strftime('%Y-%m-%d'):
            date_metadata['default'] = 1
        else:
            date_metadata['default'] = 0

    ####################
    available_species = []

    # Create display as: 'red maple (Acer rubrum)'
    image_metadata['display_text'] = image_metadata[['species', 'common_name']].apply(
        lambda x: '{c} ({s})'.format(c=x[1], s=x[0].capitalize()), axis=1)

    # change the space in the species to an underscore.
    image_metadata['species'] = image_metadata['species'].apply(lambda x: x.replace(' ', '_'))

    image_metadata['default'] = 0

    # Get info needed for the javascript menu: the value to pass around (species with an
    # underscore), the common/scientific name display text, and whether it's the default one to show
    available_species = image_metadata[['species', 'display_text', 'default']].drop_duplicates() \
        .rename(columns={'species': 'value'}).to_dict('records')

    # Set the default species to something random.
    def image_available(species, phenophase=498, most_recent_date=most_recent_date):
        matching_images = image_metadata.query('species == @species & \
                                                phenophase == @phenophase & \
                                                issue_date_object == @most_recent_date')
        return matching_images.shape[0] > 0

    shuffle(available_species)
    for species_metadata in available_species:
        if image_available(species_metadata['value']):
            species_metadata['default'] = 1
            break

    # Sort again so it's alphabetical by species in the website menu
    available_species = sorted(available_species, key=lambda s: s['value'])

    #####################
    available_phenophase = [{'value': '371', 'display_text': 'Leaves', 'default': 1},
                            {'value': '501', 'display_text': 'Flowers', 'default': 0},
                            {'value': '498', 'display_text': 'Fall colors', 'default': 0}]

    ############
    available_images = image_metadata.img_filename.tolist()

    ##############################
    ##############################
    json_metadata = {'available_issue_dates': available_issue_dates,
                     'available_species': available_species,
                     'available_phenophase': available_phenophase,
                     'available_images': available_images}

    with open(config['phenology_forecast_figure_folder'] + 'image_metadata.json', 'w') as f:
        json.dump(json_metadata, f, indent=4)
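# Shape of the image_metadata.json written above (the dates, species, and
# filenames below are made up for illustration): one default issue date,
# species, and phenophase drive the website dropdown menus.
_example_image_metadata = {
    'available_issue_dates': [{'display_text': 'Mar 01, 2018', 'value': '2018-03-01', 'default': 1}],
    'available_species': [{'value': 'acer_rubrum', 'display_text': 'red maple (Acer rubrum)', 'default': 1}],
    'available_phenophase': [{'value': '371', 'display_text': 'Leaves', 'default': 1}],
    'available_images': ['acer_rubrum_371_2018-03-01.png'],
}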
from __future__ import division, print_function

import numpy as np
import pandas as pd
from matplotlib import rc
# import matplotlib.patches as mpatches
import matplotlib.pyplot as plt

from fitlab.resman import RESMAN
from tools.tools import load_config

# %matplotlib inline
plt.ion()
rc("font", **{"family": "sans-serif", "sans-serif": ["Helvetica"]})

conf = load_config("../fitlab/inputs/upol_compass.py")
conf["resman"] = RESMAN(conf)
conf["resman"].get_residuals(conf["parman"].par)

# Bins
x_bins = [0.003, 0.008, 0.013, 0.02, 0.032, 0.055, 0.1, 0.21, 0.4]
q2_bins = [1.0, 1.7, 3.0, 7.0, 16.0, 81.0]

raw = pd.read_excel('../database/sidis/expdata/5001.xlsx')

data = pd.concat(pd.DataFrame(d) for d in conf["resman"].sidisres.tabs.values())
data = data[data["hadron"] == "pi+"]

z_func = lambda z: [0.2, 0.3, 0.4, 0.6].index(z)
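# Binning sketch (illustration only): shows how the x and Q2 edges above can be
# turned into integer bin labels on the COMPASS table. The column names 'x' and
# 'Q2' are assumptions about the residual-table layout.
data['x_bin'] = np.digitize(data['x'], x_bins) - 1
data['Q2_bin'] = np.digitize(data['Q2'], q2_bins) - 1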
def run():
    divider = '#' * 90

    config = tools.load_config()

    today = datetime.datetime.today().date()

    land_mask = xr.open_dataset(config['mask_file'])

    print(divider)
    print('Applying phenocam phenology models - ' + str(today))

    doy_0 = np.datetime64('2018-01-01')

    current_climate_forecast_files = glob.glob(config['current_forecast_folder'] + '*.nc')

    print(str(len(current_climate_forecast_files)) + ' current climate forecast files: \n' +
          str(current_climate_forecast_files))

    print(divider)

    # Load the climate forecasts
    current_climate_forecasts = [xr.open_dataset(f) for f in current_climate_forecast_files]

    for i, forecast_info in enumerate(phenocam_models):
        model_nickname = forecast_info['nickname']
        model_parameters = forecast_info['parameters']
        base_model_name = forecast_info['base_model']

        Model = utils.load_model(base_model_name)
        model = Model(parameters=model_parameters)

        print('attempting phenocam model ' + model_nickname)

        # TODO: use tools.phenology_tools stuff here
        ensemble = []
        for climate in current_climate_forecasts:
            doy_series = pd.TimedeltaIndex(climate.time.values - doy_0, freq='D').days.values
            ensemble.append(model.predict(to_predict=climate.tmean.values,
                                          doy_series=doy_series))

        ensemble = np.array(ensemble).astype(float)
        # apply nan to non predictions
        ensemble[ensemble == 999] = np.nan

        prediction = np.mean(ensemble, axis=0)
        prediction_sd = np.std(ensemble, axis=0)

        # extend the axis by 1 to match the xarray creation
        prediction = np.expand_dims(prediction, axis=0)
        prediction_sd = np.expand_dims(prediction_sd, axis=0)

        forecast = xr.Dataset(data_vars={'doy_prediction': (('model', 'lat', 'lon'), prediction),
                                         'doy_sd': (('model', 'lat', 'lon'), prediction_sd)},
                              coords={'model': [model_nickname],
                                      'lat': land_mask.lat,
                                      'lon': land_mask.lon})

        forecast = forecast.chunk({'lat': 50, 'lon': 50})

        if i == 0:
            phenocam_forecasts = forecast
        else:
            phenocam_forecasts = xr.merge([phenocam_forecasts, forecast])

    print(divider)
    print('phenocam phenology forecast final processing')

    current_season = tools.current_growing_season(config)

    provenance_note = \
        """Forecasts for plant phenology of select species flowering and/or leaf out times
        for the {s} season. Made on {t} from NOAA CFSv2 forecasts downscaled using
        PRISM climate data. Phenocam models built by Eli Melaas.
        """.format(s=current_season, t=today)

    phenocam_forecasts.attrs['note'] = provenance_note
    phenocam_forecasts.attrs['issue_date'] = str(today)
    phenocam_forecasts.attrs['crs'] = '+init=epsg:4269'

    forecast_filename = config['phenology_forecast_folder'] + 'phenocam_phenology_forecast_' + str(today) + '.nc'

    phenocam_forecasts = phenocam_forecasts.chunk({'lat': 50, 'lon': 50})

    phenocam_forecasts.to_netcdf(forecast_filename,
                                 encoding={'doy_prediction': {'zlib': True,
                                                              'complevel': 4,
                                                              'dtype': 'int32',
                                                              'scale_factor': 0.001,
                                                              '_FillValue': -9999},
                                           'doy_sd': {'zlib': True,
                                                      'complevel': 4,
                                                      'dtype': 'int32',
                                                      'scale_factor': 0.001,
                                                      '_FillValue': -9999}})

    # Return filename of final forecast file for use by primary script
    return forecast_filename
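# Usage sketch (illustration only): assumes the run() above is importable.
# xarray undoes the int32/scale_factor encoding on read, so doy_prediction
# comes back as floating point day-of-year values per phenocam model.
def _example_inspect_phenocam_forecast():
    forecast_file = run()
    forecast = xr.open_dataset(forecast_file)
    print(forecast['doy_prediction'].sel(model=forecast.model.values[0]))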