def get_global_overview_stats_on_model_runs(res='4x5'):
    """
    Print O3 burdens for a set of model runs and save summary stats to csv

    Parameters
    -------
    res (str): horizontal resolution of the model runs (e.g. '4x5', '2x2.5')

    Returns
    -------
    (None)

    Notes
    -----
     - The run locations are retrieved via get_dictionary_of_IC_runs
     - The (transposed) summary statistics DataFrame is saved as a csv file
       named 'PREFIA_summary_stats_<res><suffix>.csv'
     - For an unrecognised resolution a warning is printed and the file is
       still saved, using the resolution string with no suffix (previously
       this case failed with a NameError as no filename was set)
    """
    # Get the model run locations
    run_dict = get_dictionary_of_IC_runs(res=res, NetCDF=False)
    # Print the O3 burden for each run (in sorted order of run name)
    for run in sorted(run_dict.keys()):
        folder = run_dict[run]
        print(run, folder)
        a = AC.get_O3_burden_bpch(folder)
        print(run, folder, a.sum())
    # Get summary stats on model runs.
    df = AC.get_general_stats4run_dict_as_df_bpch(run_dict=run_dict,
                                                  REF1='GFAS.DICE',
                                                  res=res)
    # Setup a name for the csv file
    filestr = 'PREFIA_summary_stats_{}{}.csv'
    if res == '4x5':
        filename = filestr.format(res, '')
    elif res == '2x2.5':
        filename = filestr.format(res,
                                  '_VALUES_ARE_APROX_bpch_files_incomplete')
    else:
        print("WARNING: resolution ('{}') not known".format(res))
        # Still define a filename, so that the save below cannot fail
        filename = filestr.format(res, '')
    # Save summary stats to disk
    df.T.to_csv(filename)
def get_inorg_emissions_for_params(wd_dict=None, res='4x5'):
    """
    Retrieve inorganic iodine (HOI, I2) emissions for each parameterisation

    Parameters
    -------
    wd_dict (dict): dictionary of names (keys) and locations of model runs
    res (str): horizontal resolution used to retrieve the surface area

    Returns
    -------
    (tuple) dictionary of emissions per run (HOI, I2 and their sum) and the
        list of names for these entries (['HOI', 'I2', 'Inorg'])
    """
    from A_PD_hal_paper_analysis_figures.halogen_family_emission_printer import get_species_emiss_Tg_per_yr
    specs = ['HOI', 'I2']
    # Surface area for the requested resolution
    s_area = AC.get_surface_area(res=res)
    # Build up the emissions, one entry per model run
    inorg_emiss = {}
    for run_name, run_dir in wd_dict.items():
        print(run_name)
        months = AC.get_gc_months(wd=run_dir)
        years = AC.get_gc_years(wd=run_dir)
        # Emissions for the individual species
        emiss = get_species_emiss_Tg_per_yr(wd=run_dir, specs=specs,
                                            ref_spec='I', s_area=s_area,
                                            years=years, months=months)
        # Tag the combined (HOI + I2) value on to the end
        emiss += [emiss[0] + emiss[1]]
        inorg_emiss[run_name] = emiss
    return inorg_emiss, specs + ['Inorg']
def plt_X_vs_Y_for_obs_v_params(df=None, params2plot=[], obs_var='Obs.',
                                extr_str='', context='paper', dpi=320):
    """
    Scatter each parameterisation against the observations on a single axis

    Parameters
    -------
    df (pd.DataFrame): DataFrame holding the observations and predictions
    params2plot (list): column names of the parameterisations to plot
    obs_var (str): column name of the observations (used as the x axis)
    extr_str (str): extra string used in the title and the saved filename
    context (str): seaborn context to plot with
    dpi (int): resolution of figure (dots per sq inch)

    Returns
    -------
    (None)
    """
    import seaborn as sns
    sns.set(color_codes=True)
    sns.set_context(context)
    # Pair up the variables with colours (black is used for observations)
    colours = ['k'] + AC.get_CB_color_cycle()
    color_dict = dict(zip([obs_var] + params2plot, colours))
    # Figure and axis shared by all of the parameterisations
    fig = plt.figure(dpi=dpi, facecolor='w', edgecolor='k')
    ax = fig.add_subplot(111)
    for idx, param in enumerate(params2plot):
        # The 1:1 line is only requested once (with the first parameter)
        AC.plt_df_X_vs_Y(df=df, fig=fig, ax=ax, y_var=param, x_var=obs_var,
                         x_label=obs_var, y_label=param,
                         color=color_dict[param], save_plot=False,
                         plot_121=(idx == 0))
    # Title and legend
    plt.title("Obs. vs. predictions in '{}'".format(extr_str))
    plt.legend()
    # Save to disk, using a cleaned-up filename
    png_filename = AC.rm_spaces_and_chars_from_str(
        's2s_X_vs_Y_{}_vs_{}_{}'.format(obs_var, 'params', extr_str))
    plt.savefig(png_filename, dpi=dpi)
def main(trop_limit=True, res='4x5', debug=False):
    """
    Get prod loss output for a family and print this to screen

    Parameters
    -------
    trop_limit (bool): passed to AC.get_GC_output when extracting the output
    res (str): horizontal resolution (currently unused in this function)
    debug (bool): currently unused in this function

    Returns
    -------
    (None)

    Notes
    -----
     - The working directory and the family (species) name are read from the
       command line (sys.argv[1] and sys.argv[2])
     - Reactions without a tag are reported and dropped. The reaction strings
       are dropped along with their tags, so that the printed table pairs
       each tag with its own reaction.
     - Only the first tag of each reaction is considered
    """
    # --- Get family from Command line (and other vars)
    wd = sys.argv[1]
    spec = sys.argv[2]
    # version?
    ver = AC.iGEOSChem_ver(wd)

    # --- Get all tags for this family (through dictionary route)
    # ( e.g. 'PIOx', 'LIOx', 'P_Iy', 'L_Iy' )
    nums, rxns, tags, Coe = AC.prod_loss_4_spec(wd, spec, ver=ver)
    # beautify reaction strings
    rxnstr_l = [''.join(i[4:]) for i in rxns]
    # Only consider tagged reactions (and one tag per reaction)
    untagged = [n for n, i in enumerate(tags) if (len(i) < 1)]
    if untagged:
        print('WARNING! - attempting to process just tagged reactions')
        detail_zip = list(zip(rxnstr_l, zip(nums, tags)))
        print('Untagged reactions: {}'.format(
            [detail_zip[i] for i in untagged]))
        # Drop the untagged entries from both lists to keep them aligned
        skip = set(untagged)
        rxnstr_l = [i for n, i in enumerate(rxnstr_l) if (n not in skip)]
        tags = [i for n, i in enumerate(tags) if (n not in skip)]
    tags = [i[0] for i in tags]  # just consider first tag
#    tags.pop( tags.index('LR71') )  # rm tag for ClOO loss...

    # --- Extract prod loss for these tracers
    # get prod loss IDs
    PDs = [AC.PLO3_to_PD(i, ver=ver, wd=wd, fp=True) for i in tags]
    # extract en masse
    fam_loss = AC.get_GC_output(wd, vars=['PORL_L_S__'+i for i in PDs],
                                trop_limit=trop_limit, r_list=True)
    # Get reference species for family ( e.g. so output is in X g of Y )
    ref_spec = AC.get_ref_spec(spec)
    # convert to mass terms  ( in g X )
    fam_loss = convert_molec_cm3_s_2_g_X_s(ars=fam_loss,
                                           ref_spec=ref_spec, wd=wd,
                                           conbine_ars=False,
                                           rm_strat=True, month_eq=True)
    print([i.shape for i in fam_loss])
    # sum and convert to Gg
    p_l = [i.sum() / 1E9 for i in fam_loss]

    # --- print output as: reaction, magnitude, percent of family
    total = np.sum(p_l)
    pcent = [i / total * 100 for i in p_l]
    d = dict(zip(tags, zip(rxnstr_l, p_l, pcent)))
    df = pd.DataFrame(d).T
    df.columns = ['rxn', 'Gg X', '% of total']
    # sort
    df = df.sort_values(['% of total'], ascending=False)
    print(df)
def mk_NetCDF_of_pf_files( files, ncfilename=None, debug=False ): """ Make a table like NetCDF file from to any pf output """ # --- Setup NetCDF file ncfile = Dataset( ncfilename,'w', format='NETCDF4') # --- Loop files, read in and add to NetCDF npoint = 1 for n, file in enumerate( files ): # If 1st file setup NetCDF if n == 0: # Get Header infomation from first file vars, sites = get_pf_headers( files[0], debug=debug ) # Extract all points from file df, vars = AC.pf_csv2pandas( file=file, vars=vars, epoch=True,\ r_vars=True ) if debug: print df.shape, df.columns # set unlimited data points dimension (POINT) POINT = ncfile.createDimension( 'POINT', None ) # loop and create variables for each column (exc. last ) if debug: print vars [ ncfile.createVariable( var, var2type(var), ('POINT') ) \ for var in vars ] # close the file ncfile.close() else: # Extract all points from file df, vars = AC.pf_csv2pandas( file=file, vars=vars, epoch=True, \ r_vars=True ) # Open the file in append mode ncfile = Dataset( ncfilename,'a', format='NETCDF4') if debug: print df.index # Fill variables for given dim_len = len( df.index ) for var in vars: ncfile.variables[ var ][npoint:npoint+dim_len] = df[var].values # Tidy up and count npoint += dim_len del df ncfile.close()
def add_loc_ocean2df(df=None, LatVar='lat', LonVar='lon'):
    """
    Label each location in a dataframe with the ocean it falls within

    Parameters
    -------
    df (pd.DataFrame): DataFrame of data
    LatVar (str): variable name in DataFrame for latitude
    LonVar (str): variable name in DataFrame for longitude

    Returns
    -------
    (pd.DataFrame)
    """
    from geopandas.tools import sjoin
    featurecla = 'ocean'
    # Polygons for the oceans
    ocean_shapes = AC.get_shapes4oceans(rtn_group=True, featurecla=featurecla)
    # Point geometries for the locations, held in a geopandas dataframe
    points = geopandas.points_from_xy(df[LonVar], df[LatVar])
    gdf = geopandas.GeoDataFrame(df, geometry=points)
    # Spatially join the points to the polygons they sit within
    joined = sjoin(gdf, ocean_shapes, how='left')
    # Report if some of the points were not assigned to a region
    n_assigned = float(joined['name'].dropna().shape[0])
    n_total = float(df.shape[0])
    if n_total != n_assigned:
        pstr = 'WARNING: Only {:.2f}% assigned ({} of {})'
        pcent = (n_assigned / n_total) * 100
        print(pstr.format(pcent, int(n_assigned), int(n_total)))
    # Store the name of the assigned ocean as a new column
    df[featurecla] = joined['name'].values
    return df
def mk_NetCDF_of_global_oceans(df=None, LatVar='lat', LonVar='lon',
                               save2NetCDF=False,
                               filename='global_oceans_0125x0125.nc'):
    """
    Make a dataset with a raster of the global oceans on a 0.125x0.125 grid

    Parameters
    -------
    df (pd.DataFrame): not used by this function (kept for compatibility)
    LatVar (str): not used by this function (kept for compatibility)
    LonVar (str): not used by this function (kept for compatibility)
    save2NetCDF (bool): save the dataset to disk instead of returning it?
    filename (str): name of the NetCDF file written if save2NetCDF is True

    Returns
    -------
    (xr.Dataset) if save2NetCDF is False, otherwise (None)

    Notes
    -----
     - The coordinates are taken from the LWI 'ctm.nc' file in the 'data'
       folder next to this module
     - An undefined "country" variable was previously passed on to
       AC.add_raster_of_oceans2ds; this argument has been removed.
       NOTE(review): confirm no module-level "country" was relied upon.
     - xarray's to_netcdf only writes to disk if given a path, so a filename
       is now provided when saving
    """
    # Get AC_tools location, then set example data folder location
    import os
    import xarray as xr
    import inspect
    this_file = inspect.getframeinfo(inspect.currentframe()).filename
    path = os.path.dirname(os.path.abspath(this_file))
    folder = path+'/data/LM/LANDMAP_LWI_ctm_0125x0125/'
    # Get coords from LWI 0.125x0.125 data and remove the time dimension
    ds = xr.open_dataset(folder+'ctm.nc')
    ds = ds.mean(dim='time')
    # Add a raster array for the oceans
    ds = AC.add_raster_of_oceans2ds(ds, test_plot=True)
    # save as a NetCDF?
    if save2NetCDF:
        ds.to_netcdf(filename)
    else:
        return ds
def main( trop_limit=True, res='4x5', debug=False):
    """
    Get prod loss output for a family and print this to screen

    Parameters
    -------
    trop_limit (bool): passed to AC.get_GC_output when extracting the output
    res (str): horizontal resolution (currently unused in this function)
    debug (bool): currently unused in this function

    Returns
    -------
    (None)

    Notes
    -----
     - The working directory and the family (species) name are read from the
       command line (sys.argv[1] and sys.argv[2])
     - Reactions without a tag are reported and dropped, then only the first
       tag of each remaining reaction is considered
     - The shapes of the converted arrays are printed to screen
    """
    # --- Get family from Command line (and other vars)
    wd = sys.argv[1]
    spec = sys.argv[2]
    # version?
    ver = AC.iGEOSChem_ver(wd)

    # --- Get all tags for this family (through dictionary route)
    # ( e.g. 'PIOx', 'LIOx', 'P_Iy', 'L_Iy' )
    nums, rxns, tags, Coe = AC.prod_loss_4_spec(wd, spec, ver=ver)
    # beautify reaction strings
    rxnstr_l = [''.join(i[4:]) for i in rxns]
    # Only consider tagged reactions (and one tag per reaction)
    untagged = [n for n, i in enumerate(tags) if (len(i) < 1)]
    if untagged:
        print('WARNING! - attempting to process just tagged reactions')
        detail_zip = list(zip(rxnstr_l, zip(nums, tags)))
        print('Untagged reactions: {}'.format(
            [detail_zip[i] for i in untagged]))
        skip = set(untagged)
        tags = [i for n, i in enumerate(tags) if (n not in skip)]
    tags = [i[0] for i in tags]  # just consider first tag
#    tags.pop( tags.index('LR71') )  # rm tag for ClOO loss...

    # --- Extract prod loss for these tracers
    # get prod loss IDs
    PDs = [AC.PLO3_to_PD(i, ver=ver, wd=wd, fp=True) for i in tags]
    # extract en masse
    fam_loss = AC.get_GC_output(wd, vars=['PORL_L_S__'+i for i in PDs],
                                trop_limit=trop_limit, r_list=True)
    # Get reference species for family ( e.g. so output is in X g of Y )
    ref_spec = AC.get_ref_spec(spec)
    # convert to mass terms  ( in g X )
    fam_loss = convert_molec_cm3_s_2_g_X_s(ars=fam_loss,
                                           ref_spec=ref_spec, wd=wd,
                                           conbine_ars=False,
                                           rm_strat=True, month_eq=True)
    print([i.shape for i in fam_loss])
def check_plots4plotting():
    """
    Draw one test line per colour in the colour cycle used for plotting

    Returns
    -------
    (None)
    """
    # Colours to test (the colour cycle from AC, plus dark green)
    colours = AC.get_CB_color_cycle()
    colours += ['darkgreen']
    # Plot a line for each colour, with a slope set by the colour's index
    xs = np.arange(10)
    for slope, colour in enumerate(colours):
        plt.plot(xs, xs * slope, color=colour)
def add_LWI2ds_2x25_4x5(ds, var2template='Chance2014_STTxx2_I',
                        res='0.125x0.125', inc_booleans_and_area=True):
    """
    Add Land/Water/Ice (LWI) values to xr.DataArray

    Parameters
    -------
    ds (xr.Dataset): xarray dataset to add LWI to
    res (str): horizontal resolution of dataset (e.g. 4x5)
    var2template (str): variable to use a template for making LWI variable
    inc_booleans_and_area (bool): include extra booleans and surface area

    Returns
    -------
    (xr.Dataset)

    Notes
    -----
     - The LWI map is repeated 12 times, so the template variable is assumed
       to hold 12 time steps - TODO confirm against callers
     - If inc_booleans_and_area is False, the 'LWI' variable is now made from
       a copy of the template variable (the LWI values here are a numpy
       array, so they cannot be indexed by name as was done previously)
    """
    # Get the LWI map and repeat it for each of the 12 time steps
    LWI = AC.get_LWI_map(res=res)[..., 0]
    LWI = np.array([LWI.T] * 12)
    print(LWI.shape, ds[var2template].shape)
    if inc_booleans_and_area:
        ds['IS_WATER'] = ds[var2template].copy()
        ds['IS_WATER'].values = (LWI == 0)
        # add is land (as an independent copy, as done for 0.125x0.125)
        ds['IS_LAND'] = ds['IS_WATER'].copy()
        ds['IS_LAND'].values = (LWI == 1)
        # get surface area
        s_area = AC.get_surface_area(res)[..., 0]  # m2 land map
        ds['AREA'] = ds[var2template].mean(dim='time')
        ds['AREA'].values = s_area.T
    else:
        # Use the template to provide the dimensions and coordinates
        ds['LWI'] = ds[var2template].copy()
        ds['LWI'].values = LWI
        # Set attributes (replacing those copied from the template)
        attrs = {}
        attrs['long_name'] = 'Land/Water/Ice index'
        attrs[
            'Detail'] = 'A Land-Water-Ice mask. It is 1 over continental areas, 0 over open ocean, and 2 over seaice covered ocean.'
        ds['LWI'].attrs = attrs
    return ds
def add_LWI2ds_0125x0125(ds, var2template='Chance2014_STTxx2_I',
                         res='0.125x0.125', inc_booleans_and_area=True):
    """
    Add Land/Water/Ice (LWI) information from the 0.125x0.125 LWI file

    Parameters
    -------
    ds (xr.Dataset): xarray dataset to add LWI to
    res (str): horizontal resolution (e.g. 4x5) of Dataset
    inc_booleans_and_area (bool): include extra booleans and surface area
    var2template (str): variable to use a template for making LWI variable

    Returns
    -------
    (xr.dataset)
    """
    # Location of the LWI NetCDF file
    lwi_path = get_file_locations('AC_tools')
    lwi_path += '/data/LM/LANDMAP_LWI_ctm_0125x0125/'
    lwi_path = lwi_path + 'ctm.nc'
    LWI = xr.open_dataset(lwi_path)
    # Re-date the time steps by their month (so they run Jan=>Dec)
    LWI.time.values = [
        datetime.datetime(1970, month, 1) for month in LWI['time.month']
    ]
    # Then order the dataset by these new dates
    LWI = LWI.loc[{'time': sorted(LWI.coords['time'].values)}]
    # Only the LWI variable (with updated attributes) is added?
    if not inc_booleans_and_area:
        ds['LWI'] = LWI['LWI']
        attrs = ds['LWI'].attrs.copy()
        new_attrs = (
            ('long_name', 'Land/Water/Ice index'),
            ('Detail', 'A Land-Water-Ice mask. It is 1 over continental areas, 0 over open ocean, and 2 over seaice covered ocean.'),
            ('add_offset', 0),
            ('scale_factor', 1),
            ('missing_value', -1e-32),
            ('_FillValue', -1e-32),
            ('units', 'unitless'),
        )
        for key, value in new_attrs:
            attrs[key] = value
        ds['LWI'].attrs = attrs
        return ds
    # Otherwise add the boolean masks, using the template variable
    ds['IS_WATER'] = ds[var2template].copy()
    ds['IS_WATER'].values = (LWI['LWI'] == 0)
    ds['IS_LAND'] = ds['IS_WATER'].copy()
    ds['IS_LAND'].values = (LWI['LWI'] == 1)
    # ... and the surface area (value retrieved via AC, not calculated here)
    s_area = AC.get_surface_area(res)[..., 0]
    ds['AREA'] = ds[var2template].mean(dim='time')
    ds['AREA'].values = s_area
    return ds
def update_time_in_NetCDF2save(ds, convert_time2dt=False):
    """
    Express the time coordinate as hours since 1985-01-01

    Parameters
    -------
    ds (xr.Dataset): dataset with a 'time' coordinate to update
    convert_time2dt (bool): convert the time into a datetime.datetime format

    Returns
    -------
    (xr.Dataset)
    """
    # Reference date that the hours are counted from
    sdate = datetime.datetime(1985, 1, 1)
    # First replace the time with 12 monthly dates from the reference date?
    if convert_time2dt:
        month_offsets = np.arange(1, 13) - 1
        ds['time'] = [AC.add_months(sdate, n) for n in month_offsets]
    # Number of hours (from whole days) between each time and the reference
    hours = []
    for stamp in ds['time'].values:
        elapsed = AC.dt64_2_dt([stamp])[0] - sdate
        hours.append(elapsed.days * 24.)
    ds['time'] = hours
    ds['time'].attrs = {'units': 'hours since 1985-01-01 00:00:00'}
    return ds
def compare_emissions(wd_dict=None, inorg_emiss=None, specs=None):
    """
    Compare emissions between runs with different parameterisations

    Parameters
    -------
    wd_dict (dict): dictionary of names (keys) and locations of model runs
    inorg_emiss (dict): dictionary of inorganic iodine emissions for runs
    specs (list): names of the entries held for each run in inorg_emiss

    Returns
    -------
    (pd.DataFrame)

    Notes
    -----
     - The percentage differences are calculated for the 'RFR(offline)' run
       against the 'Chance2014' and 'MacDonald2014' runs, so these must all
       be present in wd_dict
     - NOTE(review): the run compared against the references is now set
       explicitly to 'RFR(offline)', rather than depending on a variable
       left over from an earlier loop - confirm this is the intended run
     - The inorg_emiss dictionary provided is not modified
    """
    # Get emission runs that test output
    if wd_dict is None:
        wd_dict = get_emissions_testing_runs()
    params = sorted(wd_dict.keys())
    # Get ozone burdens
    O3Burdens = [AC.get_O3_burden(wd_dict[i]) for i in params]
    O3Burdens = [i.sum() / 1E3 for i in O3Burdens]
    # Compile data into dataframe
    df = pd.DataFrame(O3Burdens, index=params, columns=['O3 bud.'])
    # Get emissions
    if inorg_emiss is None:
        inorg_emiss, specs = get_inorg_emissions_for_params(wd_dict=wd_dict)
    # Sum emissions (in a copy, so the caller's dictionary is left as is)
    emiss_totals = dict(inorg_emiss)
    for run in params:
        emiss_totals[run] = [i.sum() for i in inorg_emiss[run]]
    # Convert to DataFrame and combine
    inorg_emiss_names = [i + ' emiss.' for i in specs]
    df2 = pd.DataFrame(emiss_totals, index=inorg_emiss_names)
    df = pd.concat([df, df2.T], axis=1)
    # Add total inorganic flux? (Hashed out for now )
#    df['Inorg emiss'] = df[inorg_emiss_names].sum(axis=1)
    # Now do calculations to get change and difference between runs
    # calculate % change in values between runs
    df = df.T
    param = 'RFR(offline)'
    refs = 'Chance2014', 'MacDonald2014'
    # Loop and calculate percentages
    for ref in refs:
        col_name = '({}% vs. {})'.format(param, ref)
        df[col_name] = (df[param] - df[ref]) / df[ref] * 100
    df = df.T
    return df
def GetEmissionsFromHEMCONetCDFsAsDatasets(wds=None):
    """
    Load the HEMCO diagnostics of each run and convert iodine emissions to Gg

    Parameters
    -------
    wds (dict): dictionary of run names (keys) and locations of model runs

    Returns
    -------
    (dict) of datasets, with run names as keys
    """
    # Use the default set of runs if none were provided
    if wds is None:
        wds = get_run_dict4EGU_runs()
    runs = list(wds.keys())
    # Emission variables to convert
#    vars2use = [i for i in dsDH[run].data_vars if 'I' in i ]
    vars2use = [
        'EmisCH2IBr_Ocean', 'EmisCH2ICl_Ocean', 'EmisCH2I2_Ocean',
        'EmisCH3I_Ocean', 'EmisI2_Ocean', 'EmisHOI_Ocean',
    ]
    # Extract the diagnostics for every run first
    dsDH = {}
    for run in runs:
        run_dir = wds[run]
        print(run, run_dir)
        dsDH[run] = AC.GetHEMCODiagnostics_AsDataset(wd=run_dir)
    # Map each variable to its species (the text between 'Emis' and '_')
    var_species_dict = {
        var: var.split('Emis')[-1].split('_')[0] for var in vars2use
    }
    # Then convert the emissions of each run to Gg
    for run in runs:
        dsDH[run] = AC.Convert_HEMCO_ds2Gg_per_yr(
            dsDH[run], vars2convert=vars2use,
            var_species_dict=var_species_dict)
    return dsDH
def make_2D_RDF_of_gridded_data(res='1x1', X_locs=None, Y_locs=None,
                                Z_data=None, df=None):
    """
    Make a 2D interpolation using RadialBasisFunctions

    Parameters
    -------
    res (str): horizontal resolution of the grid to interpolate onto
    X_locs (array-like): longitudes of the data points
    Y_locs (array-like): latitudes of the data points
    Z_data (array-like): values at the data points
    df (pd.DataFrame): optional DataFrame to take the data from instead
        (using its 'Longitude', 'Latitude' and 'Iodide' columns)

    Returns
    -------
    (None)

    Notes
    -----
     - Previously the locations and data were always read from an undefined
       "df" variable; the arguments given are now used, unless a DataFrame
       is explicitly provided
     - Points sharing a location are reduced to a single value (the last one
       given), and for '1x1' the locations are first truncated to integers
     - The interpolated field and the data points are plotted up
    """
    import numpy as np
    from scipy.interpolate import Rbf
    import matplotlib.pyplot as plt
    # Take the data from the DataFrame, if one was provided
    if df is not None:
        X_locs = df['Longitude'].values
        Y_locs = df['Latitude'].values
        Z_data = df['Iodide'].values
    if (X_locs is None) or (Y_locs is None) or (Z_data is None):
        raise ValueError('X_locs, Y_locs and Z_data (or df) must be provided')
    # Get the coordinates of the grid to interpolate onto
    X_COORDS, Y_COORDS, NIU = AC.get_latlonalt4res(res=res)
    # Remove double ups in data for now...
    print([len(i) for i in (X_locs, Y_locs)])
    # Degrade to 1x1 resolution...
    if res == '1x1':
        X_locs = [int(i) for i in X_locs]
        Y_locs = [int(i) for i in Y_locs]
    # Make a dictionary to remove double ups...
    Z_dict = dict(zip(zip(X_locs, Y_locs), Z_data))
    # Unpack
    locs = sorted(Z_dict.keys())
    Z_data = [Z_dict[i] for i in locs]
    X_locs, Y_locs = list(zip(*locs))
    print([len(i) for i in (X_locs, Y_locs)])
    # Setup meshgrid...
    XI, YI = np.meshgrid(X_COORDS, Y_COORDS)
    # Create the interpolation function and populate the output matrix
    rbf = Rbf(X_locs, Y_locs, Z_data, function='inverse')
    ZI = rbf(XI, YI)
    # Plotting the result
    plt.subplot(1, 1, 1)
    plt.pcolor(XI, YI, ZI)
    plt.scatter(X_locs, Y_locs, 100, Z_data)
    plt.title('RBF interpolation')
    plt.xlim(-180, 180)
    plt.ylim(-90, 90)
    plt.colorbar()
def main(wd=None, CODE_wd=None):
    """
    Driver for analysis of LOx via KPP in GEOS-Chem

    Parameters
    -------
    wd (str): location of the model run directory (default set below)
    CODE_wd (str): location of the model code directory (default set below)

    Returns
    -------
    (None)

    Notes
    -----
     - comment/uncomment functions as required
     - The hard-coded locations below are only used for arguments that were
       not provided (previously any values passed in were overwritten)
    """
    # Manually set locations of Ox loss here (used if not provided)
    root = '/users/ts551/scratch/GC/'
    if CODE_wd is None:
        CODE_wd = root + '/Code/Code.v11-02_Cl_v3_0/'
    if wd is None:
        wd = root + 'rundirs/GC_v11_2d_plus_Clv3/geosfp_4x5_tropchem_Cl.v3_0.1year.2016.tagged/'
    Mechanism = 'Tropchem'
    # Get all the necessary data as as a dictionary object
    Ox_loss_dict = AC.get_Ox_loss_dicts(wd=wd, CODE_wd=CODE_wd,
                                        Mechanism=Mechanism)
    # Plot vertical odd oxygen (Ox) loss via route (chemical family)
    plot_vertical_fam_loss_by_route(Ox_loss_dict=Ox_loss_dict,
                                    Mechanism=Mechanism)
    # Analyse odd oxygen (Ox) loss budget via route (chemical family)
    calc_fam_loss_by_route(Ox_loss_dict=Ox_loss_dict, Mechanism=Mechanism)
# look in the "plane_flight_logs" directory wd = wd+ '/plane_flight_logs/plane.log.*' #wd = wd+ '/plane.log.*' print wd print sorted(glob.glob(wd)) # Asectics fontsize = 10 # ----------- START PLOTTING HERE ---------------------------------------- # ------------- # Get species name in TRA_?? (planefligth output) form #if 'TRA' in species_to_plot: species_to_plot=[ AC.what_species_am_i( species_to_plot, ver=ver, invert=True ) ] #else: # species_to_plot=[species_to_plot] # setup figure fig = plt.figure(figsize=(15,6), dpi=80, facecolor='w', edgecolor='k') # Loop sites in site list and plot species for i,site in enumerate(locations): # extract data from planeflight (csv) files model, names = AC.readfile( sorted(glob.glob(wd) ), site, \ years_to_use, months_to_use, days_to_use) # get species index in list k=names.index(species_to_plot[0])
def plot_up_df_data_by_yr(df=None, Datetime_var='datetime', TimeWindow=5,
                          start_from_last_obs=False,
                          drop_bins_without_data=True,
                          target='Iodide', dpi=320, verbose=False):
    """
    Plot up # of obs. data (Y) binned by region against year (X)

    Parameters
    -------
    df (pd.DataFrame): DataFrame of data with a datetime variable
    Datetime_var (str): name of the datetime variable in the DataFrame
    target (str): Name of the target variable (e.g. iodide)
    TimeWindow (int): number of years to bin observations over
    start_from_last_obs (bool): start from the last observational date
    drop_bins_without_data (bool): exclude bins with no data from plotting
    dpi (int): resolution of figure (dots per sq inch)
    verbose (bool): print out the dates and size of each bin?

    Returns
    -------
    (None)

    Notes
    -----
     - The regions are taken from the 'ocean' column of the DataFrame
     - The DataFrame provided is not modified (a sorted copy is used)
     - If not starting from the last observation, bins are counted back from
       the year of the last observation rounded to a base of 5 via
       AC.myround. NOTE(review): this base is fixed and does not follow
       TimeWindow - confirm this is intended.
    """
    # Sort the dataframe by date (without changing the caller's DataFrame)
    df = df.sort_values(by=Datetime_var)
    # Get the minimum and maximum dates
    min_date = df[Datetime_var].min()
    max_date = df[Datetime_var].max()
    # How many years of data are there?
    yrs_of_data = (max_date-min_date).total_seconds()/60/60/24/365
    nbins = AC.myround(yrs_of_data/TimeWindow, base=1)
    # Start from last observation or from last block of time
    sdate_block = AC.myround(max_date.year, 5)
    sdate_block = datetime.datetime(sdate_block, 1, 1)
    # Make sure the dates used are datetimes
    min_date, max_date = pd.to_datetime([min_date, max_date]).values
    min_date, max_date = AC.dt64_2_dt([min_date, max_date])
    # Calculate the number of points for each bin by region
    dfs = {}
    for nbin in range(nbins+2):
        # Start from last observation or from last block of time?
        days2rm = int(nbin*365*TimeWindow)
        if start_from_last_obs:
            bin_start = AC.add_days(max_date,
                                    -int(days2rm+(365*TimeWindow)))
            bin_end = AC.add_days(max_date, -days2rm)
        else:
            bin_start = AC.add_days(sdate_block,
                                    -int(days2rm+(365*TimeWindow)))
            bin_end = AC.add_days(sdate_block, -days2rm)
        # Select the data within the observational dates
        bool1 = df[Datetime_var] > bin_start
        bool2 = df[Datetime_var] <= bin_end
        df_tmp = df.loc[bool1 & bool2, :]
        # Print the number of values in regions for bin
        if verbose:
            print(bin_start, bin_end, df_tmp.shape)
        # String to save data with
        if start_from_last_obs:
            bin_start_str = bin_start.strftime('%Y/%m/%d')
            bin_end_str = bin_end.strftime('%Y/%m/%d')
        else:
            bin_start_str = bin_start.strftime('%Y')
            bin_end_str = bin_end.strftime('%Y')
        str2use = '{}-{}'.format(bin_start_str, bin_end_str)
        # Sum up the number of values by region
        dfs[str2use] = df_tmp['ocean'].value_counts(dropna=False)
    # Combine to single dataframe and sort by date
    dfA = pd.DataFrame(dfs)
    dfA = dfA[list(sorted(dfA.columns))]
    # Drop the bins (columns) without any data
    if drop_bins_without_data:
        dfA = dfA.dropna(axis=1, how='all')
    # Update the names of the regions (held in the index)
    rename_cols = {
        np.nan: 'Other', 'INDIAN OCEAN': 'Indian Ocean',
        'SOUTHERN OCEAN': 'Southern Ocean'
    }
    dfA = dfA.rename(index=rename_cols)
    # Plot up as a stacked bar plot
    import seaborn as sns
    sns.set()
    dfA.T.plot(kind='bar', stacked=True)
    # Add title etc
    plt.ylabel('# of observations')
    plt.title('{} obs. data by region'.format(target))
    # Save plotted figure
    savename = 's2s_{}_data_by_year_region'.format(target)
    plt.savefig(savename, dpi=dpi, bbox_inches='tight', pad_inches=0.05)
""" This programme makes the planeflight*.dat files required to output for specific locations and times in the model. NOTES: - This programme can be used to produce files to output data for ship and aricraft campaigns """ # --- Packages import numpy as np from time import gmtime, strftime import time import glob import AC_tools as AC # --- Settings try: wd = AC.get_dir( 'dwd' ) except: wd = './' # the directory where files of required output locations are (one per UTC day) camp = './' # Set location for output files here # Set first and last year to look start_year, end_year = 2013, 2015 # debug the oubtput debug = True # tag to (up to 4 characters) tag= 'CON' #'TRB'#'MAL'#'ANT' # do the altitude values need converting from input files ( must be in hPa) convert_km_2_hPa, convert_m_2_hPa, convert_2_m = False, False, False time_str ='%H:%M' # '%H%M' # '%H:%M:%S' # # '%h/%m/%s', # Which (halogen) code version is being used? #ver = '1.6' # Iodine simulation in v9-2
#!/usr/bin/python
# modules
import AC_tools as AC
import numpy as np
import sys
import matplotlib.pyplot as plt

# Setup, choose species
species = 'O3'  # 'CO2'
RMM_species = 16.*3.
res = '4x5'  # ( e.g. '4x5', '2x2.5', '0.5x0.666', '0.25x0.3125' )
unit, scale = AC.tra_unit(species, scale=True)
# debug/print verbose output?
debug = True
# Only consider GEOS-Chem chemical troposphere
trop_limit = True
calc_burden = False  # True
# Check if a directory was given at the command line
try:
    wd = sys.argv[1]
except IndexError:
    # Otherwise use path below
    wd = '<insert GEOS-Chem run direcotory path here>'

# get data as 4D array ( lon, lat, alt, time )
mixing_ratio = AC.get_GC_output(wd, species=species, category='IJ-AVG-$',
                                trop_limit=trop_limit)
# (print called with a single argument, so this runs on Python 2 and 3)
print(mixing_ratio.shape)

# Get data to calculate burden
#!/usr/bin/python
# modules
import AC_tools as AC
import numpy as np
import sys
import matplotlib.pyplot as plt

# Setup, choose species
species = 'O3'  # 'CO2'
RMM_species = 16. * 3.
res = '4x5'  # ( e.g. '4x5', '2x2.5', '0.5x0.666', '0.25x0.3125' )
unit, scale = AC.tra_unit(species, scale=True)
# debug/print verbose output?
debug = True
# Only consider GEOS-Chem chemical troposphere
trop_limit = True
calc_burden = False  # True
# Check if a directory was given at the command line
try:
    wd = sys.argv[1]
except IndexError:
    # Otherwise use path below
    wd = '<insert GEOS-Chem run direcotory path here>'

# get data as 4D array ( lon, lat, alt, time )
mixing_ratio = AC.get_GC_output(wd, species=species, category='IJ-AVG-$',
                                trop_limit=trop_limit)
# (print called with a single argument, so this runs on Python 2 and 3)
print(mixing_ratio.shape)

# Get data to calculate burden
def process_MLD_csv2NetCDF(debug=False, _fill_value=-9999.9999E+10):
    """
    Process NOAA WOA94 csv files into NetCDF files

    Parameters
    -------
    _fill_value (float): fill value to use for new NetCDF
    debug (bool): perform debugging and verbose printing?

    Returns
    -------
    (None)

    Notes
    -----
     - One NetCDF file is saved (via AC.save_2D_arrays_to_3DNetCDF) for each
       of the mixed layer depth (MLD) variables ('pt', 'pd', 'vd')
     - Values that cannot be converted to a float (e.g. "-") are masked and
       then set to _fill_value. Previously these values were left as NaN, as
       the NaNs were never masked before being filled.
    """
    # The MLD fields available are computed from climatological monthly mean
    # profiles of potential temperature and potential density based on three
    # different criteria: a temperature change from the ocean surface of 0.5
    # degree Celsius, a density change from the ocean surface of 0.125
    # (sigma units), and a variable density change from the ocean surface
    # corresponding to a temperature change of 0.5 degree Celsius. The MLD
    # based on the variable density criterion is designed to account for the
    # large variability of the coefficient of thermal expansion that
    # characterizes seawater.
    # Citation: Monterey, G. and Levitus, S., 1997: Seasonal Variability of
    # Mixed Layer Depth for the World Ocean. NOAA Atlas NESDIS 14, U.S.
    # Gov. Printing Office, Wash., D.C., 96 pp. 87 figs. (pdf, 13.0 MB).
    # variables for
    MLD_vars = ['pt', 'pd', 'vd']
    folder = utils.get_file_locations('data_root') + '/WOA94/'
    # - Loop MLD variables
    for var_ in MLD_vars:
        file_str = 'mld*{}*'.format(var_)
        files = sorted(glob.glob(folder+file_str))
        print(files)
        # Loop files and extract data as an array
        ars = []
        for filename in files:
            # values are assumed to have been outputted in a row major way
            # e.g. (lon, lat)
            with open(filename, 'rb') as file_:
                # Extract all values
                lines = [i.split() for i in file_]
            # Convert to floats (and masked values (e.g. "-") to NaN ),
            # then concatenate to "big" list
            big = []
            for line in lines:
                for value in line:
                    try:
                        value = float(value)
                    except ValueError:
                        value = np.nan
                    big.append(value)
            # Mask the NaNs (so that they can be filled below), then reshape
            big = np.ma.masked_invalid(np.array(big, dtype=float))
            ars += [big.reshape((180, 360)).T]
        # Debug (?) by showing 2D grid
        if debug and ars:
            # The arrays are transposed at this point, so flip the first one
            # back to match the 360 x values and 180 y values given
            plt.pcolor(np.arange(0, 360), np.arange(0, 180), ars[0].T)
            plt.colorbar()
            plt.show()
        # Force to be in COARDS format? (e.g. lat, lon) instead of (lon, lat)
        ars = [i.T for i in ars]
        # Fill masked values (the NaNs) with _fill_value,
        ars = [np.ma.filled(i, fill_value=_fill_value) for i in ars]
        # Then convert to numpy array...
        ars = [np.array(i) for i in ars]
        print([type(i) for i in ars])
        # Force dates
        # NOTE(review): these are the first 12 days of January 1985, not the
        # first day of each month - confirm this is the intended time axis
        dates = [datetime.datetime(1985, 1, i+1) for i in range(12)]
        lons = np.arange(0+0.5, 360+0.5, 1)
        lats = np.arange(-90+0.5, 90+0.5, 1)
        res = '1x1'
        # Save to NetCDF
        AC.save_2D_arrays_to_3DNetCDF(ars=ars, dates=dates, varname=var_,
                                      res=res,
                                      filename='WOA94_MLD_1x1_{}'.format(
                                          var_),
                                      lons=lons, lats=lats)
def plot_up_seasonal_averages_of_prediction(ds=None, target=None,
                                            version='v0_0_0',
                                            seperate_plots=False,
                                            units='pM',
                                            var2plot='Ensemble_Monthly_mean',
                                            vmin=None, vmax=None, dpi=320,
                                            show_plot=False, save_plot=True,
                                            var2plot_longname='ensemble prediction',
                                            extension='png',
                                            verbose=False):
    """
    Wrapper to plot up the seasonal averages of the predictions

    Parameters
    -------
    ds (xr.Dataset): 3D dataset containing variable of interest on monthly basis
    var2plot (str): which variable should be plotted?
    var2plot_longname (str): name for the variable to use in plot titles
    target (str): Name of the target variable (e.g. iodide)
    version (str): Version number or string (present in NetCDF names etc)
    seperate_plots (bool): plot up output as separate plots
    units (str): units of the variable (used to label plots)
    vmin, vmax (float): minimum and maximum values to use for the colour scale
    dpi (int): resolution of figure (dots per sq inch)
    show_plot (bool): show the window plot on screen
    save_plot (bool): save the window plot to disk
    extension (str): extension to save the window plot with (e.g. png)
    verbose (bool): print out verbose output?

    Returns
    -------
    (None)

    Notes
    -----
     - If vmin and/or vmax is not provided, it is set from the seasonal
       averages, so the same colour scale is used for all seasons.
       (Previously a provided vmax was overwritten if vmin was not given,
       and a missing vmax was not set if vmin was given.)
     - show_plot, save_plot and extension only apply to the window plot
       (seperate_plots=False); the separate plots are made by
       plot_spatial_data with its own defaults
     - The window plot is saved before it is shown, as showing a figure first
       can leave nothing to save with some matplotlib backends
    """
    # Get average by season
    ds = ds.groupby('time.season').mean(dim='time')
    # Calculate minimums and maximums over all seasons to use for all plots
    if vmin is None:
        vmin = float(ds[var2plot].min().values)
    if vmax is None:
        vmax = float(ds[var2plot].max().values)
    # Dictionary to convert season acronyms to readable text
    season2text = {
        'DJF': 'Dec-Jan-Feb', 'MAM': 'Mar-Apr-May', 'JJA': 'Jun-Jul-Aug',
        'SON': 'Sep-Oct-Nov'
    }
    # Set season ordering to be maintained for all plots
    seasons = ['DJF', 'MAM', 'JJA', 'SON']
    # Plot by season
    if seperate_plots:
        for season in seasons:
            # check and name variables
            extr_str = '{}_{}'.format(version, season2text[season])
            if verbose:
                print(season, extr_str)
            # Select data for season
            ds2plot = ds[[var2plot]].sel(season=season)
            # Set a title
            title = "Seasonal ({}) average {} for '{}' ({})"
            title = title.format(season, target, var2plot_longname, units)
            # Now plot
            plot_spatial_data(ds=ds2plot, var2plot=var2plot,
                              extr_str=extr_str, target=target, title=title,
                              vmin=vmin, vmax=vmax)
    # Or plot up as a window plot
    else:
        fig = plt.figure(figsize=(9, 5), dpi=dpi)
        projection = ccrs.Robinson()
        # Loop by season
        for n_season, season in enumerate(seasons):
            # Select data for season
            ds2plot = ds[[var2plot]].sel(season=season)
            # Setup the axis
            axn = (2, 2, n_season+1)
            ax = fig.add_subplot(*axn, projection=projection, aspect='auto')
            # Now plot
            plot_spatial_data(ds=ds2plot, var2plot=var2plot,
                              ax=ax, fig=fig,
                              target=target, title=season2text[season],
                              vmin=vmin, vmax=vmax,
                              rm_colourbar=True,
                              save_plot=False)
            # Capture the image from the axes
            # (all seasons share vmin/vmax, so any one image can be used)
            im = ax.images[0]
        # Add a colorbar using the captured image
        pad = 0.075
        cax = fig.add_axes([0.85, pad*2, 0.035, 1-(pad*4)])
        fig.colorbar(im, cax=cax, orientation='vertical', label=units)
        # Set a title
        title = "Seasonally averaged '{}' ({})"
        title = title.format(var2plot_longname, units)
        fig.suptitle(title)
        # Adjust plot aesthetics
        bottom = pad/4
        top = 1-(pad)
        left = pad/4
        right = 1-(pad*2.5)
        hspace = 0.005
        wspace = pad/3
        fig.subplots_adjust(bottom=bottom, top=top, left=left, right=right,
                            hspace=hspace, wspace=wspace)
        # Save and/or show plot
        if save_plot:
            filename = 's2s_spatial_by_season_{}_{}'.format(target, version)
            filename = AC.rm_spaces_and_chars_from_str(filename)
            plt.savefig('{}.{}'.format(filename, extension), dpi=dpi)
        if show_plot:
            plt.show()
def plot_spatial_data(ds=None, var2plot=None, LatVar='lat', LonVar='lon',
                      extr_str='', fillcontinents=True, target=None,
                      units=None,
                      show_plot=False, save_plot=True, title=None,
                      projection=ccrs.Robinson(), fig=None, ax=None, cmap=None,
                      vmin=None, vmax=None, add_meridians_parallels=False,
                      add_borders_coast=True, set_aspect=True,
                      cbar_kwargs=None,
                      xticks=True, yticks=True, rm_colourbar=False,
                      extension='png',
                      dpi=320):
    """
    Plot up 2D spatial plot of latitude vs. longitude

    Parameters
    -------
    ds (xr.Dataset): dataset containing the (2D) variable of interest
    var2plot (str): variable to plot from dataset
    LatVar, LonVar (str): names of the latitude and longitude coordinates
    extr_str (str): extra string included in the saved file name
    fillcontinents (bool): mask the continents with a grey land feature
    target (str): Name of the target variable (e.g. iodide), used in file name
    units (str): currently unused
    show_plot (bool): show the plot on screen
    save_plot (bool): save the plot to disk
    title (str): title to use for the plot
    projection (cartopy ccrs object): projection to use for spatial plots
    fig (figure instance): figure instance to plot onto
    ax (axis instance): axis to use for plotting
    cmap: colourmap passed to the xarray imshow call
    vmin, vmax (float): colour limits
    add_meridians_parallels (bool): add the meridians and parallels?
    add_borders_coast (bool): add country borders and coastlines
    set_aspect (bool): currently unused
    cbar_kwargs (dict): keyword arguments passed on for the colourbar
    xticks, yticks (bool): include ticks on y and/or x axis?
    rm_colourbar (bool): do not include a colourbar with the plot
    extension (str): extension to save file with (e.g. tiff, eps, png)
    dpi (int): resolution to use for saved image (dots per inch)

    Returns
    -------
    (object returned by the xarray imshow call)
    """
    import cartopy.crs as ccrs
    from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
    # Create a figure and/or axis if they have not been provided
    if fig is None:
        fig = plt.figure(figsize=(10, 6))
    if ax is None:
        ax = fig.add_subplot(111, projection=projection, aspect='auto')
    # Plot the data, honouring the coordinate names given by the caller
    plt_object = ds[var2plot].plot.imshow(x=LonVar, y=LatVar, ax=ax,
                                          vmax=vmax, vmin=vmin,
                                          transform=ccrs.PlateCarree(),
                                          cmap=cmap,
                                          cbar_kwargs=cbar_kwargs)
    # Fill the continents
    if fillcontinents:
        ax.add_feature(cartopy.feature.LAND, zorder=50, facecolor='lightgrey',
                       edgecolor='k')
    # Add the borders and country outlines
    if add_borders_coast:
        ax.add_feature(cartopy.feature.BORDERS, zorder=51, edgecolor='k',
                       linewidth=0.25)
        ax.add_feature(cartopy.feature.COASTLINE, zorder=52, edgecolor='k',
                       linewidth=0.05)
    # Beautify
    ax.coastlines()
    ax.set_global()
    # Add a title
    if title is not None:
        plt.title(title)
    # Add meridians and parallels?
    if add_meridians_parallels:
        # setup gridlines object (lines themselves are invisible, labels only)
        gl = ax.gridlines(crs=ccrs.PlateCarree(), draw_labels=True,
                          linewidth=0, color='gray', alpha=0.0, linestyle=None)
        # Setup meridians and parallels
        interval = 1
        parallels = np.arange(-90, 91, 30*interval)
        meridians = np.arange(-180, 181, 60*interval)
        # Now add labels
        gl.xlabels_top = False
        gl.ylabels_right = False
        gl.xlines = False
        gl.ylines = False
        if xticks:
            gl.xticks_bottom = True
            gl.xlocator = matplotlib.ticker.FixedLocator(meridians)
            gl.xformatter = LONGITUDE_FORMATTER
            gl.xlabel_style = {'size': 7.5, 'color': 'gray'}
        else:
            gl.xticks_bottom = False
            gl.xlabels_bottom = False
        if yticks:
            gl.yticks_left = True
            gl.ylocator = matplotlib.ticker.FixedLocator(parallels)
            gl.yformatter = LATITUDE_FORMATTER
            gl.ylabel_style = {'size': 7.5, 'color': 'gray'}
        else:
            gl.yticks_left = False
            # NB: attribute is 'ylabels_left' (plural), matching the other
            # label switches used above
            gl.ylabels_left = False
    # Remove the colour bar (attached to the most recently added image)
    if rm_colourbar:
        im = ax.images
        cb = im[-1].colorbar
        cb.remove()
    # Save or show plot
    if show_plot:
        plt.show()
    if save_plot:
        filename = 's2s_spatial_{}_{}'.format(target, extr_str)
        filename = '{}.{}'.format(AC.rm_spaces_and_chars_from_str(filename),
                                  extension)
        plt.savefig(filename, dpi=dpi, bbox_inches='tight', pad_inches=0.05)
    return plt_object
ver = '3.0'
# look in the "plane_flight_logs" directory
wd = wd + '/plane_flight_logs/plane.log.*'
#wd = wd+ '/plane.log.*'
# print() with a single argument behaves the same under Python 2 and 3
print(wd)
print(sorted(glob.glob(wd)))
# Aesthetics
fontsize = 10
# ----------- START PLOTTING HERE
# Get species name in TRA_?? (planeflight output) form
species_to_plot = [AC.what_species_am_i(species_to_plot, ver=ver, invert=True)]
# setup figure
fig = plt.figure(figsize=(15, 6), dpi=80, facecolor='w', edgecolor='k')
# Loop sites in site list and plot species
for i, site in enumerate(locations):
    # extract data from planeflight (csv) files
    model, names = AC.readfile(sorted(glob.glob(wd)), site,
                               years_to_use, months_to_use, days_to_use)
    # get species index in list
    k = names.index(species_to_plot[0])
    # plot up extracted data
import AC_tools as AC
# Imported for its side effect: fetches the example data files if they are
# not already present locally.
from AC_tools.Scripts import get_data_files

# Folder holding the model output
wd = "../data"

# Read the O3 output, take index 0 on the last two axes
# (presumably altitude and time - confirm against AC.get_GC_output) and
# scale from a mixing ratio (part per part) to parts per billion
my_data = AC.get_GC_output(wd, species='O3')[:, :, 0, 0] * 1E9

# Draw the map, write it to disk, then display it
AC.map_plot(my_data)
AC.save_plot("my_plot")
AC.show_plot()
def plot_vertical_fam_loss_by_route(fam='LOx', ref_spec='O3',
                                    wd=None, Mechanism='Halogens',
                                    rm_strat=False,
                                    weight_by_molecs=True, CODE_wd=None,
                                    full_vertical_grid=True, dpi=320,
                                    suffix='',
                                    save_plot=True, show_plot=False,
                                    limit_plotted_alititude=True, lw=16,
                                    Ox_loss_dict=None, fontsize=10,
                                    cmap=plt.cm.jet,
                                    verbose=True, debug=False):
    """
    Plot vertical odd oxygen (Ox) loss via route (chemical family)

    Parameters
    -------
    fam (str): tagged family to track (already compiled in KPP mechanism)
    ref_spec (str): reference species to normalise to
    wd (str): working directory ("wd") of model output
    CODE_wd (str): root of code directory containing the tagged KPP mechanism
    Mechanism (str): name of the KPP mechanism (and folder) of model output
    weight_by_molecs (bool): weight grid boxes by number of molecules
    rm_strat (bool): (fractionally) replace values in stratosphere with zeros
    debug, verbose (bool): switches to turn on/set verbosity of output to screen
    full_vertical_grid (bool): use the full vertical grid for analysis
    limit_plotted_alititude (bool): limit the plotted vertical extent to 12 km
    suffix (str): suffix in filename for saved plot
    dpi (int): resolution to use for saved image (dots per inch)
    lw (float): line width used for the (legend-only) family lines
    fontsize (float): base font size for the legend and axis text
    cmap (matplotlib colormap): colourmap the family colours are drawn from
    save_plot, show_plot (bool): save the plot to disk / show it on screen
    Ox_loss_dict (dict): dictionary of Ox loss variables/data (from
        get_Ox_loss_dicts); retrieved via AC_tools if not provided

    Returns
    -------
    (None)

    Notes
    -----
     - AC_tools includes equivalent functions for smvgear mechanisms
    """
    # - Local variables/ Plot extraction / Settings
    if Ox_loss_dict is None:
        Ox_loss_dict = AC.get_Ox_loss_dicts(
            wd=wd, CODE_wd=CODE_wd, fam=fam, ref_spec=ref_spec,
            Mechanism=Mechanism, rm_strat=rm_strat,
            weight_by_molecs=weight_by_molecs,
            full_vertical_grid=full_vertical_grid,
        )
    # Extract the variables that are used here from the dictionary
    sorted_fam_names = Ox_loss_dict['sorted_fam_names']
    fam_dict = Ox_loss_dict['fam_dict']
    ars = Ox_loss_dict['ars']
    tags = Ox_loss_dict['tags']
    Data_rc = Ox_loss_dict['Data_rc']
    alt = Data_rc['alt']
    # Combine to a single array (one entry per tagged route)
    arr = np.array(ars)
    if debug:
        print((arr.shape))
    # - Process data for plotting
    fam_tag = [fam_dict[i] for i in tags]
    fam_ars = []
    for fam_ in sorted_fam_names:
        # Get indices for routes of family
        fam_ind = [n for n, i in enumerate(fam_tag) if (i == fam_)]
        if debug:
            print((fam_ind, len(fam_ind)))
        # Select these ...
        fam_ars += [arr[fam_ind, ...]]
    # Recombine and sum by family...
    if debug:
        print(([i.shape for i in fam_ars], len(fam_ars)))
    arr = np.array([i.sum(axis=0) for i in fam_ars])
    if debug:
        print((arr.shape))
    # - Plot up as a stack-plot...
    # Normalise to total and convert to % (*100)
    arr = (arr / arr.sum(axis=0)) * 100
    # Setup figure
    fig, ax = plt.subplots(figsize=(9, 6), dpi=dpi,
                           facecolor='w', edgecolor='w')
    # Plot by family
    n_fams = len(sorted_fam_names)
    for n, label in enumerate(sorted_fam_names):
        # Cumulative % below and including this family (edges of the band)
        lower = arr[:n, :].sum(axis=0)
        upper = arr[:n + 1, :].sum(axis=0)
        colour = cmap(1. * n / n_fams)
        # Print out some summary stats
        if verbose:
            print(n, label, arr[:n, 0].sum(axis=0),
                  arr[:n + 1, 0].sum(axis=0), end=' ')
            print(arr[:n, :].sum(), arr[:n + 1, :].sum())
            print([i.shape for i in (alt, arr)])
        # Fill between X
        plt.fill_betweenx(alt, lower, upper, color=colour)
        # Plot an invisible line too, so that the family gets a legend entry
        plt.plot(lower, alt, label=label, color=colour, alpha=0, lw=lw,)
    # Beautify the plot
    plt.xlim(0, 100)
    xlabel = '% of total O$_{\\rm x}$ loss'
    plt.xlabel(xlabel, fontsize=fontsize * .75)
    plt.yticks(fontsize=fontsize * .75)
    plt.xticks(fontsize=fontsize * .75)
    plt.ylabel('Altitude (km)', fontsize=fontsize * .75)
    leg = plt.legend(loc='upper center', fontsize=fontsize)
    # Update legend line sizes and make them visible. The handles attribute
    # was renamed 'legend_handles' in newer matplotlib, so try that first.
    handles = getattr(leg, 'legend_handles', None)
    if handles is None:
        handles = leg.legendHandles
    for legobj in handles:
        legobj.set_linewidth(lw / 2)
        legobj.set_alpha(1)
    plt.ylim(alt[0], alt[-1])
    # Limit plot y axis to 12km?
    if limit_plotted_alititude:
        plt.ylim(alt[0], 12)
    # Show plot or save?
    if save_plot:
        filename = 'Ox_loss_plot_by_vertical_{}_{}'.format(Mechanism, suffix)
        plt.savefig(filename, dpi=dpi)
    if show_plot:
        plt.show()
# --- Packages
import AC_tools as AC
import numpy as np
from time import gmtime, strftime
import time
import glob

# --- Settings
res = '0.25x0.3125'
#res='0.5x0.666'
tpwd = AC.get_dir('tpwd')
#start_year, end_year = 2006,2007
#start_year, end_year = 2012,2013
#start_year, end_year = 2014,2017
start_year, end_year = 2015, 2017
debug = False
#ver='1.7'
ver = '3.0'
pf_locs_file = 'EU_GRID_{}.dat'.format(res)
#pf_locs_file ='ROW_GRID.dat'

# --- Read in site Detail
location = AC.readin_gaw_sites(pf_locs_file, all=True)
# First five columns of the site table
numbers, locs, lats, lons, pres = [location[:, i] for i in range(5)]
# NB: the tuple must be parenthesised inside a comprehension for Python 3
lats, lons, pres = [np.float64(i) for i in (lats, lons, pres)]
locs = np.array(locs)
print(lats[0:4])

# --- Set Variables
#slist = pf_var( 'slist_v9_2_NREA_red_NOy', ver=ver )#'slist_v9_2_NREA_red' )
#slist = pf_var( 'slist_ClearFlo', ver=ver )
def add_attrs2target_ds(ds, convert_to_kg_m3=False, attrs_dict=None,
                        varname='Ensemble_Monthly_mean', target='Iodide',
                        add_global_attrs=True, add_varname_attrs=True,
                        rm_spaces_from_vars=False,
                        global_attrs_dict=None,
                        convert2HEMCO_time=False, species=None):
    """
    Update attributes for iodide dataset saved as NetCDF

    Parameters
    -------
    ds (xr.Dataset): dataset to add the attributes to
    convert_to_kg_m3 (bool): convert the output units to kg/m3
    attrs_dict (dict): starting attributes for the variable 'varname'
        (copied, the dictionary passed in is not modified)
    varname (str): variable name to make changes to
    target (str): Name of the target variable (e.g. iodide)
    add_global_attrs (bool): add coordinate and global attributes to dataset
    add_varname_attrs (bool): add variable attributes to dataset
    rm_spaces_from_vars (bool): remove spaces from variable names
    global_attrs_dict (dict): dictionary of global attributes
        (copied, the dictionary passed in is not modified)
    convert2HEMCO_time (bool): convert to a HEMCO-compliant time format
    species (str): chemical name of species to use for kg/m3 conversion
        (required when convert_to_kg_m3=True)

    Returns
    -------
    (xr.dataset)

    Raises
    -------
    ValueError: if convert_to_kg_m3 is set without providing 'species'
    """
    # Work on copies so that neither a caller's dictionary nor a shared
    # default is mutated between calls
    attrs_dict = {} if attrs_dict is None else dict(attrs_dict)
    if global_attrs_dict is None:
        global_attrs_dict = {}
    else:
        global_attrs_dict = dict(global_attrs_dict)
    # Variable values and attributes
    if add_varname_attrs:
        # Convert the units?
        if convert_to_kg_m3:
            if species is None:
                err_str = "'species' must be given when convert_to_kg_m3=True"
                raise ValueError(err_str)
            # Convert units from nM to kg/m3 (=> M => mass => /m3 => /kg)
            ds[varname] = ds[varname] / 1E9 * AC.species_mass(
                species) * 1E3 / 1E3
            # for variable
            attrs_dict['units'] = "kg/m3"
            attrs_dict['units_longname'] = "kg({})/m3".format(target)
        else:
            # for variable
            attrs_dict['units'] = "nM"
            attrs_dict['units_longname'] = "Nanomolar"
        # Add COARDS variables
        attrs_dict['add_offset'] = int(0)
        attrs_dict['scale_factor'] = int(1)
        attrs_dict['missing_value'] = float(-1e-32)
        attrs_dict['_FillValue'] = float(-1e-32)
        ds[varname].attrs = attrs_dict
    # Update Name for use in external NetCDFs
    if rm_spaces_from_vars:
        # Loop a static list, as variables are added/removed in the loop
        for var_ in list(ds.data_vars):
            if ' ' in var_:
                print('removing spaces from {}'.format(var_))
                new_varname = var_.replace(' ', '_')
                # make new var as a copy of the old one
                ds[new_varname] = ds[var_].copy()
                # now remove the old var
                del ds[var_]
    # Coordinate and global values
    if add_global_attrs:
        # for lat...
        attrs_dict = ds['lat'].attrs
        attrs_dict['long_name'] = "latitude"
        attrs_dict['units'] = "degrees_north"
        attrs_dict["standard_name"] = "latitude"
        attrs_dict["axis"] = "Y"
        ds['lat'].attrs = attrs_dict
        # And lon...
        attrs_dict = ds['lon'].attrs
        attrs_dict['long_name'] = "longitude"
        attrs_dict['units'] = "degrees_east"
        attrs_dict["standard_name"] = "longitude"
        attrs_dict["axis"] = "X"
        ds['lon'].attrs = attrs_dict
        # And time
        attrs_dict = ds['time'].attrs
        attrs_dict["standard_name"] = "time"
        attrs_dict['long_name'] = attrs_dict["standard_name"]
        attrs_dict["axis"] = "T"
        if convert2HEMCO_time:
            attrs_dict['units'] = 'hours since 2000-01-01 00:00:00'
            attrs_dict['calendar'] = 'standard'
            # Assume a generic year (start of each month of the year 2000),
            # so the dataset is expected to hold 12 time steps
            REFdatetime = datetime.datetime(2000, 1, 1)
            dts = [datetime.datetime(2000, i, 1) for i in range(1, 13)]
            hours = [(i - REFdatetime).days * 24. for i in dts]
            ds['time'].values = hours
        ds['time'].attrs = attrs_dict
        # Add details to the global attribute dictionary
        History_str = 'Last Modified on: {}'
        global_attrs_dict['History'] = History_str.format(
            strftime("%B %d %Y", gmtime()))
        global_attrs_dict['Conventions'] = "COARDS"
        global_attrs_dict['Main parameterisation variable'] = varname
        global_attrs_dict['format'] = 'NetCDF-4'
        ds.attrs = global_attrs_dict
    return ds
def plot_ODR_window_plot(params=None, show_plot=False, df=None,
                         testset='Test set (strat. 20%)', units='pM',
                         target='Iodide', context="paper", xlim=None,
                         ylim=None,
                         dpi=720, verbose=False):
    """
    Show the correlations between obs. and params. as window plot

    Parameters
    -------
    params (list): names of the parameterisation columns in df, one panel each
    show_plot (bool): show the plot on screen
    df (pd.DataFrame): dataframe containing target and feature variables
    testset (str): Testset to use, e.g. stratified sampling over quartiles for 20%:80%
    units (str): units of the target in the dataframe
    target (str): Name of the target variable (e.g. iodide)
    context (str): seaborn context to use for plotting (e.g. paper, poster, talk...)
    xlim (tuple): limits for plotting x axis
    ylim (tuple): limits for plotting y axis (also sets the extent of the
        1:1 and ODR lines; the data range is used for these if not given)
    dpi (int): resolution to use for saved image (dots per inch)
    verbose (bool): print the fitted ODR parameters to screen

    Returns
    -------
    (None)
    """
    # Make sure a dataFrame has been provided
    assert isinstance(df, pd.DataFrame), "Please provide DataFrame ('df') with data"
    params = [] if params is None else list(params)
    # Setup seaborn plotting environment
    import seaborn as sns
    sns.set(color_codes=True)
    if context == "paper":
        sns.set_context("paper")
    else:
        sns.set_context("talk", font_scale=1.0)
    # Name of PDF to save plots to
    savetitle = 'Oi_prj_point_for_point_comparison_obs_vs_model_ODR_WINDOW'
    pdff = AC.plot2pdfmulti(title=savetitle, open=True, dpi=dpi)
    # label to use for target on plots (braces so the whole of 'aq' is subscript)
    target_label = '[{}$_{}$]'.format(target, '{aq}')
    # Set font size for alt_text
    f_size = 10
    # Split data into groups: the entire dataset and the withheld testset
    dfs = {}
    dfs['Entire'] = df.copy()
    dfs['Withheld'] = df.loc[df[testset] == True, :].copy()
    dsplits = list(dfs.keys())
    # Assign colors to splits
    CB_color_cycle = AC.get_CB_color_cycle()
    color_d = dict(zip(dsplits, CB_color_cycle))
    # Extent of the 1:1 line: the y limits, or the data range if not given
    if ylim is None:
        cols = [target] + params
        line_min = float(df[cols].min().min())
        line_max = float(df[cols].max().max())
    else:
        line_min, line_max = ylim[0], ylim[1]
    x_121 = np.arange(line_min-(line_max*0.05), line_max*1.05)
    # Initialise figure and axis (three panels, or more if there are more params)
    ncols = max(3, len(params))
    fig, axs = plt.subplots(1, ncols, sharex=True, sharey=True, dpi=dpi,
                            figsize=(11, 4))
    # Loop by param and compare against whole dataset
    for n_param, param in enumerate(params):
        # set axis to use
        ax = axs[n_param]
        # Use the same aspect for X and Y
        ax.set_aspect('equal')
        # Add a title to the plot
        ax.text(0.5, 1.05, param, horizontalalignment='center',
                verticalalignment='center', transform=ax.transAxes)
        # Add a 1:1 line
        ax.plot(x_121, x_121, alpha=0.5, color='k', ls='--')
        # Plot up data by dataset split
        for nsplit, split in enumerate(dsplits):
            # select the subset of the data, without any NaNs
            df_split = dfs[split].dropna()
            # get X and Y
            X = df_split[target].values
            Y = df_split[param].values
            # get RMSE
            RMSE = np.sqrt(((Y-X)**2).mean())
            # Plot up the data
            ax.scatter(X, Y, color=color_d[split], s=3, facecolor='none')
            # add ODR line
            xvalues, Y_ODR = AC.get_linear_ODR(x=X, y=Y, xvalues=x_121,
                                               return_model=False,
                                               maxit=10000)
            # print out the parameters from the ODR (the model object is
            # only needed, and so only fitted, for this)
            if verbose:
                myoutput = AC.get_linear_ODR(x=X, y=Y, xvalues=x_121,
                                             return_model=True, maxit=10000)
                print(param, split, myoutput.beta)
            ax.plot(xvalues, Y_ODR, color=color_d[split])
            # Add RMSE as alt text
            alt_text_x = 0.01
            alt_text_y = 0.95-(0.05*nsplit)
            alt_text = 'RMSE={:.1f} ({})'.format(RMSE, split)
            ax.annotate(alt_text, xy=(alt_text_x, alt_text_y),
                        textcoords='axes fraction', fontsize=f_size,
                        color=color_d[split])
        # Beautify the plot/figure
        plt.xlim(xlim)
        plt.ylim(ylim)
        ax.set_xlabel('Obs. {} ({})'.format(target_label, units))
        if (n_param == 0):
            ax.set_ylabel('Parameterised {} ({})'.format(target_label, units))
    # Adjust the subplots
    if context == "paper":
        top = 0.94
        bottom = 0.1
        left = 0.05
        right = 0.975
        wspace = 0.075
    else:
        top = 0.94
        bottom = 0.14
        left = 0.075
        right = 0.975
        wspace = 0.075
    fig.subplots_adjust(top=top, right=right, left=left, bottom=bottom,
                        wspace=wspace)
    # Save the plot
    AC.plot2pdfmulti(pdff, savetitle, dpi=dpi)
    # Save entire pdf
    AC.plot2pdfmulti(pdff, savetitle, close=True, dpi=dpi)
    plt.savefig(savetitle, dpi=dpi)
    if show_plot:
        plt.show()
    plt.close()
def _plot_emissions_map(da, title, cmap, pdff, savetitle, dpi=320,
                        show_plot=False, **imshow_kwargs):
    """
    Plot one lat/lon map of 'da' and append it as a page to the open PDF
    """
    import cartopy.crs as ccrs
    import matplotlib.pyplot as plt
    fig = plt.figure(figsize=(10, 6))
    ax = fig.add_subplot(111, projection=ccrs.PlateCarree(), aspect='auto')
    da.plot.imshow(x='lon', y='lat', ax=ax, cmap=cmap,
                   transform=ccrs.PlateCarree(), **imshow_kwargs)
    # Add a title to the plot
    plt.title(title)
    # Beautify the plot
    ax.coastlines()
    ax.set_global()
    # Save to PDF and close plot
    AC.plot2pdfmulti(pdff, savetitle, dpi=dpi)
    if show_plot:
        plt.show()
    plt.close()


def plot_up_surface_emissions(dsDH=None, runs=None, show_plot=False,
                              wds=None, dpi=320):
    """
    Plot up emissions using HEMCO NetCDF files

    Parameters
    -------
    dsDH (dict): datasets of HEMCO diagnostics, keyed by run name. The
        aggregated variables are added to these datasets.
    runs (list): names of the runs to plot (defaults to the keys of wds)
    show_plot (bool): show each plot on screen
    wds (dict): run names/locations; retrieved from get_run_dict4EGU_runs
        if not provided (only its keys are used here)
    dpi (int): resolution to use for saved PDF pages (dots per inch)

    Returns
    -------
    (None)

    Notes
    -----
     - The comparisons against the reference runs are hard-coded to plot the
       'ML_Iodide' run against 'Chance2014' and 'Macdonald2014', so dsDH must
       contain these three runs.
    """
    import matplotlib.pyplot as plt
    # names of runs to plot up?
    if wds is None:
        wds = get_run_dict4EGU_runs()
    if runs is None:
        runs = list(wds.keys())
    # - Add aggregated values to ds
    OrgVars = [
        'EmisCH2IBr_Ocean', 'EmisCH2ICl_Ocean', 'EmisCH2I2_Ocean',
        'EmisCH3I_Ocean',
    ]
    InOrgVars = ['EmisI2_Ocean', 'EmisHOI_Ocean', ]
    vars2use = OrgVars + InOrgVars
    # Aggregate variables to use?
    TotalVar = 'I_Total'
    InOrgVar = 'Inorg_Total'
    # Setup the colourbars to use
    Divergent_cmap = plt.get_cmap('RdBu_r')
    cmap = AC.get_colormap(np.arange(10))
    # Loop by run and add values
    for run in runs:
        # which dataset to use?
        print(run)
        ds = dsDH[run]
        # Add Inorg and org subtotals to array
        ds = add_Inorg_and_Org_totals2array(ds=ds)
        # Calculate totals - template off the first species
        ds[TotalVar] = dsDH[run][vars2use[0]].copy()
        # Sum values to this
        arr = ds[TotalVar].values
        for var_ in vars2use[1:]:
            print(var_)
            arr = arr + dsDH[run][var_].values
        ds[TotalVar].values = arr
        attrs = ds[TotalVar].attrs
        attrs['long_name'] = TotalVar
        ds[TotalVar].attrs = attrs
        # Store the updated dataset, as the plots below read from dsDH and
        # the helper above may have returned a new object
        dsDH[run] = ds
    # Setup PDF to save plot to
    savetitle = 'Oi_prj_emissions_diff_plots_EGU_runs'
    pdff = AC.plot2pdfmulti(title=savetitle, open=True, dpi=dpi)
    # Settings shared by all the pages
    page_kwargs = dict(pdff=pdff, savetitle=savetitle, dpi=dpi,
                       show_plot=show_plot)

    # - Plot up emissions spatial distribution of total emissions
    for run in runs:
        print(run)
        # dataset to plot - use annual sum of emissions
        ds = dsDH[run][[TotalVar]].sum(dim='time')
        # Setup a title for the plot
        PtrStr = "Total iodine emissions (Gg I) in '{}'"
        PtrStr += "\n(max={:.1f}, min={:.1f}, sum={:.1f})"
        sum_ = float(ds[TotalVar].sum().values)
        max_ = float(ds[TotalVar].max().values)
        min_ = float(ds[TotalVar].min().values)
        title = PtrStr.format(run, max_, min_, sum_)
        _plot_emissions_map(ds[TotalVar], title, cmap, **page_kwargs)

    # - Plot up emissions spatial distribution of inorg emissions
    runs2plot = [i for i in runs if (i != 'No_HOI_I2')]
    for run in runs2plot:
        print(run)
        # dataset to plot - use annual sum of emissions
        ds = dsDH[run][[InOrgVar]].sum(dim='time')
        # Setup a title for the plot
        PtrStr = "Total Inorganic iodine emissions (Gg I) in '{}'"
        PtrStr += "\n(max={:.1f}, min={:.1f}, sum={:.1f})"
        sum_ = float(ds[InOrgVar].sum().values)
        max_ = float(ds[InOrgVar].max().values)
        min_ = float(ds[InOrgVar].min().values)
        title = PtrStr.format(run, max_, min_, sum_)
        _plot_emissions_map(ds[InOrgVar], title, cmap, **page_kwargs)

    # - Plot up emissions spatial distribution inorg emissions (% of total)
    for run in runs2plot:
        print(run)
        # dataset to plot - use annual sum of emissions
        ds = dsDH[run][[InOrgVar, TotalVar]].sum(dim='time')
        # Calculate the fraction (percent)
        DIFFvar = 'Inorg/Total'
        ds[DIFFvar] = ds[InOrgVar].copy()
        ds[DIFFvar].values = ds[InOrgVar].values / ds[TotalVar].values * 100
        # Setup a title for the plot
        PtrStr = "Total Inorganic iodine emissions (% of total) in '{}' \n"
        PtrStr += '(max={:.1f}, min={:.1f})'
        max_ = float(ds[DIFFvar].max().values)
        min_ = float(ds[DIFFvar].min().values)
        title = PtrStr.format(run, max_, min_)
        _plot_emissions_map(ds[DIFFvar], title, cmap, **page_kwargs)

    # - Plot up emissions relative to the reference runs: first as a % of
    # each reference, then as the % difference from each reference
    runs2plot = ['ML_Iodide']
    REFs = ('Chance2014', 'Macdonald2014')
    for as_difference in (False, True):
        for REF in REFs:
            for run in runs2plot:
                print(run)
                # dataset to plot (use annual sum of emissions)
                ds = dsDH[run][[InOrgVar]].sum(dim='time')
                dsREF = dsDH[REF][[InOrgVar]].sum(dim='time')
                REFvalues = dsREF[InOrgVar].values
                DIFFvar = 'Inorg/Inorg({})'.format(REF)
                ds[DIFFvar] = ds[InOrgVar].copy()
                if as_difference:
                    vals = ds[InOrgVar].values - REFvalues
                    ds[DIFFvar].values = vals / REFvalues * 100
                    cmap2use, vmin, vmax = Divergent_cmap, -100, 100
                else:
                    ds[DIFFvar].values = ds[InOrgVar].values / REFvalues * 100
                    cmap2use, vmin, vmax = cmap, 0, 200
                # Setup a title for the plot
                PtrStr = "Total Inorganic iodine emissions in '{}'\n as % of {}"
                PtrStr += '(max={:.1f}, min={:.1f})'
                max_ = float(ds[DIFFvar].max().values)
                min_ = float(ds[DIFFvar].min().values)
                title = PtrStr.format(run, REF, max_, min_)
                _plot_emissions_map(ds[DIFFvar], title, cmap2use,
                                    vmin=vmin, vmax=vmax, **page_kwargs)

    # -- Save entire pdf
    AC.plot2pdfmulti(pdff, savetitle, close=True, dpi=dpi)
def plot_up_PDF_of_obs_and_predictions_WINDOW(show_plot=False, params=None,
                                              testset='Test set (strat. 20%)',
                                              target='Iodide', df=None,
                                              units='pM', xlim=None, dpi=320):
    """
    Plot up PDF plots to explore point-vs-point data

    Parameters
    -------
    show_plot (bool): show the plot on screen
    params (list): names of the parameterisation columns in df to plot
    testset (str): Testset to use, e.g. stratified sampling over quartiles for 20%:80%
    target (str): Name of the target variable (e.g. iodide)
    df (pd.DataFrame): DataFrame of data
    units (str): units of the target in the dataframe
    xlim (tuple): limits for plotting x axis (the bias panels use
        +/- the upper limit); axes are auto-scaled if not given
    dpi (int): resolution to use for the figure (dots per inch)

    Returns
    -------
    (None)

    Notes
    -----
     - NOTE(review): sns.distplot is deprecated in recent seaborn releases;
       it is kept here so that the plots stay as before.
    """
    import seaborn as sns
    sns.set(color_codes=True)
    sns.set_context("paper", font_scale=0.75)
    # Make sure a dataFrame has been provided
    assert isinstance(df, pd.DataFrame), "Please provide DataFrame ('df') with data"
    params = [] if params is None else list(params)
    # Get a dictionary of different dataset splits: the entire dataset and
    # the withheld testset
    dfs = {}
    dfs['Entire'] = df.copy()
    dfs['All (withheld)'] = df.loc[df[testset] == True, :].copy()
    # Maintain ordering of plotting
    datasets = list(dfs.keys())
    # Setup color dictionary
    CB_color_cycle = AC.get_CB_color_cycle()
    color_d = dict(zip(params, CB_color_cycle))
    # set a name of file to save data to
    savetitle = 'Oi_prj_point_for_point_comparison_obs_vs_model_PDF_WINDOW'
    # - Plot up PDF plots for the dataset and residuals
    fig = plt.figure(dpi=dpi)
    nrows = len(datasets)
    ncols = 2
    for n_dataset, dataset in enumerate(datasets):
        # Set axis for absolute PDF (left-hand column of this row)
        ax1 = fig.add_subplot(nrows, ncols, (n_dataset*ncols)+1)
        # Get data, dropping NaNs
        df_sub = dfs[dataset].dropna()
        # Number of data points
        N_ = df_sub.shape
        print(dataset, N_)
        # Only add an axis label on to the bottommost plots
        axlabel = None
        if n_dataset == (nrows-1):
            axlabel = '[{}$_{}$] ({})'.format(target, '{aq}', units)
        # - Plot up PDF plots for the dataset
        # Plot observations
        var_ = 'Obs.'
        obs_arr = df_sub[target].values
        sns.distplot(obs_arr, axlabel=axlabel, label=var_,
                     color='k', ax=ax1)
        # Loop and plot model values
        for param in params:
            arr = df_sub[param].values
            sns.distplot(arr, axlabel=axlabel, label=param,
                         color=color_d[param], ax=ax1)
        # Force y axis extent to be correct
        ax1.autoscale()
        # Force x axis to be constant
        ax1.set_xlim(xlim)
        # Beautify the plot/figure
        ylabel = 'Frequency \n ({})'
        ax1.set_ylabel(ylabel.format(dataset))
        # Add legend and column title to first plot
        if (n_dataset == 0):
            ax1.legend()
            ax1.set_title('Concentration')
        # - Plot up PDF plots for the residual dataset
        # Set axis for the bias PDF (right-hand column of this row)
        ax2 = fig.add_subplot(nrows, ncols, (n_dataset*ncols)+2)
        # Loop and plot model values minus the observations
        for param in params:
            arr = df_sub[param].values - obs_arr
            sns.distplot(arr, axlabel=axlabel, label=param,
                         color=color_d[param], ax=ax2)
        # Force y axis extent to be correct
        ax2.autoscale()
        # Force x axis to be constant (symmetric about zero)
        if xlim is not None:
            ax2.set_xlim(-xlim[1], xlim[1])
        # Add column title to first plot
        if (n_dataset == 0):
            ax2.set_title('Bias')
    # Save whole figure
    plt.savefig(savetitle)
    if show_plot:
        plt.show()
def regrid_output_to_common_res_as_NetCDFs(topmodels=None, target='Iodide',
                                           rm_Skagerrak_data=False, dsA=None,
                                           just_1x1_grids=False, debug=False):
    """
    Regrid output to various common model resolutions

    Parameters
    -------
    topmodels (list): List of models to include in re-gridded output
    target (str): Name of the target variable (e.g. iodide)
    rm_Skagerrak_data (bool): remove the single data from the Skagerrak region
    dsA (xr.Dataset): data to regrid and save to NetCDFs (read from disk if
        not provided)
    just_1x1_grids (bool): Just regrid to the 1x1 (for debugging)
    debug (bool): perform debugging and verbose printing?

    Returns
    -------
    (None)

    Notes
    -----
     - One NetCDF file is written to the working directory per grid.
    """
    # Get file and location to regrid
    if rm_Skagerrak_data:
        ext_str = '_No_Skagerrak'
    else:
        ext_str = ''
    if dsA is None:
        file2regrid = 'Oi_prj_predicted_{}_0.125x0.125{}.nc'.format(
            target, ext_str)
        folder = utils.get_file_locations('data_root')
        dsA = xr.open_dataset(folder + file2regrid)
    # Add LWI to array, if it is not already present
    if 'LWI' not in dsA:
        dsA = add_LWI2array(dsA, res='0.125x0.125',
                            inc_booleans_and_area=False)
    # Which grids should be regridded to?
    grids = reses2regrid2(just_1x1_grids=just_1x1_grids)
    vars2regrid = list(dsA.data_vars)
    # Remove any models?
    if topmodels is not None:
        # Drop the RFRs that are not in the topmodels list
        vars2keep = []
        for var2use in vars2regrid:
            if ('RFR' in var2use) and (var2use not in topmodels):
                if debug:
                    print('Deleting var:', var2use,
                          vars2regrid.index(var2use))
            else:
                vars2keep += [var2use]
        vars2regrid = vars2keep
    # Variables that only get their own, specific attributes
    Vars2NotRename = 'LWI', 'LonghurstProvince'
    # Regrid output
    for grid in grids.keys():
        # Create a dataset to re-grid into
        ds_out = xr.Dataset({
            'lat': (['lat'], grids[grid]['lat']),
            'lon': (['lon'], grids[grid]['lon']),
        })
        # Create a regridder (to be reused for all variables)
        regridder = xe.Regridder(dsA, ds_out, 'bilinear', reuse_weights=True)
        # Loop and regrid variables
        ds_l = []
        for var2use in vars2regrid:
            # Get a DataArray
            dr = dsA[var2use]
            # Regrid. Important note: extra dimensions must be on the left,
            # i.e. (time, lev, lat, lon) is correct but (lat, lon, time, lev)
            # would not work. If not, use DataArray.transpose to
            # preprocess the data.
            dr_out = regridder(dr)
            # Is the time exactly the same as in the input?
            xr.testing.assert_identical(dr_out['time'], dsA['time'])
            # Save variable
            ds_l += [dr_out]
        # Combine variables
        ds = xr.Dataset()
        for n, var2use in enumerate(vars2regrid):
            ds[var2use] = ds_l[n]
            # Add attributes
            if var2use not in Vars2NotRename:
                ds = add_attrs2target_ds(ds, add_global_attrs=False,
                                         varname=var2use)
            elif var2use == 'LWI':
                # Update attributes too (only once LWI itself is in ds)
                attrs = ds['LWI'].attrs.copy()
                attrs['long_name'] = 'Land/Water/Ice index'
                attrs['Detail'] = 'A Land-Water-Ice mask. It is 1 over continental areas, 0 over open ocean, and 2 over seaice covered ocean.'
                ds['LWI'].attrs = attrs
        # Clean up
        regridder.clean_weight_file()
        # Make sure the file has appropriate attributes
        ds = add_attrs2target_ds(ds, add_varname_attrs=False)
        # Time values
        ds = update_time_in_NetCDF2save(ds)
        # Save the file
        filename = 'Oi_prj_output_{}_field_{}'.format(target, grid)
        filename = AC.rm_spaces_and_chars_from_str(filename)
        ds.to_netcdf(filename+'.nc')
def calc_fam_loss_by_route(wd=None, fam='LOx', ref_spec='O3',
                           rm_strat=True, Mechanism='Halogens',
                           Ox_loss_dict=None, weight_by_molecs=False,
                           full_vertical_grid=False, CODE_wd=None,
                           verbose=True, debug=False,
                           rtn_by_rxn=False, rtn_by_fam=False):
    """
    Build an Ox budget table like table 4 in Sherwen et al 2016b

    Parameters
    -------
    fam (str): tagged family to track (already compiled in KPP mechanism)
    ref_spec (str): reference species to normalise to
    wd (str): working directory ("wd") of model output
    CODE_wd (str): root of code directory containing the tagged KPP mechanism
    rm_strat (bool): (fractionally) replace values in statosphere with zeros
    Ox_loss_dict (dict), dictionary of Ox loss variables/data (from
        get_Ox_loss_dicts). The keys read here are 'fam_dict', 'ars',
        'RR_dict_fam_stioch', 'RR_dict', 'tags2_rxn_num', 'tags' and
        'halogen_fams'. If None, it is built via AC.get_Ox_loss_dicts.
    Mechanism (str): name of the KPP mechanism (and folder) of model output
    weight_by_molecs (bool): weight grid boxes by number of molecules
    full_vertical_grid (bool): use the full vertical grid for analysis
    debug, verbose (bool): switches to turn on/set verbosity of output to screen
    rtn_by_rxn (bool): return the per-reaction DataFrame, with the
        'Total flux' column divided by 1E12
    rtn_by_fam (bool): return the per-family DataFrame (values divided by
        1E12), including the extra 'Total' and 'Halogens' rows

    Returns
    -------
    (pd.DataFrame or None) the per-reaction table if rtn_by_rxn is set, else
    the per-family table if rtn_by_fam is set, else None

    Notes
    -----
     - AC_tools includes equivlent functions for smvgear mechanisms
     - Always writes 'Ox_loss_budget_by_rxn_for_<Mechanism>_mechanism.csv' to
       the current directory and prints the % contribution by family
     - The 1E12 scaling follows the original in-code comment ("Update units
       to Tg O3"); the input units are presumably g - TODO confirm
    """
    # - Local variables/ Plot extraction / Settings
    if Ox_loss_dict is None:
        Ox_loss_dict = AC.get_Ox_loss_dicts(
            wd=wd, CODE_wd=CODE_wd, fam=fam, ref_spec=ref_spec,
            Mechanism=Mechanism, rm_strat=rm_strat,
            weight_by_molecs=weight_by_molecs,
            full_vertical_grid=full_vertical_grid,
        )
    # Extract variables from data/variable dictionary
    fam_dict = Ox_loss_dict['fam_dict']
    ars = Ox_loss_dict['ars']
    RR_dict_fam_stioch = Ox_loss_dict['RR_dict_fam_stioch']
    RR_dict = Ox_loss_dict['RR_dict']
    tags2_rxn_num = Ox_loss_dict['tags2_rxn_num']
    tags = Ox_loss_dict['tags']
    halogen_fams = Ox_loss_dict['halogen_fams']

    # --- Do analysis on model output
    # Sum the total mass fluxes for each reaction
    ars = [i.sum() for i in ars]
    # Sum all the Ox loss routes
    total = np.array(ars).sum()
    # Create a dictionary of values of interest
    dict_ = {
        'Total flux': [i.sum(axis=0) for i in ars],
        'Total of flux (%)': [i.sum(axis=0) / total * 100 for i in ars],
        'Family': [fam_dict[i] for i in tags],
        'rxn #': [tags2_rxn_num[i] for i in tags],
        'rxn str': [RR_dict[tags2_rxn_num[i]] for i in tags],
        'stoich': [RR_dict_fam_stioch[tags2_rxn_num[i]] for i in tags],
        'tags': tags,
    }
    # Create pandas dataframe
    df = pd.DataFrame(dict_)
    # Sort the data and have a look...
    df = df.sort_values('Total flux', ascending=False)
    if debug:
        print(df.head())
    # Sort values again and save...
    df = df.sort_values(['Family', 'Total flux'], ascending=False)
    if debug:
        print(df.head())
    df.to_csv('Ox_loss_budget_by_rxn_for_{}_mechanism.csv'.format(Mechanism))

    # Now select the most important routes
    grp = df[['Family', 'Total flux']].groupby('Family')
    # NB: this is a one-element Series (indexed by 'Total flux'), not a scalar
    total = grp.sum().sum()
    # Print the contribution by family to screen
    print((grp.sum() / total * 100))
    # Print the contribution of all the halogen routes
    hal_LOx = (grp.sum().T[halogen_fams].sum().sum() / total * 100).values[0]
    if verbose:
        print(('Total contribution of halogens is: {:.2f} %'.format(hal_LOx)))

    # Add Halogen total and general total to DataFrame
    dfFam = grp.sum().T
    dfFam['Total'] = dfFam.sum().sum()
    dfFam['Halogens'] = dfFam[halogen_fams].sum().sum()
    # Update units to Tg O3
    dfFam = dfFam.T / 1E12

    # return dictionaries of LOx by reaction or by family (in Tg O3)
    if rtn_by_rxn:
        # Only the flux column is rescaled: the table also holds string
        # columns (family, reaction string, tag), so dividing the whole
        # frame would raise a TypeError, and the percentage / reaction number
        # / stoichiometry columns are not mass fluxes.
        df_by_rxn = df.copy()
        df_by_rxn['Total flux'] = df_by_rxn['Total flux'] / 1E12
        return df_by_rxn
    if rtn_by_fam:
        return dfFam
    return None
def mk_NetCDF_from_productivity_data():
    """
    Read the productivity .csv file (Behrenfeld and Falkowski, 1997) and
    write its contents out as a NetCDF file in the current directory
    """
    # Where the input lives (update to use public facing host)
    data_root = utils.get_file_locations('data_root')
    folder = data_root + '/Productivity/'
    csv_name = 'productivity_behrenfeld_and_falkowski_1997_extrapolated.csv'
    # Coordinates on a 1/6 spaced grid; latitude also gets the closing 90
    step = 1/6.
    lon = np.arange(-180, 180, step)
    lat = np.append(np.arange(-90, 90, step), [90])
    # Variable name and the month numbers used for the time axis
    varname = 'vgpm'
    months = np.arange(1, 13)
    # Read the table (no header row)
    df = pd.read_csv(folder+csv_name, header=None)
    print(df.shape)
    # Build one single-time-step dataset per month
    # (assumes the data is in blocks of 1081 columns per month)
    values = df.values
    monthly = []
    for idx in range(12):
        first_col, last_col = idx*1081, (idx+1)*1081
        # Transpose and add a leading length-1 axis for time
        arr = values[:, first_col:last_col].T[None, ...]
        print(arr.shape)
        monthly.append(
            xr.Dataset(
                data_vars={varname: (['time', 'lat', 'lon', ], arr)},
                coords={'lat': lat, 'lon': lon, 'time': [idx]},
            )
        )
    # Stack the monthly datasets along time
    ds = xr.concat(monthly, dim='time')
    # Replace the placeholder time index with month-start dates ...
    sdate = datetime.datetime(1985, 1, 1)  # Climate model time
    ds['time'] = [AC.add_months(sdate, i-1) for i in months]
    # ... and then express those dates as hours since the start date
    hours = []
    for stamp in ds['time'].values:
        elapsed = AC.dt64_2_dt([stamp])[0] - sdate
        hours.append(elapsed.days * 24.)
    ds['time'] = hours
    ds['time'].attrs = {'units': 'hours since 1985-01-01 00:00:00'}
    # Attributes for the data variable
    ds[varname].attrs = dict(
        long_name="net primary production",
        units="mg C / m**2 / day",
    )
    # Attributes for the coordinate variables
    ds['lat'].attrs = dict(
        long_name="latitude",
        units="degrees_north",
        standard_name="latitude",
        axis="Y",
    )
    ds['lon'].attrs = dict(
        long_name="longitude",
        units="degrees_east",
        standard_name="longitude",
        axis="X",
    )
    # Global attributes
    ds.attrs = {
        'Title': 'Sea-surface productivity (Behrenfeld and Falkowski, 1997)',
        'Author': 'Tomas Sherwen ([email protected])',
        'Notes': "Data extracted from OCRA and extrapolated to poles by Martin Wadley. NetCDF contructed using xarray (xarray.pydata.org) by Tomas Sherwen. \n NOTES from oringal site (http://orca.science.oregonstate.edu/) from 'based on the standard vgpm algorithm. npp is based on the standard vgpm, using modis chl, sst4, and par as input; clouds have been filled in the input data using our own gap-filling software. For citation, please reference the original vgpm paper by Behrenfeld and Falkowski, 1997a as well as the Ocean Productivity site for the data.' ",
        'History': 'Last Modified on:' + strftime("%B %d %Y", gmtime()),
        'Conventions': "COARDS",
    }
    # Write to disk, with time as an unlimited dimension
    nc_name = 'productivity_behrenfeld_and_falkowski_1997_extrapolated.nc'
    ds.to_netcdf(nc_name, unlimited_dims={'time': True})
import AC_tools as AC
# Imported so that the example data is downloaded if it is not already there.
from AC_tools.Scripts import get_data_files

# Working directory holding the model output
wd = "../data"
# Read the GeosChem O3 output, keep a 2D slice (index 0 on the last two
# axes) and scale by 1E9 to go from part per part to part per billion
my_data = AC.get_GC_output(wd, species='O3')[:, :, 0, 0] * 1E9
# Draw the map, then save and display it
AC.map_plot(my_data)
AC.save_plot("my_plot")
AC.show_plot()
# Years output is required for?
start_year, end_year = 2014, 2016
# debug?
debug = False
# output all reactions?
all_REAs = False
#
# Which (halogen) code version is being used?
# ver = '1.6' # Iodine simulation in v9-2
# ver = '2.0' # Iodine + Bromine simulation
ver = '3.0'  # Cl-Br-I simulation
# add extra spacing? (needed for large amounts of output, like nested grids)
Extra_spacings = False

# --- Read in site Detail
numbers, lats, lons, pres, locs = AC.readin_gaw_sites(pf_loc_dat_file)
# make sure the format is numpy float 64
# (the tuple is parenthesised so the comprehension is valid on Python 3 too)
lats, lons, pres = [np.float64(i) for i in (lats, lons, pres)]
# print() with a single argument behaves the same on Python 2 and Python 3
print(lats[0:4])

# --- Set Variables
slist = AC.pf_var('slist', ver=ver)  # _REAs_all')
# extra spaces needed for runs with many points
if Extra_spacings:
    pstr = '{:>6} {:<4} {:0>2}-{:0>2}-{:0>4} {:0>2}:{:0>2} {:>6,.2f} {:>7,.2f} {:>7.2f}'
    endstr = '999999 END 0- 0- 0 0: 0 0.00 0.00 0.00'
else:
    pstr = '{:>5} {:<3} {:0>2}-{:0>2}-{:0>4} {:0>2}:{:0>2} {:>6,.2f} {:>7,.2f} {:>7.2f}'
    endstr = '99999 END 0- 0- 0 0: 0 0.00 0.00 0.00 '