# Collect, per observation station, the series returned by
# get_nearest_water, then merge the collected cubes into a single cube.
raw_series = {}
kw = {'k': 10, 'max_dist': 0.04, 'min_var': 0.01}
for station, obs in observations.iterrows():
    a = obs_data[obs['station']]
    try:
        args = cube, tree, obs.lon, obs.lat
        series, dist, idx = get_nearest_water(*args, **kw)
    # Only ValueError is handled here; a RuntimeError may also occur and is
    # left to propagate (re-running is the suggested remedy).
    except ValueError as e:
        log.warning(e)
        continue
    if series:
        raw_series[obs['station']] = series
        series = as_series(series)
        status = "Found Water"
        # Mark the matched point on the map.
        ax.plot(lon[idx], lat[idx], 'g.')
    else:
        status = "Found Land"
    log.info('[{}] {}'.format(status, obs.name))

if raw_series:
    # Save cube.
    for station, cube in raw_series.items():
        cube = add_station(standardize_fill_value(cube), station)
    try:
        cube = iris.cube.CubeList(raw_series.values()).merge_cube()
    except MergeError as e:
        # NOTE(review): on a failed merge `cube` is still the last cube from
        # the loop above, and that is what gets checked below.
        log.warning(e)
    ensure_timeseries(cube)
def save_results(regridded_profiles, tracer_columns, params):
    """
    Save (merged) results as netCDF cubes and as data tables.

    :param regridded_profiles: iterable of profile cubes; each must have
        an 'altitude' coordinate and a 'name' attribute.
    :param tracer_columns: iterable of column cubes; each must have a
        'time' coordinate and a 'name' attribute.
    :param params: dict read for the keys 'out_profiles_basename',
        'out_columns_basename', 'station_name', 'out_dir' and
        'out_format'. A "*" in either basename is replaced by the station
        name. When 'out_dir' is None it is set (in this dict) to the
        current working directory.
    :return: tuple ``([panel_profiles, dataframe_columns], [profiles
        netCDF path, columns netCDF path, profiles table path, columns
        table path])``.
    """
    # SET OUTPUT FILE NAMES
    # str.replace is used instead of the `string.replace` module function,
    # which no longer exists in Python 3; the result is identical.
    out_profiles_basename = params['out_profiles_basename'].replace(
        "*", params['station_name']
    )
    out_columns_basename = params['out_columns_basename'].replace(
        "*", params['station_name']
    )

    if params['out_dir'] is None:
        params['out_dir'] = os.path.abspath(os.getcwd())

    out_profiles_basepath = os.path.join(params['out_dir'],
                                         out_profiles_basename)
    out_columns_basepath = os.path.join(params['out_dir'],
                                        out_columns_basename)

    out_profiles_cubes_fn = out_profiles_basepath + '.nc'
    out_columns_cubes_fn = out_columns_basepath + '.nc'

    out_profiles_tables_fn = '.'.join([out_profiles_basepath,
                                       params['out_format']])
    out_columns_tables_fn = '.'.join([out_columns_basepath,
                                      params['out_format']])

    # SAVE IN NETCDF FILES (CUBES)

    # make altitude as dimension coord
    for profile_cube in regridded_profiles:
        if isinstance(profile_cube.coord('altitude'),
                      iris.coords.AuxCoord):
            iris_tools.permute_dim_aux_coords(profile_cube,
                                              'model_level_number',
                                              'altitude')

    for cube in regridded_profiles:
        iris_tools.fix_cube_attributes_vals(cube)
    for cube in tracer_columns:
        iris_tools.fix_cube_attributes_vals(cube)

    iris.save(regridded_profiles, out_profiles_cubes_fn)
    iris.save(tracer_columns, out_columns_cubes_fn)

    # SAVE AS DATA TABLES (EXCEL, CSV...)
    dataframe_profiles = {}
    for profile in regridded_profiles:
        # Work on a copy so the cubes that were just saved stay untouched.
        profile_cube = profile.copy()

        # there must be only one defined dimension coordinate for each
        # cube dimension (no auxiliary coordinate) to convert iris to pandas
        z_dim = profile_cube.coord_dims(profile_cube.coord(name='altitude'))
        iris_tools.remove_dim_aux_coords(profile_cube, z_dim)

        dataframe_units = str(profile_cube.units).replace('/', '_')
        if dataframe_units == 'unknown':
            dataframe_units = profile_cube.attributes['no_udunits2']
        dataframe_name = "{tracer}_{units}".format(
            tracer=profile_cube.attributes['name'],
            units=dataframe_units
        )

        if profile_cube.ndim == 1:
            # scalar time coordinate: build a one-row frame keyed by date
            # NOTE(review): the series is built from `profile`, not from
            # the stripped copy `profile_cube` -- confirm this is intended.
            series = ipandas.as_series(profile)
            time_coord = profile_cube.coord('time')
            date = time_coord.units.num2date(time_coord.points[0])
            dataframe_profiles[dataframe_name] = (
                pd.DataFrame({date: series}).transpose()
            )
        else:
            # dimensional time coordinate
            dataframe_profiles[dataframe_name] = (
                ipandas.as_data_frame(profile_cube)
            )

    # NOTE(review): pd.Panel is not available in recent pandas releases;
    # it is kept because the panel is part of the return value.
    panel_profiles = pd.Panel(dataframe_profiles).astype(np.float64)

    series_columns = {}
    for column in tracer_columns:
        series_name = "{tracer}_{units}".format(
            tracer=column.attributes['name'],
            units='molec_cm-2'
        )
        series_columns[series_name] = ipandas.as_series(column)
        # Re-index the series on the dates decoded from the time coordinate.
        time_coord = column.coord('time')
        date = time_coord.units.num2date(time_coord.points)
        series_columns[series_name].index = date
    dataframe_columns = pd.DataFrame(series_columns).astype(np.float64)

    # Any other 'out_format' value writes no table files.
    if params['out_format'] in ('hdf', 'hdf5'):
        panel_profiles.to_hdf(out_profiles_tables_fn, 'profiles')
        dataframe_columns.to_hdf(out_columns_tables_fn, 'columns')
    elif params['out_format'] in ('xls', 'xlsx'):
        panel_profiles.to_excel(out_profiles_tables_fn, 'profiles')
        dataframe_columns.to_excel(out_columns_tables_fn, 'columns')
    elif params['out_format'] == 'csv':
        # One CSV per panel item, named "<basepath>_<item>.csv".
        # NOTE(review): the returned out_profiles_tables_fn is therefore
        # not itself written in this branch.
        for pr in panel_profiles:
            panel_profiles[pr].to_csv('{0}_{1}.csv'
                                      .format(out_profiles_basepath, pr))
        dataframe_columns.to_csv(out_columns_tables_fn)

    return ([panel_profiles, dataframe_columns],
            [out_profiles_cubes_fn, out_columns_cubes_fn,
             out_profiles_tables_fn, out_columns_tables_fn])
def create_ts_both_graphs(calc, calc2, variable, analysis, save_out):
    """
    Used to plot the control and future run in one graph
    :param calc: control cube
    :param calc2: future cube
    :param variable: variable (used as the data column name)
    :param analysis: analysis string (used in the plot title)
    :param save_out: if set, save the figure in directories.ANALYSIS
    :return: None; the plot is drawn on a new figure
    """

    def _to_clean_frame(series):
        """Re-index a series on a DatetimeIndex and drop unusable rows."""
        stamps = pd.DatetimeIndex(
            [convert_cftime_datetime(stamp) for stamp in series.index])
        frame = pd.DataFrame(data=series.to_numpy(), index=stamps,
                             columns=[variable])
        # Rows with values >= 1e+20 and NaN rows are removed.
        frame.drop(frame[frame[variable] >= 1e+20].index, inplace=True)
        frame.dropna(inplace=True)
        return frame

    # Construct figure and axes
    sns.set(rc={'figure.figsize': (11, 4)})

    # Convert cube to pandas series dataframe
    try:
        pd_data = as_series(calc)
        pd_data2 = as_series(calc2)
    except Exception:
        print(
            "WARNING in function create_timeseries: cannot construct timeseries with more than 1-dimension cube."
        )
        return None

    if len(pd_data.index) == 1:
        print(
            "WARNING in function create_timeseries: cannot construct timeseries of one value."
        )
        return None

    # Each series is converted on its own index, so the two runs do not
    # need to have the same number of time steps.
    df = _to_clean_frame(pd_data)
    df2 = _to_clean_frame(pd_data2)

    # Plot the timeseries
    fig, axs = plt.subplots(1, 1)
    title_name = ("Control and future run of spatial " + analysis + " of "
                  + calc.name())

    # Both runs are drawn on the axes created above; without ax= pandas
    # opens a second figure and leaves this one empty.
    df.plot(ax=axs, title=title_name, grid=True, alpha=0.7, color='b',
            ls='-')
    df2.plot(ax=axs, color='g', ls='-')
    axs.legend(["Control", "Future"])
    axs.set_xlabel("Time (month/year)")
    axs.set_ylabel(calc.name())
    # Applied once title and labels exist so they are taken into account.
    fig.tight_layout()

    if save_out:
        file_name = make_into_file_name(title_name)
        plt.savefig(os.path.join(directories.ANALYSIS, file_name))
        print("Timeseries plot is saved in the " + directories.ANALYSIS +
              " folder as a png file.")
def create_timeseries_helper(cube, variable, start_date, end_date, time_str,
                             monthly, title_name, save_out, spatial,
                             second_date_given, plot, analysis, start2, end2):
    """
    Create timeseries helper function

    Converts the cube to a date-indexed table, optionally saves it as a
    text file and optionally draws a timeseries plot and a monthly box plot.

    :param cube: cube to convert with as_series
    :param variable: name used for the data column
    :param start_date: sequence whose element [2] is used in the file name
    :param end_date: sequence whose element [2] is used in the file name
    :param time_str: string used in the file name
    :param monthly: if set, no monthly-mean line is drawn and the rolling
        window is 12 samples instead of 365
    :param title_name: title of the timeseries plot
    :param save_out: if set, save the table and figures in
        directories.ANALYSIS
    :param spatial: not used in this function
    :param second_date_given: if set, start2/end2 are added to the file name
    :param plot: if set, draw the plots
    :param analysis: analysis string used in the file names
    :param start2: sequence whose element [2] is used in the file name
    :param end2: sequence whose element [2] is used in the file name
    :return: None
    """
    # Convert cube to pandas series dataframe
    try:
        pd_data = as_series(cube)
    except Exception:
        print(
            "WARNING in function create_timeseries: cannot construct timeseries with more than 1-dimension cube."
        )
        return None

    # Get x values in series
    indices = pd_data.index
    if len(indices) == 1:
        print(
            "WARNING in function create_timeseries: cannot construct timeseries of one value."
        )
        return None

    # Convert cftime.datetime to datetime
    indx = pd.DatetimeIndex(
        [convert_cftime_datetime(stamp) for stamp in indices])

    data = np.asarray(cube.data)
    df = pd.DataFrame(data=data, index=indx, columns=[variable])

    # Delete rows whose value is >= 1e+20, then rows holding NaN
    df.drop(df[df[variable] >= 1e+20].index, inplace=True)
    df.dropna(inplace=True)

    # Construct file name
    if second_date_given:
        date_str = " " + time_str + "_multimodel"
    else:
        date_str = " " + time_str + " "
    file_name = ("ts_" + analysis + '_' + cube.name() + date_str + "_"
                 + str(start_date[2]) + "-" + str(end_date[2]))
    if second_date_given:
        file_name = (file_name + "_" + str(start2[2]) + '-' + str(end2[2])
                     + '_multi_model')
    file_name = make_into_file_name(file_name)

    # Save table to file
    if save_out:
        df.to_csv(os.path.join(directories.ANALYSIS, file_name + '.txt'),
                  sep=',',
                  index=True,
                  index_label='dates')
        print("Timeseries data is saved in the " + directories.ANALYSIS +
              " folder as a txt file.")

    if plot:
        # Construct figure and axes
        sns.set(rc={'figure.figsize': (11, 4)})
        # Counted on the full index, before any rows were dropped.
        num_years = len(np.unique(indx.year))

        # Plot the timeseries
        fig, axs = plt.subplots(1, 1)
        df.plot(ax=axs,
                title=title_name,
                grid=True,
                legend=False,
                alpha=0.7,
                color='m',
                ls='-')
        # Labels are collected in drawing order so the legend always
        # matches the lines on the axes, including the 3-year case.
        legend_labels = ['input']
        if not monthly:
            df.resample('BM').mean().plot(ax=axs, style='-')
            legend_labels.append('monthly mean')
        if num_years > 3:
            window = 12 if monthly else 365
            df.rolling(window).mean().plot(ax=axs, style='-')
            legend_labels.append('one-year rolling mean')
        axs.legend(legend_labels, loc='upper left')

        if second_date_given:
            axs.set_xlabel("Time (the control year)")
        else:
            axs.set_xlabel("Time (month/year)")
        axs.set_ylabel(cube.name())

        if save_out:
            plt.savefig(os.path.join(directories.ANALYSIS, file_name))
            print("Timeseries plot is saved in the " + directories.ANALYSIS +
                  " folder as a png file.")

        # Boxplot - Yearly seasonality
        fig, axs1 = plt.subplots(1, 1)
        df['Month'] = df.index.month
        sns.boxplot(ax=axs1, data=df, x='Month', y=variable)
        axs1.set_title("Yearly seasonality of " + cube.name())
        axs1.set_xlabel("Months")
        axs1.set_ylabel(cube.name())

        if save_out:
            file_name = "bp_" + analysis + '_' + cube.name() + date_str
            file_name = make_into_file_name(file_name)
            plt.savefig(os.path.join(directories.ANALYSIS, file_name))
            print("Box plot is saved in the " + directories.ANALYSIS +
                  " folder as a png file.")
def create_timeseries(list_ens,
                      start_date,
                      end_date,
                      variables,
                      monthly=False,
                      save_out=True,
                      ens_num=1,
                      func_name=None,
                      second_date_given=False,
                      plot=None):
    """
    Analyse the data given - in this case it computes the timeseries
    (assumes grid/sample point)
    :param list_ens: the list of ensembles (dicts) containing the data of
        the climate variables
    :param start_date: start of the extracted period, list [d, m, y]
    :param end_date: end of the extracted period, list [d, m, y]
    :param variables: one variable name or a list of variable names; one
        timeseries is produced per variable
    :param monthly: data is stored in monthly increments (time = 12) else
        assumed (time = 365)
    :param save_out: if set, then save output of the timeseries
    :param ens_num: selection of ensemble to use (1-based)
    :param func_name: if user function analysis used, this is the function
        name
    :param second_date_given: if set (together with func_name), the title
        is prefixed with "Multi model"
    :param plot: if set, draw the timeseries plot and the monthly box plot
    :return: None; processing stops at the first variable that cannot be
        converted to a timeseries
    """
    # Make sure data structures are not empty
    assert list_ens is not None
    assert variables is not None

    # If variables is just one object, cast to list
    if not isinstance(variables, list):
        variables = [variables]

    # Daily or monthly
    time_str = "monthly" if monthly else "daily"

    # Construct figure and axes
    sns.set(rc={'figure.figsize': (11, 4)})
    # Convert pandas dates to matplotlib date format
    register_matplotlib_converters()

    for variable in variables:
        # Get cube from dictionary
        cube = list_ens[ens_num - 1][variable]

        # Construct title name
        title_name = (cube.name() + " measured " + time_str + " between "
                      + str(start_date[2]) + " and " + str(end_date[2]))
        if func_name is not None:
            title_name = func_name + " of " + title_name
        if second_date_given and func_name is not None:
            title_name = "Multi model " + title_name

        # Convert cube to pandas series dataframe
        try:
            pd_data = as_series(cube)
        except Exception:
            print(
                "WARNING in function create_timeseries: cannot construct timeseries with more than 1-dimension cube."
            )
            return None

        # Get x values in series
        indices = pd_data.index
        if len(indices) == 1:
            print(
                "WARNING in function create_timeseries: cannot construct timeseries of one value."
            )
            return None

        # Convert cftime.datetime to datetime
        indx = pd.DatetimeIndex(
            [convert_cftime_datetime(stamp) for stamp in indices])

        data = np.asarray(cube.data)
        df = pd.DataFrame(data=data, index=indx, columns=[variable])

        # Construct file name
        date_str = (" " + time_str + " " + str(start_date[2]) + "_"
                    + str(end_date[2]))
        file_name = make_into_file_name("ts_" + cube.name() + date_str)

        # Save table to file for each variable
        if save_out:
            df.to_csv(os.path.join(directories.ANALYSIS, file_name + '.txt'),
                      sep=',',
                      index=True,
                      index_label='dates')
            print("Timeseries data is saved in the " + directories.ANALYSIS +
                  " folder as a txt file.")

        if plot:
            num_years = len(np.unique(indx.year))

            # Plot the timeseries
            fig, axs = plt.subplots(1, 1)
            df.plot(ax=axs,
                    title=title_name,
                    grid=True,
                    legend=False,
                    alpha=0.7,
                    color='m',
                    ls='-')
            # Labels are collected in drawing order so the legend always
            # matches the lines on the axes, including the 3-year case.
            legend_labels = ['input']
            if not monthly:
                df.resample('BM').mean().plot(ax=axs, style='-')
                legend_labels.append('monthly mean')
            if num_years > 3:
                window = 12 if monthly else 365
                df.rolling(window).mean().plot(ax=axs, style='-')
                legend_labels.append('one-year rolling mean')
            axs.legend(legend_labels, loc='upper left')

            axs.set_xlabel("Time (month/year)")
            axs.set_ylabel(cube.name())

            if save_out:
                plt.savefig(os.path.join(directories.ANALYSIS, file_name))
                print("Timeseries plot is saved in the " +
                      directories.ANALYSIS + " folder as a png file.")

            # Boxplot - Yearly seasonality (drawn but not saved here)
            fig, axs1 = plt.subplots(1, 1)
            df['Month'] = df.index.month
            sns.boxplot(ax=axs1, data=df, x='Month', y=variable)
            axs1.set_title("Yearly seasonality of " + cube.name())
            axs1.set_xlabel("Months")
            axs1.set_ylabel(cube.name())
kw = dict(k=10, max_dist=0.08, min_var=0.01) args = cube, tree, obs["lon"], obs["lat"] try: series, dist, idx = get_nearest_water(*args, **kw) except RuntimeError as e: print("Cannot download {!r}.\n{}".format(cube, e)) series = None except ValueError: status = "No Data" print("[{}] {}".format(status, obs["station_name"])) continue if not series: status = "Land " else: raw_series.update({station: series}) series = as_series(series) status = "Water " print("[{}] {}".format(status, obs["station_name"])) if raw_series: # Save cube. for station, cube in raw_series.items(): cube = add_station(cube, station) try: cube = iris.cube.CubeList(raw_series.values()).merge_cube() except MergeError as e: print(e) ensure_timeseries(cube) try: iris.save(cube, fname) except AttributeError: # FIXME: we should patch the bad attribute instead of removing everything. cube.attributes = {}
# print() call form works on both Python 2 and Python 3.
print(cl)

# <codecell>

fig, ax = plt.subplots(figsize=(12, 3.5))
qplt.plot(cl[2], label=cl[2].name())
plt.grid()

# <headingcell level=2>

# You can also convert an Iris cube object to a Pandas Series object

# <codecell>

from iris.pandas import as_cube, as_series, as_data_frame
df = as_series(cl[2])
df.head()

# <codecell>

# The trailing semicolon is kept from the notebook cell.
df.plot(figsize=(12, 3.5));

# <codecell>

df.describe()

# <codecell>