def calc_statistics(self, constrain_val_range=False,
                    use_area_weights=False, **kwargs):
    """Calculate statistics from model and obs data

    Wrapper for function :func:`pyaerocom.mathutils.calc_statistics`

    Parameters
    ----------
    constrain_val_range : bool
        if True, ``lowlim`` and ``highlim`` are set in ``kwargs`` using
        the lower and upper limit of the :class:`Variable` created from
        the second entry of ``self.meta['var_name']``.
    use_area_weights : bool
        if True and if ``self.has_latlon_dims`` is True, the flattened
        first entry of ``self.area_weights`` is passed as ``weights``.
    **kwargs
        additional keyword args passed to the wrapped function. Must not
        contain ``weights``.

    Returns
    -------
    dict
        dictionary containing statistical parameters, extended by the
        keys ``num_coords_with_data`` and ``num_coords_tot``.

    Raises
    ------
    ValueError
        if ``weights`` is provided via ``kwargs``.
    """
    if constrain_val_range:
        var = Variable(self.meta['var_name'][1])
        kwargs['lowlim'] = var.lower_limit
        kwargs['highlim'] = var.upper_limit

    # Explicit weights are rejected in every case; report the actual state
    # of use_area_weights so the message is not misleading.
    if 'weights' in kwargs:
        if use_area_weights:
            raise ValueError('Invalid input combination: weights are '
                             'provided in addition to '
                             'use_area_weights=True...')
        raise ValueError('Invalid input combination: weights are provided '
                         'but use_area_weights is set to False...')

    if use_area_weights and self.has_latlon_dims:
        kwargs['weights'] = self.area_weights[0].flatten()

    stats = calc_statistics(self.data.values[1].flatten(),
                            self.data.values[0].flatten(),
                            **kwargs)

    stats['num_coords_with_data'] = self.num_coords_with_data
    stats['num_coords_tot'] = self.num_coords
    return stats
def calc_statistics(self, constrain_val_range=False, **kwargs):
    """Calculate statistics from data ensemble

    Wrapper for function :func:`pyaerocom.mathutils.calc_statistics`

    Parameters
    ----------
    constrain_val_range : bool
        if True, ``lowlim`` and ``highlim`` in ``kwargs`` are set from the
        lower and upper limit of the :class:`Variable` created from the
        second entry of ``self.meta['var_name']``.
    **kwargs
        additional keyword args passed on to the wrapped function.

    Returns
    -------
    dict
        dictionary containing statistical parameters
    """
    if constrain_val_range:
        limits = Variable(self.meta['var_name'][1])
        kwargs.update(lowlim=limits.lower_limit,
                      highlim=limits.upper_limit)

    # Entry 1 of the data array is passed first, entry 0 second.
    vals_idx1 = self.data.values[1].flatten()
    vals_idx0 = self.data.values[0].flatten()
    return calc_statistics(vals_idx1, vals_idx0, **kwargs)
def _process_sites(data, jsdate, regions_how, meta_glob):
    """Compute time series, map and scatter data for each site

    Parameters
    ----------
    data : dict
        colocated data objects (or None) keyed by temporal resolution.
        Must contain key ``'monthly'``, which is used to retrieve the site
        coordinates and names.
    jsdate : dict
        time stamps for each key in ``data``; stored unchanged under
        ``<resolution>_date`` in the time series dictionaries.
    regions_how : str
        either ``'default'`` (region is found via
        :func:`find_closest_region_coord`) or ``'country'`` (region is
        read from the ``country`` coordinate of the monthly data).
    meta_glob : dict
        metadata merged into the time series dictionary of each site.

    Returns
    -------
    tuple
        3-element tuple ``(map_data, scat_data, ts_objs)``; only sites
        that have observation data in at least one resolution are
        included.

    Raises
    ------
    ValueError
        if ``regions_how`` is neither ``'default'`` nor ``'country'`` and
        there is at least one site to process.
    """
    ts_objs = []
    map_data = []
    scat_data = {}

    for cd in data.values():
        if isinstance(cd, ColocatedData):
            _check_flatten_latlon_dims(cd)
            assert cd.dims == ('data_source', 'time', 'station_name')

    mon = data['monthly']

    stats_dummy = _init_stats_dummy()
    default_regs = get_all_default_regions(use_all_in_ini=False)

    lats = mon.data.latitude.values.astype(np.float64)
    lons = mon.data.longitude.values.astype(np.float64)
    if 'altitude' in mon.data.coords:
        alts = mon.data.altitude.values.astype(np.float64)
    else:
        alts = [np.nan] * len(lats)

    if regions_how == 'country':
        countries = mon.data.country.values

    for i, stat_name in enumerate(mon.data.station_name.values):
        has_data = False
        ts_data = _init_ts_data()
        ts_data['station_name'] = stat_name
        ts_data.update(meta_glob)

        stat_lat = lats[i]
        stat_lon = lons[i]
        stat_alt = alts[i]

        if regions_how == 'default':
            region = find_closest_region_coord(stat_lat, stat_lon,
                                               default_regs=default_regs)
        elif regions_how == 'country':
            region = countries[i]
        else:
            # Without this branch ``region`` would be unbound (or carry
            # over from a previous iteration).
            raise ValueError('Invalid input for regions_how: {}. Choose '
                             'from default or country'
                             .format(regions_how))

        # station information for map view
        map_stat = {'site': stat_name,
                    'lat': stat_lat,
                    'lon': stat_lon,
                    'alt': stat_alt,
                    'region': region}

        for tres, coldata in data.items():
            stats_key = '{}_statistics'.format(tres)
            if coldata is None:
                map_stat[stats_key] = dict(stats_dummy)
                continue

            arr = coldata.data
            obs_vals = arr.data[0, :, i]
            if np.isnan(obs_vals).all():
                map_stat[stats_key] = dict(stats_dummy)
                continue

            has_data = True
            mod_vals = arr.data[1, :, i]

            ts_data['{}_date'.format(tres)] = jsdate[tres]
            ts_data['{}_obs'.format(tres)] = obs_vals.tolist()
            ts_data['{}_mod'.format(tres)] = mod_vals.tolist()

            station_statistics = calc_statistics(mod_vals, obs_vals,
                                                 min_num_valid=1)
            map_stat[stats_key] = {k: np.float64(v) for k, v
                                   in station_statistics.items()}

        if has_data:
            ts_objs.append(ts_data)
            map_data.append(map_stat)
            scat_data[str(stat_name)] = {'obs': ts_data['monthly_obs'],
                                         'mod': ts_data['monthly_mod'],
                                         'region': region}

    return (map_data, scat_data, ts_objs)
def _init_stats_dummy():
    """Create statistics dictionary for locations without data

    Returns
    -------
    dict
        dictionary with the same keys as the result of
        ``calc_statistics([1], [1])``, with every value set to NaN.
    """
    template = calc_statistics([1], [1])
    return {key: np.nan for key in template}
def plot_scatter(x_vals, y_vals, var_name=None, var_name_ref=None,
                 x_name=None, y_name=None, start=None, stop=None,
                 ts_type=None, unit=None, stations_ok=None,
                 filter_name=None, lowlim_stats=None, highlim_stats=None,
                 loglog=None, savefig=False, save_dir=None,
                 save_name=None, ax=None, figsize=None, fontsize_base=10):
    """Method that performs a scatter plot of data in AEROCOM format

    Parameters
    ----------
    x_vals : ndarray
        1D array (or list) of observation data points (x-axis)
    y_vals : ndarray
        1D array (or list) of model data points (y-axis)
    var_name : :obj:`str`, optional
        name of variable that is plotted
    var_name_ref : :obj:`str`, optional
        name of variable of reference data
    x_name : :obj:`str`, optional
        Name of observation network
    y_name : :obj:`str`, optional
        Name / ID of model
    start : :obj:`str` or :obj`datetime` or similar
        start time of data
    stop : :obj:`str` or :obj`datetime` or similar
        stop time of data
    ts_type : :obj:`str`, optional
        temporal resolution, appended to the title and annotated in the
        lower right part of the plot
    unit : optional
        unit of variable, appended to the variable name in the annotation
        unless it is None or equal to 1
    stations_ok : optional
        value shown after "# st:" in the annotation
    filter_name : :obj:`str`, optional
        annotated in the lower right part of the plot
    lowlim_stats, highlim_stats : optional
        passed as third and fourth positional argument to
        ``calc_statistics``
    loglog : bool, optional
        if True, a loglog plot is created. If None, the setting is read
        from the variable parameters (``scat_loglog``).
    savefig : bool
        if True, the figure is saved to ``save_dir`` / ``save_name``
    save_dir : :obj:`str`, optional
        output directory (required if ``savefig`` is True)
    save_name : :obj:`str`, optional
        output file name (required if ``savefig`` is True)
    ax : optional
        axes to plot into. If None, a new figure is created.
    figsize : tuple, optional
        size of the new figure, defaults to (10, 8)
    fontsize_base : int
        basic fontsize; titles and axis labels use ``fontsize_base + 4``

    Returns
    -------
    axes
        the axes that was plotted into

    Raises
    ------
    IOError
        if ``savefig`` is True and ``save_dir`` or ``save_name`` is None
    """
    if isinstance(y_vals, list):
        y_vals = np.asarray(y_vals)
    if isinstance(x_vals, list):
        x_vals = np.asarray(x_vals)

    try:
        VAR_PARAM = const.VAR_PARAM[var_name]
    except Exception:
        VAR_PARAM = const.VAR_PARAM.DEFAULT

    if loglog is None:
        loglog = VAR_PARAM.scat_loglog

    xlim = VAR_PARAM['scat_xlim']
    ylim = VAR_PARAM['scat_ylim']
    if xlim is None or ylim is None:
        # no limits configured for this variable: use the data range
        low = np.min([np.nanmin(x_vals), np.nanmin(y_vals)])
        high = np.max([np.nanmax(x_vals), np.nanmax(y_vals)])
        xlim = [low, high]
        ylim = [low, high]
    else:
        # Work on copies: the limits are modified below and must not
        # change the shared variable configuration.
        xlim = list(xlim)
        ylim = list(ylim)

    if ax is None:
        if figsize is None:
            figsize = (10, 8)
        fig, ax = plt.subplots(figsize=figsize)
    else:
        # needed further below if savefig is True
        fig = ax.figure

    if var_name is None:
        var_name = 'n/d'

    statistics = calc_statistics(y_vals, x_vals,
                                 lowlim_stats, highlim_stats)

    if loglog:
        ax.loglog(x_vals, y_vals, ' k+')
    else:
        ax.plot(x_vals, y_vals, ' k+')

    try:
        title = start_stop_str(start, stop, ts_type)
        if ts_type is not None:
            title += ' ({})'.format(ts_type)
    except Exception:
        title = ''

    if not loglog:
        xlim[0] = 0
        ylim[0] = 0

    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

    xlbl = '{}'.format(x_name)
    if var_name_ref is not None:
        xlbl += ' ({})'.format(var_name_ref)
    ax.set_xlabel(xlbl, fontsize=fontsize_base+4)
    ax.set_ylabel('{}'.format(y_name), fontsize=fontsize_base+4)
    ax.set_title(title, fontsize=fontsize_base+4)

    ax.xaxis.set_major_formatter(ScalarFormatter())
    ax.yaxis.set_major_formatter(ScalarFormatter())
    ax.tick_params(labelsize=fontsize_base)

    # 1:1 reference line spanning the axis limits applied above
    ax.plot(xlim, ylim, '-', color='grey')

    xypos = {'var_info': (0.01, .95),
             'refdata_mean': (0.01, 0.90),
             'data_mean': (0.01, 0.86),
             'nmb': (0.01, 0.82),
             'mnmb': (0.35, 0.82),
             'R': (0.01, 0.78),
             'rms': (0.35, 0.78),
             'R_kendall': (0.01, 0.74),
             'fge': (0.35, 0.74),
             'ts_type': (0.8, 0.1),
             'filter_name': (0.8, 0.06)}

    var_str = var_name
    if unit is not None and unit != 1:
        var_str += ' [{}]'.format(unit)

    ax.annotate("{} #: {} # st: {}".format(var_str,
                                           statistics['success'],
                                           stations_ok),
                xy=xypos['var_info'], xycoords='axes fraction',
                fontsize=fontsize_base+4, color='red')

    # statistics shown in the upper left part (key, format string)
    stat_labels = (('refdata_mean', 'Mean (x-data): {:.3f}'),
                   ('data_mean', 'Mean (y-data): {:.3f}'),
                   ('nmb', 'NMB: {:.1f}%'),
                   ('mnmb', 'MNMB: {:.1f}%'),
                   ('R', 'R (Pearson): {:.3f}'),
                   ('rms', 'RMS: {:.3f}'),
                   ('R_kendall', 'R (Kendall): {:.3f}'),
                   ('fge', 'FGE: {:.1f}'))
    for key, fmt in stat_labels:
        ax.annotate(fmt.format(statistics[key]),
                    xy=xypos[key], xycoords='axes fraction',
                    fontsize=fontsize_base, color='red')

    # right lower part
    for key, value in (('ts_type', ts_type), ('filter_name', filter_name)):
        ax.annotate('{}'.format(value),
                    xy=xypos[key], xycoords='axes fraction',
                    ha='center', fontsize=fontsize_base, color='black')

    ax.set_aspect('equal')

    if savefig:
        if save_dir is None or save_name is None:
            raise IOError('Cannot save figure: save_dir and save_name '
                          'need to be specified')
        fig.savefig(os.path.join(save_dir, save_name))
    return ax
def plot_scatter_aerocom(x_vals, y_vals, var_name=None, var_name_ref=None,
                         x_name=None, y_name=None, start=None, stop=None,
                         ts_type=None, unit=None, stations_ok=None,
                         filter_name=None, lowlim_stats=None,
                         highlim_stats=None, loglog=None, savefig=False,
                         save_dir=None, save_name=None, ax=None,
                         figsize=None, fontsize_base=10, marker='+',
                         color='k', alpha=0.5, **kwargs):
    """Method that performs a scatter plot of data in AEROCOM format

    Parameters
    ----------
    x_vals : ndarray
        1D array (or list) of observation data points (x-axis)
    y_vals : ndarray
        1D array (or list) of model data points (y-axis)
    var_name : :obj:`str`, optional
        name of variable that is plotted
    var_name_ref : :obj:`str`, optional
        name of variable of reference data
    x_name : :obj:`str`, optional
        Name of observation network
    y_name : :obj:`str`, optional
        Name / ID of model
    start : :obj:`str` or :obj`datetime` or similar
        start time of data
    stop : :obj:`str` or :obj`datetime` or similar
        stop time of data
    ts_type : :obj:`str`, optional
        temporal resolution, appended to the title and annotated in the
        lower right part of the plot
    unit : optional
        unit of variable ('N/D' is used if None). Appended to the variable
        name in the annotation unless its string is '1' or 'no_unit'.
    stations_ok : optional
        value shown after "# st:" in the annotation
    filter_name : :obj:`str`, optional
        annotated in the lower right part of the plot
    lowlim_stats, highlim_stats : optional
        passed as third and fourth positional argument to
        ``calc_statistics``
    loglog : bool, optional
        if True, a loglog plot is created. If None, the setting is read
        from the variable settings (``scat_loglog``).
    savefig : bool
        if True, the figure is saved to ``save_dir`` / ``save_name``
    save_dir : :obj:`str`, optional
        output directory (required if ``savefig`` is True)
    save_name : :obj:`str`, optional
        output file name (required if ``savefig`` is True)
    ax : optional
        axes to plot into. If None, a new figure is created.
    figsize : tuple, optional
        size of the new figure, defaults to (10, 8)
    fontsize_base : int
        basic fontsize; titles and axis labels use ``fontsize_base + 4``
    marker, color, alpha
        marker style, color and transparency of the data points
    **kwargs
        additional keyword args passed to the plot call of the data points

    Returns
    -------
    axes
        instance of :class:`matplotlib.axes`

    Raises
    ------
    IOError
        if ``savefig`` is True and ``save_dir`` or ``save_name`` is None
    """
    if isinstance(y_vals, list):
        y_vals = np.asarray(y_vals)
    if isinstance(x_vals, list):
        x_vals = np.asarray(x_vals)

    try:
        VARS = const.VARS[var_name]
    except Exception:
        VARS = const.VARS.DEFAULT

    if loglog is None:
        loglog = VARS.scat_loglog

    xlim = VARS['scat_xlim']
    ylim = VARS['scat_ylim']
    if xlim is None or ylim is None:
        # no limits configured for this variable: use the data range
        low = np.min([np.nanmin(x_vals), np.nanmin(y_vals)])
        high = np.max([np.nanmax(x_vals), np.nanmax(y_vals)])
        xlim = [low, high]
        ylim = [low, high]
    else:
        # Work on copies: the limits are modified below and must not
        # change the shared variable configuration.
        xlim = list(xlim)
        ylim = list(ylim)

    if ax is None:
        if figsize is None:
            figsize = (10, 8)
        fig, ax = plt.subplots(figsize=figsize)
    else:
        # needed further below if savefig is True
        fig = ax.figure

    if var_name is None:
        var_name = 'n/d'

    statistics = calc_statistics(y_vals, x_vals,
                                 lowlim_stats, highlim_stats)

    if loglog:
        ax.loglog(x_vals, y_vals, ls='none', color=color, marker=marker,
                  alpha=alpha, **kwargs)
    else:
        ax.plot(x_vals, y_vals, ls='none', color=color, marker=marker,
                alpha=alpha, **kwargs)

    try:
        title = start_stop_str(start, stop, ts_type)
        if ts_type is not None:
            title += ' ({})'.format(ts_type)
    except Exception:
        title = ''

    if not loglog:
        xlim[0] = 0
        ylim[0] = 0
    elif any(x[0] < 0 for x in [xlim, ylim]):
        # negative lower limits cannot be used on log axes; start one
        # order of magnitude below the smallest y-value instead
        low = 10**(float(exponent(abs(np.nanmin(y_vals))) - 1))
        xlim[0] = low
        ylim[0] = low

    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

    xlbl = '{}'.format(x_name)
    if var_name_ref is not None:
        xlbl += ' ({})'.format(var_name_ref)
    ax.set_xlabel(xlbl, fontsize=fontsize_base+4)
    ax.set_ylabel('{}'.format(y_name), fontsize=fontsize_base+4)
    ax.set_title(title, fontsize=fontsize_base+4)

    ax.xaxis.set_major_formatter(ScalarFormatter())
    ax.yaxis.set_major_formatter(ScalarFormatter())
    ax.tick_params(labelsize=fontsize_base)

    # 1:1 reference line spanning the axis limits
    ax.plot(xlim, ylim, '-', color='grey')

    xypos = {'var_info': (0.01, .95),
             'refdata_mean': (0.01, 0.90),
             'data_mean': (0.01, 0.86),
             'nmb': (0.01, 0.82),
             'mnmb': (0.35, 0.82),
             'R': (0.01, 0.78),
             'rms': (0.35, 0.78),
             'R_kendall': (0.01, 0.74),
             'fge': (0.35, 0.74),
             'ts_type': (0.8, 0.1),
             'filter_name': (0.8, 0.06)}

    var_str = var_name
    # number of digits used for the mean / range annotations
    _ndig = abs(exponent(statistics['refdata_mean']) - 2)
    if unit is None:
        unit = 'N/D'
    if str(unit) not in ['1', 'no_unit']:
        var_str += ' [{}]'.format(unit)

    ax.annotate("{} #: {} # st: {}".format(var_str,
                                           statistics['num_valid'],
                                           stations_ok),
                xy=xypos['var_info'], xycoords='axes fraction',
                fontsize=fontsize_base+4, color='red')

    ax.annotate('Mean (x-data): {:.{}f}; Rng: [{:.{}f}, {:.{}f}]'
                .format(statistics['refdata_mean'], _ndig,
                        np.nanmin(x_vals), _ndig,
                        np.nanmax(x_vals), _ndig),
                xy=xypos['refdata_mean'], xycoords='axes fraction',
                fontsize=fontsize_base, color='red')

    ax.annotate('Mean (y-data): {:.{}f}; Rng: [{:.{}f}, {:.{}f}]'
                .format(statistics['data_mean'], _ndig,
                        np.nanmin(y_vals), _ndig,
                        np.nanmax(y_vals), _ndig),
                xy=xypos['data_mean'], xycoords='axes fraction',
                fontsize=fontsize_base, color='red')

    # remaining statistics (key, annotation text)
    stat_labels = (
        ('nmb', 'NMB: {:.1f}%'.format(statistics['nmb']*100)),
        ('mnmb', 'MNMB: {:.1f}%'.format(statistics['mnmb']*100)),
        ('R', 'R (Pearson): {:.3f}'.format(statistics['R'])),
        ('rms', 'RMS: {:.3f}'.format(statistics['rms'])),
        ('R_kendall',
         'R (Kendall): {:.3f}'.format(statistics['R_kendall'])),
        ('fge', 'FGE: {:.1f}'.format(statistics['fge'])),
    )
    for key, text in stat_labels:
        ax.annotate(text, xy=xypos[key], xycoords='axes fraction',
                    fontsize=fontsize_base, color='red')

    # right lower part
    for key, value in (('ts_type', ts_type), ('filter_name', filter_name)):
        ax.annotate('{}'.format(value),
                    xy=xypos[key], xycoords='axes fraction',
                    ha='center', fontsize=fontsize_base, color='black')

    ax.set_aspect('equal')

    if savefig:
        if save_dir is None or save_name is None:
            raise IOError('Cannot save figure: save_dir and save_name '
                          'need to be specified')
        fig.savefig(os.path.join(save_dir, save_name))
    return ax
def compute_json_files_from_colocateddata_v0(coldata, obs_name,
                                             model_name, use_weights,
                                             colocation_settings,
                                             vert_code, out_dirs):
    """Creates all json files for one ColocatedData object

    First version

    Parameters
    ----------
    coldata : ColocatedData
        colocated data. If it has no ``station_name`` coordinate, it needs
        to be 4-dimensional with ``latitude`` and ``longitude`` dimensions,
        which are then stacked into ``station_name`` (NOTE: this modifies
        ``coldata.data`` of the input object).
    obs_name : str
        name of observation, used for output file names and stored in the
        time series files.
    model_name : str
        name of model, used for output file names and stored in the time
        series files.
    use_weights : bool
        passed as ``use_area_weights`` when computing the regional
        statistics for the heatmap.
    colocation_settings
        object providing the attributes
        ``apply_time_resampling_constraints``, ``min_num_obs`` and
        ``colocate_time`` used for temporal resampling.
    vert_code : str
        vertical code; ``'ModelLevel'`` is not supported.
    out_dirs : dict
        output directories, needs keys ``'hm'``, ``'map'`` and ``'scat'``
        and is also passed on to ``_write_stationdata_json``.

    Raises
    ------
    ValueError
        if ``coldata`` is not a :class:`ColocatedData` object.
    NotImplementedError
        if data contains an altitude dimension or if ``vert_code`` is
        ``'ModelLevel'``.
    DataDimensionError
        if gridded input does not have 4 dimensions or lacks the latitude
        or longitude dimension.
    """
    if not isinstance(coldata, ColocatedData):
        raise ValueError('Need ColocatedData object, got {}'
                         .format(type(coldata)))

    # dummy statistics for sites / resolutions without data
    stats_dummy = {k: np.nan for k in calc_statistics([1], [1])}

    stacked = False
    if 'altitude' in coldata.data.dims:
        raise NotImplementedError('Cannot yet handle profile data')
    if 'station_name' not in coldata.data.coords:
        dims = coldata.data.dims
        if not coldata.data.ndim == 4:
            raise DataDimensionError('Invalid number of dimensions. '
                                     'Need 4, got: {}'.format(dims))
        # Both dimensions are required for stacking. The former check
        # ("not 'latitude' in dims and 'longitude' in dims") only
        # triggered if latitude was missing while longitude was present.
        elif not ('latitude' in dims and 'longitude' in dims):
            raise DataDimensionError('Need latitude and longitude '
                                     'dimension. Got {}'.format(dims))

        coldata.data = coldata.data.stack(station_name=('latitude',
                                                        'longitude'))
        stacked = True

    ts_types_order = const.GRID_IO.TS_TYPES
    to_ts_types = ['daily', 'monthly', 'yearly']

    # both initialised with None for each output resolution
    data_arrs = dict.fromkeys(to_ts_types)
    jsdate = dict.fromkeys(to_ts_types)

    ts_type = coldata.meta['ts_type']
    base_idx = ts_types_order.index(ts_type)
    for freq in to_ts_types:
        freq_idx = ts_types_order.index(freq)
        if freq_idx < base_idx:
            # no data / dates for this resolution (entries remain None)
            data_arrs[freq] = None
            continue
        if freq_idx == base_idx:
            arr = coldata.data
        else:
            colstp = colocation_settings
            arr = coldata.resample_time(
                to_ts_type=freq,
                apply_constraints=colstp.apply_time_resampling_constraints,
                min_num_obs=colstp.min_num_obs,
                colocate_time=colstp.colocate_time,
                inplace=False).data
        data_arrs[freq] = arr

        # seconds since 1970 multiplied by 1000; explicit int64 so that
        # the multiplication cannot overflow where int is 32 bit
        js = (arr.time.values.astype('datetime64[s]') -
              np.datetime64('1970', '[s]')).astype(np.int64) * 1000
        jsdate[freq] = js.tolist()

    obs_id = coldata.meta['data_source'][0]
    model_id = coldata.meta['data_source'][1]

    obs_var = coldata.meta['var_name'][0]
    model_var = coldata.meta['var_name'][1]

    ts_objs = []
    map_data = []
    scat_data = {}
    hm_data = {}

    # data used for heatmap display in interface
    if stacked:
        hmd = ColocatedData(data_arrs[ts_type].unstack('station_name'))
    else:
        hmd = ColocatedData(data_arrs[ts_type])

    for reg in get_all_default_region_ids():
        filtered = hmd.filter_region(region_id=reg)
        stats = filtered.calc_statistics(use_area_weights=use_weights)
        for k, v in stats.items():
            # entry NOTE is kept as is, everything else is converted
            if not k == 'NOTE':
                v = np.float64(v)
            stats[k] = v
        hm_data[reg] = stats

    hm_file = os.path.join(out_dirs['hm'], HEATMAP_FILENAME_EVAL_IFACE)

    add_entry_heatmap_json(hm_file, hm_data, obs_name, obs_var, vert_code,
                           model_name, model_var)

    # NOTE(review): raised only after the heatmap entry has been added
    # above; confirm that this order is intended.
    if vert_code == 'ModelLevel':
        raise NotImplementedError('Coming soon...')

    const.print_log.info('Computing json files for {} vs. {}'
                         .format(model_name, obs_name))

    for i, stat_name in enumerate(coldata.data.station_name.values):
        has_data = False
        ts_data = {
            'station_name': stat_name,
            'pyaerocom_version': pyaerocom_version,
            'obs_name': obs_name,
            'model_name': model_name,
            'obs_var': coldata.meta['var_name'][0],
            'obs_unit': coldata.meta['var_units'][0],
            'vert_code': vert_code,
            'obs_freq_src': coldata.meta['ts_type_src'][0],
            'obs_revision': coldata.meta['revision_ref'],
            'mod_var': coldata.meta['var_name'][1],
            'mod_unit': coldata.meta['var_units'][1],
            'mod_freq_src': coldata.meta['ts_type_src'][1],
        }

        stat_lat = np.float64(coldata.data.latitude[i])
        stat_lon = np.float64(coldata.data.longitude[i])
        if 'altitude' in coldata.data.coords:
            stat_alt = np.float64(coldata.data.altitude[i])
        else:
            stat_alt = np.nan

        region = find_closest_region_coord(stat_lat, stat_lon)

        # station information for map view
        map_stat = {'site': stat_name,
                    'lat': stat_lat,
                    'lon': stat_lon,
                    'alt': stat_alt,
                    'region': region}

        for tres, arr in data_arrs.items():
            date_key = '{}_date'.format(tres)
            obs_key = '{}_obs'.format(tres)
            mod_key = '{}_mod'.format(tres)
            stats_key = '{}_statistics'.format(tres)

            obs_vals = None
            if arr is not None:
                obs_vals = arr.sel(data_source=obs_id,
                                   station_name=stat_name).values

            # resolution not available or no obs data at this site
            if obs_vals is None or np.isnan(obs_vals).all():
                ts_data[date_key] = []
                ts_data[obs_key] = []
                ts_data[mod_key] = []
                map_stat[stats_key] = dict(stats_dummy)
                continue

            has_data = True
            mod_vals = arr.sel(data_source=model_id,
                               station_name=stat_name).values

            if not len(jsdate[tres]) == len(obs_vals):
                raise Exception('Please debug...')

            ts_data[date_key] = jsdate[tres]
            ts_data[obs_key] = obs_vals.tolist()
            ts_data[mod_key] = mod_vals.tolist()

            station_statistics = calc_statistics(mod_vals, obs_vals)
            map_stat[stats_key] = {k: np.float64(v) for k, v
                                   in station_statistics.items()}

        if has_data:
            ts_objs.append(ts_data)
            map_data.append(map_stat)
            scat_data[str(stat_name)] = {'obs': ts_data['monthly_obs'],
                                         'mod': ts_data['monthly_mod'],
                                         'region': region}

    map_name = get_json_mapname(obs_name, obs_var, model_name,
                                model_var, vert_code)

    outfile_map = os.path.join(out_dirs['map'], map_name)
    with open(outfile_map, 'w') as f:
        simplejson.dump(map_data, f, ignore_nan=True)

    # scatter data uses the same file name in a different directory
    outfile_scat = os.path.join(out_dirs['scat'], map_name)
    with open(outfile_scat, 'w') as f:
        simplejson.dump(scat_data, f, ignore_nan=True)

    for ts_data in ts_objs:
        # writes json file
        _write_stationdata_json(ts_data, out_dirs)