def test_masking_adapter():
    """MaskingAdapter flags values below the threshold, both when a single
    column is targeted and when no column is given (mask everything)."""
    below_threshold = np.concatenate(
        [np.ones((10), dtype=bool), np.zeros((10), dtype=bool)]
    )
    everything_true = np.ones((20), dtype=bool)
    for column in (None, "x"):
        source = TestDataset("", n=20)
        adapter = MaskingAdapter(source, "<", 10, column)
        via_read_ts = adapter.read_ts()
        via_read = adapter.read()
        # "x" runs 0..19, so exactly the first half is < 10
        nptest.assert_almost_equal(via_read_ts["x"].values, below_threshold)
        nptest.assert_almost_equal(via_read["x"].values, below_threshold)
        if column is None:
            # without a column restriction "y" is masked as well
            nptest.assert_almost_equal(via_read_ts["y"].values, everything_true)
            nptest.assert_almost_equal(via_read["y"].values, everything_true)
def test_adapters_with_ascat():
    """Run the anomaly, self-masking, masking and climatology-anomaly
    adapters over an ASCAT reader and check both ``read_ts`` and ``read``.

    Fix: the ``AnomalyClimAdapter`` section previously asserted on ``data``
    (the result of the earlier ``AnomalyAdapter``) instead of ``data4`` —
    a copy-paste bug that left the clim adapter output unchecked.
    """
    ascat_data_folder = os.path.join(
        os.path.dirname(__file__),
        "..", "test-data", "sat", "ascat", "netcdf", "55R22",
    )
    ascat_grid_folder = os.path.join(
        os.path.dirname(__file__),
        "..", "test-data", "sat", "ascat", "netcdf", "grid",
    )
    grid_fname = os.path.join(ascat_grid_folder, "TUW_WARP5_grid_info_2_1.nc")
    ascat_reader = AscatGriddedNcTs(
        ascat_data_folder,
        "TUW_METOP_ASCAT_WARP55R22_{:04d}",
        grid_filename=grid_fname,
    )

    # anomalies w.r.t. a moving-window average must not be all zero
    ascat_anom = AnomalyAdapter(ascat_reader, window_size=35, columns=["sm"])
    data = ascat_anom.read_ts(12.891455, 45.923004)
    assert data is not None
    assert np.any(data["sm"].values != 0)
    data = ascat_anom.read(12.891455, 45.923004)
    assert data is not None
    assert np.any(data["sm"].values != 0)

    # self-masking keeps only rows satisfying the condition
    ascat_self = SelfMaskingAdapter(ascat_reader, ">", 0, "sm")
    data2 = ascat_self.read_ts(12.891455, 45.923004)
    assert data2 is not None
    assert np.all(data2["sm"].values > 0)
    data2 = ascat_self.read(12.891455, 45.923004)
    assert data2 is not None
    assert np.all(data2["sm"].values > 0)

    # masking returns the boolean mask itself
    ascat_mask = MaskingAdapter(ascat_reader, ">", 0, "sm")
    data3 = ascat_mask.read_ts(12.891455, 45.923004)
    assert data3 is not None
    assert np.any(data3["sm"].values)
    data3 = ascat_mask.read(12.891455, 45.923004)
    assert data3 is not None
    assert np.any(data3["sm"].values)

    # anomalies w.r.t. climatology must not be all zero
    ascat_clim = AnomalyClimAdapter(ascat_reader, columns=["sm"])
    data4 = ascat_clim.read_ts(12.891455, 45.923004)
    assert data4 is not None
    assert np.any(data4["sm"].values != 0)
    data4 = ascat_clim.read(12.891455, 45.923004)
    assert data4 is not None
    assert np.any(data4["sm"].values != 0)
def test_masking_adapter():
    """Values of 'x' below 10 are flagged True; all of 'y' is below 10."""
    masked = MaskingAdapter(TestDataset('', n=20), '<', 10).read_ts()
    expected_x = np.concatenate(
        [np.ones((10), dtype=bool), np.zeros((10), dtype=bool)])
    nptest.assert_almost_equal(masked['x'].values, expected_x)
    nptest.assert_almost_equal(masked['y'].values, np.ones((20), dtype=bool))
def test_masking_adapter():
    """MaskingAdapter('<', 10) marks the first half of 'x' and all of 'y'."""
    source = TestDataset('', n=20)
    mask_reader = MaskingAdapter(source, '<', 10)
    result = mask_reader.read_ts()
    first_half_true = np.concatenate([np.ones((10), dtype=bool),
                                      np.zeros((10), dtype=bool)])
    nptest.assert_almost_equal(result['x'].values, first_half_true)
    nptest.assert_almost_equal(result['y'].values, np.ones((20), dtype=bool))
def test_timezone_removal():
    """Every adapter must cope with a reader that returns tz-aware data."""
    tz_reader = TestTimezoneReader()
    wrapped_readers = (
        AnomalyAdapter(tz_reader, window_size=35, columns=["data"]),
        SelfMaskingAdapter(tz_reader, ">", 0, "data"),
        MaskingAdapter(tz_reader, ">", 0, "data"),
        AnomalyClimAdapter(tz_reader, columns=["data"]),
    )
    for wrapped in wrapped_readers:
        assert wrapped.read_ts(0) is not None
def test_adapters_with_ascat():
    """Exercise all four adapters over an ``AscatSsmCdr`` reader.

    Fix: the ``AnomalyClimAdapter`` section previously asserted on ``data``
    (the earlier ``AnomalyAdapter`` result) instead of ``data4`` — a
    copy-paste bug that left the clim adapter output unchecked.
    """
    ascat_data_folder = os.path.join(
        os.path.dirname(__file__),
        '..', 'test-data', 'sat', 'ascat', 'netcdf', '55R22')
    ascat_grid_folder = os.path.join(
        os.path.dirname(__file__),
        '..', 'test-data', 'sat', 'ascat', 'netcdf', 'grid')
    ascat_reader = AscatSsmCdr(
        ascat_data_folder, ascat_grid_folder,
        grid_filename='TUW_WARP5_grid_info_2_1.nc')

    # anomalies w.r.t. a moving-window average must not be all zero
    ascat_anom = AnomalyAdapter(ascat_reader, window_size=35, columns=['sm'])
    data = ascat_anom.read_ts(12.891455, 45.923004)
    assert data is not None
    assert np.any(data['sm'].values != 0)
    data = ascat_anom.read(12.891455, 45.923004)
    assert data is not None
    assert np.any(data['sm'].values != 0)

    # self-masking keeps only rows satisfying the condition
    ascat_self = SelfMaskingAdapter(ascat_reader, '>', 0, 'sm')
    data2 = ascat_self.read_ts(12.891455, 45.923004)
    assert data2 is not None
    assert np.all(data2['sm'].values > 0)
    data2 = ascat_self.read(12.891455, 45.923004)
    assert data2 is not None
    assert np.all(data2['sm'].values > 0)

    # masking returns the boolean mask itself
    ascat_mask = MaskingAdapter(ascat_reader, '>', 0, 'sm')
    data3 = ascat_mask.read_ts(12.891455, 45.923004)
    assert data3 is not None
    assert np.any(data3['sm'].values)
    data3 = ascat_mask.read(12.891455, 45.923004)
    assert data3 is not None
    assert np.any(data3['sm'].values)

    # anomalies w.r.t. climatology must not be all zero
    ascat_clim = AnomalyClimAdapter(ascat_reader, columns=['sm'])
    data4 = ascat_clim.read_ts(12.891455, 45.923004)
    assert data4 is not None
    assert np.any(data4['sm'].values != 0)
    data4 = ascat_clim.read(12.891455, 45.923004)
    assert data4 is not None
    assert np.any(data4['sm'].values != 0)
# # Masking datasets are datasets that return a pandas DataFrame with boolean values. `True` means that the observation # should be masked, `False` means it should be kept. All masking datasets are temporally matched in pairs to the # temporal reference dataset. Only observations for which all masking datasets have a value of `False` are kept for # further validation. # # The masking datasets have the same format as the dataset dictionary and can be specified in the Validation class # with the `masking_datasets` keyword. # # ### Masking adapter # # To easily transform an existing dataset into a masking dataset `pytesmo` offers a adapter class that calls the # `read_ts` method of an existing dataset and creates a masking dataset based on an operator, a given threshold, and (optionally) a column name. # In[12]: from pytesmo.validation_framework.adapters import MaskingAdapter ds_mask = MaskingAdapter(ismn_reader, '<', 0.2, 'soil moisture') print(ds_mask.read_ts(ids[0]).head()) # ### Self-masking adapter # `pytesmo` also has a class that masks a dataset "on-the-fly", based on one of the columns it contains and an operator and a threshold. In contrast to the masking adapter mentioned above, the output of the self-masking adapter is the masked data, not the the mask. The self-masking adapter wraps a data reader, which must have a `read_ts` or `read` method. Calling its `read_ts`/`read` method will return the masked data - more precisely a DataFrame with only rows where the masking condition is true. # In[13]: from pytesmo.validation_framework.adapters import SelfMaskingAdapter ds_mask = SelfMaskingAdapter(ismn_reader, '<', 0.2, 'soil moisture') print(ds_mask.read_ts(ids[0]).head())
# ```python
# from pytesmo.validation_framework import start_validation
#
# # Note that before starting the validation you must start a controller
# # and engines, for example by using: ipcluster start -n 4
# # This command will launch a controller and 4 engines on the local machine.
# # Also, do not forget to change the setup_code path to your current setup.
#
# setup_code = "my_validation.py"
# start_validation(setup_code)
# ```

# ## Masking datasets
#
# Masking datasets are datasets that return a pandas DataFrame with boolean values. `True` means that the observation
# should be masked, `False` means it should be kept. All masking datasets are temporally matched in pairs to the
# temporal reference dataset. Only observations for which all masking datasets have a value of `False` are kept for
# further validation.
#
# The masking datasets have the same format as the dataset dictionary and can be specified in the Validation class
# with the `masking_datasets` keyword.
#
# ### Masking adapter
#
# To easily transform an existing dataset into a masking dataset `pytesmo` offers an adapter class that calls the
# `read_ts` method of an existing dataset and performs the masking based on an operator and a given threshold.

# In[12]:

from pytesmo.validation_framework.adapters import MaskingAdapter

ds_mask = MaskingAdapter(ismn_reader, '<', 0.2)
# Fix: was a Python 2 `print` statement, which is a syntax error under
# Python 3; the sibling examples in this file already use print().
print(ds_mask.read_ts(ids[0])['soil moisture'].head())
# further validation. # # The masking datasets have the same format as the dataset dictionary and can be specified in the Validation class # with the `masking_datasets` keyword. # # ### Masking adapter # # To easily transform an existing dataset into a masking dataset `pytesmo` offers a adapter class that calls the # `read_ts` method of an existing dataset and creates a masking dataset based on an operator, a given threshold, and (optionally) a column name. # In[12]: from pytesmo.validation_framework.adapters import MaskingAdapter ds_mask = MaskingAdapter(ismn_reader, '<', 0.2, 'soil moisture') print(ds_mask.read_ts(ids[0]).head()) # ### Self-masking adapter # `pytesmo` also has a class that masks a dataset "on-the-fly", based on one of the columns it contains and an operator and a threshold. In contrast to the masking adapter mentioned above, the output of the self-masking adapter is the masked data, not the the mask. The self-masking adapter wraps a data reader, which must have a `read_ts` or `read` method. Calling its `read_ts`/`read` method will return the masked data - more precisely a DataFrame with only rows where the masking condition is true. # In[13]: from pytesmo.validation_framework.adapters import SelfMaskingAdapter ds_mask = SelfMaskingAdapter(ismn_reader, '<', 0.2, 'soil moisture') print(ds_mask.read_ts(ids[0]).head())
def getdata():
    """
    Handle the GET request for a station validation run.

    Reads the request arguments listed below, optionally builds masking
    datasets and anomaly adapters, runs a pytesmo ``Validation`` of the
    ISMN station time series against the reference dataset, and returns a
    JSON response with dygraph-formatted data, statistics and settings.

    Parameters
    ----------
    station_id: int
        id of station in database
    scaling: string
        chosen scaling method, for available choices see
        general.times_eries.scaling
    snow_depth: float
        mask snow depth greater than this value
    st_l1: float
        mask surface temperature layer1 lower than this value
    air_temp: float
        mask 2m air temperature lower than this value
    ssf_masking: boolean
        use SSF for masking true or false
    """
    station_id = request.args.get('station_id')
    scaling = request.args.get('scaling')
    # 'noscale' from the UI maps to "no scaling" (None) for pytesmo
    if scaling == 'noscale':
        scaling = None
    masking_ids = request.args.getlist('masking_ds[]')
    masking_ops = request.args.getlist('masking_op[]')
    masking_values = request.args.getlist('masking_values[]')
    masking_values = [float(x) for x in masking_values]
    anomaly = request.args.get('anomaly')
    if anomaly == 'none':
        anomaly = None

    # station metadata: first soil-moisture layer, position, and time period
    (depth_from,
     depth_to,
     sensor_id) = get_station_first_sm_layer(app.config['ISMN_PATH'],
                                             station_id)
    lon, lat = get_station_lonlat(app.config['ISMN_PATH'],
                                  station_id)
    start, end = get_station_start_end(app.config['ISMN_PATH'],
                                       station_id, "soil moisture",
                                       depth_from, depth_to)
    period = [start, end]

    masking_data = {'labels': [], 'data': []}
    masking_meta = get_masking_metadata()
    masking_masked_dict = None
    if len(masking_ids) > 0:
        # prepare masking datasets: wrap each reader in a MaskingAdapter so
        # it yields a boolean mask instead of the raw values
        masking_ds_dict = get_masking_ds_dict(masking_ids)
        masking_masked_dict = {}
        for masking_ds, masking_op, masking_value in zip(masking_ids,
                                                         masking_ops,
                                                         masking_values):
            masking_masked_dict[masking_ds] = dict(masking_ds_dict[masking_ds])
            new_cls = MaskingAdapter(masking_masked_dict[masking_ds]['class'],
                                     masking_op,
                                     masking_value)
            masking_masked_dict[masking_ds]['class'] = new_cls

        # use DataManager for reading masking datasets
        masking_dm = DataManager(masking_ds_dict, masking_ids[0],
                                 period=period)
        masking_data = {}
        valid_masking_ids = []
        for mds in masking_ids:
            mdata = masking_dm.read_ds(mds, lon, lat)
            if mdata is not None:
                masking_data[mds] = mdata
                valid_masking_ids.append(mds)
            else:
                # keep an empty placeholder so every requested id has an entry
                masking_data[mds] = pd.DataFrame()
        if len(valid_masking_ids) > 1:
            # temporally match all masking datasets against the first one
            masking_data = BasicTemporalMatching(window=1.0).combinatory_matcher(
                masking_data, masking_ids[0], n=len(masking_ids))
            if len(masking_data) > 0:
                # NOTE(review): dict.keys()[0] only works on Python 2; under
                # Python 3 this raises TypeError — confirm target runtime.
                # Also, labels/values stay unbound if masking_data is empty
                # here, which would raise NameError below.
                labels, values = masking_data[
                    masking_data.keys()[0]].to_dygraph_format()
        elif len(valid_masking_ids) == 1:
            masking_data = masking_data[valid_masking_ids[0]]
            labels, values = masking_data.to_dygraph_format()
        else:
            labels = [None]
            values = None

        # replace technical variable names with human-readable long names
        for i, label in enumerate(labels):
            for mid in masking_meta:
                if masking_meta[mid]['variable']['name'] in label:
                    labels[i] = masking_meta[mid]['long_name']

        masking_data = {'labels': labels, 'data': values}

    # build the validation dataset dictionary: configured datasets + ISMN
    ismn_iface = prepare_station_interface(app.config['ISMN_PATH'],
                                           station_id, "soil moisture",
                                           depth_from, depth_to, sensor_id)
    validation_ds_dict = get_validation_ds_dict()
    validation_ds_dict.update({'ISMN': {'class': ismn_iface,
                                        'columns': ['soil moisture']}})

    if anomaly is not None:
        # wrap every reader so anomalies (climatology- or moving-average-
        # based) are validated instead of absolute values
        adapter = {'climatology': AnomalyClimAdapter,
                   'average': AnomalyAdapter}
        for dataset in validation_ds_dict:
            validation_ds_dict[dataset]['class'] = adapter[
                anomaly](validation_ds_dict[dataset]['class'],
                         columns=validation_ds_dict[dataset]['columns'])

    mcalc = BasicMetricsPlusMSE(other_name='k1',
                                calc_tau=True).calc_metrics
    process = Validation(validation_ds_dict, 'ISMN',
                         temporal_ref='cci',
                         scaling=scaling,
                         metrics_calculators={(2, 2): mcalc},
                         masking_datasets=masking_masked_dict,
                         period=period,
                         temporal_window=1)

    df_dict = process.data_manager.get_data(1, lon, lat)
    matched_data, result, used_data = process.perform_validation(
        df_dict, (1, lon, lat))

    res_key = list(result)[0]
    data = used_data[res_key]
    result = result[res_key][0]

    # rename data to original names: columns come back as ref/k1/... and are
    # mapped back to the dataset/column names encoded in res_key
    rename_dict = {}
    f = lambda x: "k{}".format(x) if x > 0 else 'ref'
    for i, r in enumerate(res_key):
        rename_dict[f(i)] = " ".join(r)
    data.rename(columns=rename_dict, inplace=True)

    labels, values = data.to_dygraph_format()
    validation_datasets = {'labels': labels, 'data': values}

    # format metric values as strings for display in the frontend
    statistics = {'kendall': {'v': '%.2f' % result['tau'],
                              'p': '%.4f' % result['p_tau']},
                  'spearman': {'v': '%.2f' % result['rho'],
                               'p': '%.4f' % result['p_rho']},
                  'pearson': {'v': '%.2f' % result['R'],
                              'p': '%.4f' % result['p_R']},
                  'bias': '%.4f' % result['BIAS'],
                  'rmsd': {'rmsd': '%.4f' % np.sqrt(result['mse']),
                           'rmsd_corr': '%.4f' % np.sqrt(result['mse_corr']),
                           'rmsd_bias': '%.4f' % np.sqrt(result['mse_bias']),
                           'rmsd_var': '%.4f' % np.sqrt(result['mse_var'])},
                  'mse': {'mse': '%.4f' % result['mse'],
                          'mse_corr': '%.4f' % result['mse_corr'],
                          'mse_bias': '%.4f' % result['mse_bias'],
                          'mse_var': '%.4f' % result['mse_var']}}

    scaling_options = {'noscale': 'No scaling',
                       'porosity': 'Scale using porosity',
                       'linreg': 'Linear Regression',
                       'mean_std': 'Mean - standard deviation',
                       'min_max': 'Minimum,maximum',
                       'lin_cdf_match': 'Piecewise <br> linear CDF matching',
                       'cdf_match': 'CDF matching'}
    if scaling is None:
        scaling = 'noscale'

    # echo the applied masking configuration back to the client
    masking_option_return = {}
    for mid, mops, mval in zip(masking_ids,
                               masking_ops,
                               masking_values):
        masking_option_return[mid] = {'op': mops,
                                      'val': mval,
                                      'name': masking_meta[mid]['long_name']}

    settings = {'scaling': scaling_options[scaling],
                'masking': masking_option_return}

    output_data = {'validation_data': validation_datasets,
                   'masking_data': masking_data,
                   'statistics': statistics,
                   'settings': settings}
    # NOTE(review): status is hard-coded to 1, so the 'Error' branch below is
    # currently unreachable — looks like a stub for future error handling.
    status = 1
    if status == -1:
        data = 'Error'
    else:
        data = jsonify(output_data)
    resp = make_response(data)
    # allow cross-origin access from the JS frontend
    resp.headers['Access-Control-Allow-Origin'] = '*'
    return resp