Example 1
def test_masking_adapter():
    for col in (None, "x"):
        ds = TestDataset("", n=20)
        ds_mask = MaskingAdapter(ds, "<", 10, col)
        data_masked = ds_mask.read_ts()
        data_masked2 = ds_mask.read()

        nptest.assert_almost_equal(
            data_masked["x"].values,
            np.concatenate(
                [np.ones((10), dtype=bool),
                 np.zeros((10), dtype=bool)]),
        )
        nptest.assert_almost_equal(
            data_masked2["x"].values,
            np.concatenate(
                [np.ones((10), dtype=bool),
                 np.zeros((10), dtype=bool)]),
        )

        if col is None:
            nptest.assert_almost_equal(data_masked["y"].values,
                                       np.ones((20), dtype=bool))
            nptest.assert_almost_equal(data_masked2["y"].values,
                                       np.ones((20), dtype=bool))
Example 2
def test_adapters_with_ascat():
    ascat_data_folder = os.path.join(
        os.path.dirname(__file__),
        "..",
        "test-data",
        "sat",
        "ascat",
        "netcdf",
        "55R22",
    )
    ascat_grid_folder = os.path.join(
        os.path.dirname(__file__),
        "..",
        "test-data",
        "sat",
        "ascat",
        "netcdf",
        "grid",
    )
    grid_fname = os.path.join(ascat_grid_folder, "TUW_WARP5_grid_info_2_1.nc")

    ascat_reader = AscatGriddedNcTs(
        ascat_data_folder,
        "TUW_METOP_ASCAT_WARP55R22_{:04d}",
        grid_filename=grid_fname,
    )

    ascat_anom = AnomalyAdapter(ascat_reader, window_size=35, columns=["sm"])
    data = ascat_anom.read_ts(12.891455, 45.923004)
    assert data is not None
    assert np.any(data["sm"].values != 0)
    data = ascat_anom.read(12.891455, 45.923004)
    assert data is not None
    assert np.any(data["sm"].values != 0)

    ascat_self = SelfMaskingAdapter(ascat_reader, ">", 0, "sm")
    data2 = ascat_self.read_ts(12.891455, 45.923004)
    assert data2 is not None
    assert np.all(data2["sm"].values > 0)
    data2 = ascat_self.read(12.891455, 45.923004)
    assert data2 is not None
    assert np.all(data2["sm"].values > 0)

    ascat_mask = MaskingAdapter(ascat_reader, ">", 0, "sm")
    data3 = ascat_mask.read_ts(12.891455, 45.923004)
    assert data3 is not None
    assert np.any(data3["sm"].values)
    data3 = ascat_mask.read(12.891455, 45.923004)
    assert data3 is not None
    assert np.any(data3["sm"].values)

    ascat_clim = AnomalyClimAdapter(ascat_reader, columns=["sm"])
    data4 = ascat_clim.read_ts(12.891455, 45.923004)
    assert data4 is not None
    assert np.any(data["sm"].values != 0)
    data4 = ascat_clim.read(12.891455, 45.923004)
    assert data4 is not None
    assert np.any(data["sm"].values != 0)
Example 3
def test_masking_adapter():
    ds = TestDataset('', n=20)
    ds_mask = MaskingAdapter(ds, '<', 10)
    data_masked = ds_mask.read_ts()
    nptest.assert_almost_equal(data_masked['x'].values,
                               np.concatenate([np.ones((10), dtype=bool),
                                               np.zeros((10), dtype=bool)]))

    nptest.assert_almost_equal(
        data_masked['y'].values, np.ones((20), dtype=bool))
Example 4
def test_masking_adapter():
    ds = TestDataset('', n=20)
    ds_mask = MaskingAdapter(ds, '<', 10)
    data_masked = ds_mask.read_ts()
    nptest.assert_almost_equal(
        data_masked['x'].values,
        np.concatenate([np.ones((10), dtype=bool),
                        np.zeros((10), dtype=bool)]))

    nptest.assert_almost_equal(data_masked['y'].values,
                               np.ones((20), dtype=bool))
Example 5
def test_timezone_removal():
    tz_reader = TestTimezoneReader()

    reader_anom = AnomalyAdapter(tz_reader, window_size=35, columns=["data"])
    assert reader_anom.read_ts(0) is not None

    reader_self = SelfMaskingAdapter(tz_reader, ">", 0, "data")
    assert reader_self.read_ts(0) is not None

    reader_mask = MaskingAdapter(tz_reader, ">", 0, "data")
    assert reader_mask.read_ts(0) is not None

    reader_clim = AnomalyClimAdapter(tz_reader, columns=["data"])
    assert reader_clim.read_ts(0) is not None
Example 6
def test_adapters_with_ascat():
    ascat_data_folder = os.path.join(os.path.dirname(__file__), '..',
                                     'test-data', 'sat', 'ascat', 'netcdf',
                                     '55R22')
    ascat_grid_folder = os.path.join(os.path.dirname(__file__), '..',
                                     'test-data', 'sat', 'ascat', 'netcdf',
                                     'grid')

    ascat_reader = AscatSsmCdr(ascat_data_folder,
                               ascat_grid_folder,
                               grid_filename='TUW_WARP5_grid_info_2_1.nc')

    ascat_anom = AnomalyAdapter(ascat_reader, window_size=35, columns=['sm'])
    data = ascat_anom.read_ts(12.891455, 45.923004)
    assert data is not None
    assert np.any(data['sm'].values != 0)
    data = ascat_anom.read(12.891455, 45.923004)
    assert data is not None
    assert np.any(data['sm'].values != 0)

    ascat_self = SelfMaskingAdapter(ascat_reader, '>', 0, 'sm')
    data2 = ascat_self.read_ts(12.891455, 45.923004)
    assert data2 is not None
    assert np.all(data2['sm'].values > 0)
    data2 = ascat_self.read(12.891455, 45.923004)
    assert data2 is not None
    assert np.all(data2['sm'].values > 0)

    ascat_mask = MaskingAdapter(ascat_reader, '>', 0, 'sm')
    data3 = ascat_mask.read_ts(12.891455, 45.923004)
    assert data3 is not None
    assert np.any(data3['sm'].values)
    data3 = ascat_mask.read(12.891455, 45.923004)
    assert data3 is not None
    assert np.any(data3['sm'].values)

    ascat_clim = AnomalyClimAdapter(ascat_reader, columns=['sm'])
    data4 = ascat_clim.read_ts(12.891455, 45.923004)
    assert data4 is not None
    assert np.any(data4['sm'].values != 0)
    data4 = ascat_clim.read(12.891455, 45.923004)
    assert data4 is not None
    assert np.any(data4['sm'].values != 0)
Example 7
#
# Masking datasets are datasets that return a pandas DataFrame with boolean values. `True` means that the observation
#  should be masked, `False` means it should be kept. All masking datasets are temporally matched in pairs to the
# temporal reference dataset. Only observations for which all masking datasets have a value of `False` are kept for
# further validation.
#
# The masking datasets have the same format as the dataset dictionary and can be specified in the Validation class
# with the `masking_datasets` keyword, as sketched after the masking adapter example below.
#
# ### Masking adapter
#
# To easily transform an existing dataset into a masking dataset, `pytesmo` offers an adapter class that calls the
# `read_ts` method of an existing dataset and creates a masking dataset based on an operator, a given threshold, and (optionally) a column name.

# In[12]:

from pytesmo.validation_framework.adapters import MaskingAdapter

ds_mask = MaskingAdapter(ismn_reader, '<', 0.2, 'soil moisture')
print(ds_mask.read_ts(ids[0]).head())
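
# The masking dataset built this way can then be handed to the `Validation` class
# through the `masking_datasets` keyword. A minimal sketch, assuming `datasets`
# (the regular dataset dictionary), the reference name `'ISMN'`, and a metrics
# calculator `mcalc` are already defined as elsewhere in this tutorial:
#
# ```python
# from pytesmo.validation_framework.validation import Validation
#
# masking_datasets = {
#     'ISMN-mask': {'class': ds_mask, 'columns': ['soil moisture']}
# }
#
# process = Validation(datasets, 'ISMN',
#                      metrics_calculators={(2, 2): mcalc},
#                      masking_datasets=masking_datasets)
# ```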

# ### Self-masking adapter
# `pytesmo` also has a class that masks a dataset "on-the-fly", based on one of the columns it contains, an operator, and a threshold. In contrast to the masking adapter mentioned above, the output of the self-masking adapter is the masked data, not the mask. The self-masking adapter wraps a data reader, which must have a `read_ts` or `read` method. Calling its `read_ts`/`read` method will return the masked data - more precisely, a DataFrame containing only the rows where the masking condition is true.

# In[13]:

from pytesmo.validation_framework.adapters import SelfMaskingAdapter

ds_mask = SelfMaskingAdapter(ismn_reader, '<', 0.2, 'soil moisture')
print(ds_mask.read_ts(ids[0]).head())
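
# To see the two adapters side by side, here is a small self-contained sketch
# using a synthetic reader (the `DummyReader` class below is illustrative and
# not part of `pytesmo`; it only needs to expose `read_ts`):

import pandas as pd

class DummyReader:
    """Minimal reader: five days of made-up soil moisture values."""
    def read_ts(self, *args):
        idx = pd.date_range('2020-01-01', periods=5, freq='D')
        return pd.DataFrame({'soil moisture': [0.1, 0.3, 0.15, 0.4, 0.05]},
                            index=idx)

dummy = DummyReader()
mask_adapter = MaskingAdapter(dummy, '<', 0.2, 'soil moisture')
self_mask_adapter = SelfMaskingAdapter(dummy, '<', 0.2, 'soil moisture')

# Boolean DataFrame: True where 'soil moisture' < 0.2
print(mask_adapter.read_ts())
# Filtered DataFrame: only the rows where 'soil moisture' < 0.2
print(self_mask_adapter.read_ts())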
Example 8
# ```python
# from pytesmo.validation_framework import start_validation
# 
# # Note that before starting the validation you must start a controller
# # and engines, for example by using: ipcluster start -n 4
# # This command will launch a controller and 4 engines on the local machine.
# # Also, do not forget to change the setup_code path to your current setup.
# 
# setup_code = "my_validation.py"
# start_validation(setup_code)
# ```

# ## Masking datasets
# 
# Masking datasets are datasets that return a pandas DataFrame with boolean values. `True` means that the observation should be masked, `False` means it should be kept. All masking datasets are temporally matched in pairs to the temporal reference dataset. Only observations for which all masking datasets have a value of `False` are kept for further validation.
# 
# The masking datasets have the same format as the dataset dictionary and can be specified in the Validation class with the `masking_datasets` keyword.
# 
# ### Masking adapter
# 
# To easily transform an existing dataset into a masking dataset, `pytesmo` offers an adapter class that calls the `read_ts` method of an existing dataset and performs the masking based on an operator and a given threshold.

# In[12]:

from pytesmo.validation_framework.adapters import MaskingAdapter

ds_mask = MaskingAdapter(ismn_reader, '<', 0.2)
print(ds_mask.read_ts(ids[0])['soil moisture'].head())

Example 9
# Only observations for which all masking datasets have a value of `False` are kept for further validation.
# 
# The masking datasets have the same format as the dataset dictionary and can be specified in the Validation class
# with the `masking_datasets` keyword.
# 
# ### Masking adapter
# 
# To easily transform an existing dataset into a masking dataset, `pytesmo` offers an adapter class that calls the
# `read_ts` method of an existing dataset and creates a masking dataset based on an operator, a given threshold, and (optionally) a column name.

# In[12]:


from pytesmo.validation_framework.adapters import MaskingAdapter

ds_mask = MaskingAdapter(ismn_reader, '<', 0.2, 'soil moisture')
print(ds_mask.read_ts(ids[0]).head())


# ### Self-masking adapter
# `pytesmo` also has a class that masks a dataset "on-the-fly", based on one of the columns it contains, an operator, and a threshold. In contrast to the masking adapter mentioned above, the output of the self-masking adapter is the masked data, not the mask. The self-masking adapter wraps a data reader, which must have a `read_ts` or `read` method. Calling its `read_ts`/`read` method will return the masked data - more precisely, a DataFrame containing only the rows where the masking condition is true.

# In[13]:


from pytesmo.validation_framework.adapters import SelfMaskingAdapter

ds_mask = SelfMaskingAdapter(ismn_reader, '<', 0.2, 'soil moisture')
print(ds_mask.read_ts(ids[0]).head())

Example 10
def getdata():
    """
    Handle the GET request, which should contain the arguments listed under
    Parameters.

    Parameters
    ----------
    station_id: int
        id of the station in the database
    scaling: string
        chosen scaling method; for available choices see
        general.time_series.scaling
    snow_depth: float
        mask snow depth greater than this value
    st_l1: float
        mask surface temperature layer 1 lower than this value
    air_temp: float
        mask 2 m air temperature lower than this value
    ssf_masking: boolean
        whether to use the SSF (surface state flag) for masking
    """
    station_id = request.args.get('station_id')
    scaling = request.args.get('scaling')
    if scaling == 'noscale':
        scaling = None
    masking_ids = request.args.getlist('masking_ds[]')
    masking_ops = request.args.getlist('masking_op[]')
    masking_values = request.args.getlist('masking_values[]')
    masking_values = [float(x) for x in masking_values]

    anomaly = request.args.get('anomaly')
    if anomaly == 'none':
        anomaly = None

    (depth_from,
     depth_to,
     sensor_id) = get_station_first_sm_layer(app.config['ISMN_PATH'],
                                             station_id)
    lon, lat = get_station_lonlat(app.config['ISMN_PATH'],
                                  station_id)
    start, end = get_station_start_end(app.config['ISMN_PATH'],
                                       station_id, "soil moisture",
                                       depth_from, depth_to)
    period = [start, end]

    masking_data = {'labels': [], 'data': []}
    masking_meta = get_masking_metadata()
    masking_masked_dict = None
    if len(masking_ids) > 0:
        # prepare masking datasets
        masking_ds_dict = get_masking_ds_dict(masking_ids)
        masking_masked_dict = {}
        for masking_ds, masking_op, masking_value in zip(masking_ids,
                                                         masking_ops,
                                                         masking_values):

            masking_masked_dict[masking_ds] = dict(masking_ds_dict[masking_ds])
            new_cls = MaskingAdapter(masking_masked_dict[masking_ds]['class'],
                                     masking_op,
                                     masking_value)
            masking_masked_dict[masking_ds]['class'] = new_cls

        # use DataManager for reading masking datasets
        masking_dm = DataManager(masking_ds_dict, masking_ids[0],
                                 period=period)
        masking_data = {}
        valid_masking_ids = []
        for mds in masking_ids:
            mdata = masking_dm.read_ds(mds, lon, lat)
            if mdata is not None:
                masking_data[mds] = mdata
                valid_masking_ids.append(mds)
            else:
                masking_data[mds] = pd.DataFrame()
        if len(valid_masking_ids) > 1:
            masking_data = BasicTemporalMatching(window=1.0).combinatory_matcher(
                masking_data, masking_ids[0], n=len(masking_ids))

            if len(masking_data) > 0:
                labels, values = masking_data[
                    list(masking_data.keys())[0]].to_dygraph_format()
        elif len(valid_masking_ids) == 1:
            masking_data = masking_data[valid_masking_ids[0]]
            labels, values = masking_data.to_dygraph_format()
        else:
            labels = [None]
            values = None

        for i, label in enumerate(labels):
            for mid in masking_meta:
                if masking_meta[mid]['variable']['name'] in label:
                    labels[i] = masking_meta[mid]['long_name']

        masking_data = {'labels': labels, 'data': values}

    ismn_iface = prepare_station_interface(app.config['ISMN_PATH'],
                                           station_id,
                                           "soil moisture",
                                           depth_from, depth_to, sensor_id)

    validation_ds_dict = get_validation_ds_dict()
    validation_ds_dict.update({'ISMN': {'class': ismn_iface,
                                        'columns': ['soil moisture']}})

    if anomaly is not None:
        adapter = {'climatology': AnomalyClimAdapter,
                   'average': AnomalyAdapter}
        for dataset in validation_ds_dict:
            validation_ds_dict[dataset]['class'] = adapter[
                anomaly](validation_ds_dict[dataset]['class'],
                         columns=validation_ds_dict[dataset]['columns'])

    mcalc = BasicMetricsPlusMSE(other_name='k1',
                                calc_tau=True).calc_metrics
    process = Validation(validation_ds_dict, 'ISMN',
                         temporal_ref='cci',
                         scaling=scaling,
                         metrics_calculators={(2, 2): mcalc},
                         masking_datasets=masking_masked_dict,
                         period=period,
                         temporal_window=1)

    df_dict = process.data_manager.get_data(1,
                                            lon,
                                            lat)

    matched_data, result, used_data = process.perform_validation(
        df_dict, (1, lon, lat))

    res_key = list(result)[0]
    data = used_data[res_key]
    result = result[res_key][0]

    # rename data to original names
    rename_dict = {}
    f = lambda x: "k{}".format(x) if x > 0 else 'ref'
    for i, r in enumerate(res_key):
        rename_dict[f(i)] = " ".join(r)

    data.rename(columns=rename_dict, inplace=True)

    labels, values = data.to_dygraph_format()

    validation_datasets = {'labels': labels, 'data': values}

    statistics = {'kendall': {'v': '%.2f' % result['tau'], 'p': '%.4f' % result['p_tau']},
                  'spearman': {'v': '%.2f' % result['rho'], 'p': '%.4f' % result['p_rho']},
                  'pearson': {'v': '%.2f' % result['R'], 'p': '%.4f' % result['p_R']},
                  'bias': '%.4f' % result['BIAS'],
                  'rmsd': {'rmsd': '%.4f' % np.sqrt(result['mse']),
                           'rmsd_corr': '%.4f' % np.sqrt(result['mse_corr']),
                           'rmsd_bias': '%.4f' % np.sqrt(result['mse_bias']),
                           'rmsd_var': '%.4f' % np.sqrt(result['mse_var'])},
                  'mse': {'mse': '%.4f' % result['mse'],
                          'mse_corr': '%.4f' % result['mse_corr'],
                          'mse_bias': '%.4f' % result['mse_bias'],
                          'mse_var': '%.4f' % result['mse_var']}}

    scaling_options = {'noscale': 'No scaling',
                       'porosity': 'Scale using porosity',
                       'linreg': 'Linear Regression',
                       'mean_std': 'Mean - standard deviation',
                       'min_max': 'Minimum,maximum',
                       'lin_cdf_match': 'Piecewise <br> linear CDF matching',
                       'cdf_match': 'CDF matching'}

    if scaling is None:
        scaling = 'noscale'

    masking_option_return = {}
    for mid, mops, mval in zip(masking_ids,
                               masking_ops,
                               masking_values):
        masking_option_return[mid] = {'op': mops,
                                      'val': mval,
                                      'name': masking_meta[mid]['long_name']}

    settings = {'scaling': scaling_options[scaling],
                'masking': masking_option_return}

    output_data = {'validation_data': validation_datasets, 'masking_data': masking_data,
                   'statistics': statistics, 'settings': settings}

    resp = make_response(jsonify(output_data))
    resp.headers['Access-Control-Allow-Origin'] = '*'
    return resp
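
# A sketch of how a client might call this endpoint, assuming it is routed at
# /getdata on a local development server (the URL, port, station id, and masking
# dataset id below are illustrative; the route decorator is not shown in the
# snippet above):

import requests

params = {
    'station_id': 12,              # hypothetical station id
    'scaling': 'mean_std',
    'anomaly': 'none',
    'masking_ds[]': ['ERA_TEMP'],  # hypothetical masking dataset id
    'masking_op[]': ['<'],
    'masking_values[]': ['0.0'],
}
resp = requests.get('http://localhost:5000/getdata', params=params)
print(resp.json()['statistics'])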