def test_misfits_computation():
    """Check univariate and summary misfits against the stored reference values.

    One single-row response frame is built per realization from
    ``responses_values``; misfits are then computed twice (per observation
    point, and summarised) and compared to the expected arrays to four
    decimals.
    """
    column_names = list("ABCDEFGH")
    responses_by_realization = {
        real_idx: pd.DataFrame([row], columns=column_names)
        for real_idx, row in enumerate(responses_values)
    }
    observations = _get_dummy_observation_df()

    # Univariate case: compare realization by realization.
    univariate_df = calculate_misfits_from_pandas(
        responses_by_realization, observations, summary_misfits=False
    )
    for real_id in univariate_misfits_results:
        expected = univariate_misfits_results[real_id]
        computed = univariate_df.loc[real_id].values.flatten()
        assert_array_almost_equal(expected, computed, decimal=4)

    # Summary case: the whole flattened frame is compared in one go.
    summary_df = calculate_misfits_from_pandas(
        responses_by_realization, observations, summary_misfits=True
    )
    computed_summary = summary_df.values.flatten()
    assert_array_almost_equal(summary_misfits_results, computed_summary, decimal=4)
async def get_response_misfits(
    *,
    db: Session = Depends(get_db),
    ensemble_id: UUID,
    response_name: str,
    realization_index: Optional[int] = None,
    summary_misfits: bool = False,
) -> Response:
    """
    Compute univariate misfits for response(s)

    Records named ``response_name`` of type ``f64_matrix`` that have
    observations attached are looked up in the ensemble ``ensemble_id``.
    When ``realization_index`` is given, exactly one matching record is
    requested; otherwise all matching records are used. ``summary_misfits``
    is forwarded unchanged to ``calculate_misfits_from_pandas``.

    Returns the resulting dataframe serialised as CSV (``text/csv``).

    Raises ``exc.UnprocessableError`` (chained to the original exception)
    when the misfit computation fails.
    """
    # NOTE(review): `!= None` looks intentional here -- on an ORM attribute it
    # is expected to build a query expression rather than a plain Python
    # comparison; confirm before "fixing" it to `is not None`.
    response_query = (
        db.query(ds.Record)
        .filter(ds.Record.observations != None)  # noqa: E711
        .join(ds.RecordInfo)
        .filter_by(
            name=response_name,
            record_type=ds.RecordType.f64_matrix,
        )
        .join(ds.Ensemble)
        .filter_by(id=ensemble_id)
    )

    if realization_index is not None:
        responses = [
            response_query.filter(
                ds.Record.realization_index == realization_index
            ).one()
        ]
    else:
        responses = response_query.all()

    observation_df = None
    response_dict = {}
    for response in responses:
        data_df = pd.DataFrame(response.f64_matrix.content)
        labels = response.f64_matrix.labels
        if labels is not None:
            # labels[0] is applied to the columns, labels[1] to the index.
            data_df.columns = labels[0]
            data_df.index = labels[1]
        response_dict[response.realization_index] = data_df

        if observation_df is None:
            # currently we expect only a single observation object, while
            # later in the future this might change
            obs = response.observations[0]
            observation_df = pd.DataFrame(
                data={"values": obs.values, "errors": obs.errors},
                index=obs.x_axis,
            )

    try:
        result_df = calculate_misfits_from_pandas(
            response_dict, observation_df, summary_misfits
        )
    except Exception as misfits_exc:
        # Chain the cause so the original traceback is preserved.
        raise exc.UnprocessableError(
            f"Unable to compute misfits: {misfits_exc}"
        ) from misfits_exc

    return Response(
        content=result_df.to_csv().encode(),
        media_type="text/csv",
    )
async def get_response_misfits(
    *,
    res: LibresFacade = Depends(get_res),
    ensemble_id: UUID,
    response_name: str,
    realization_index: Optional[int] = None,
    summary_misfits: bool = False,
) -> Response:
    """
    Compute misfits for a response and return them as CSV.

    The response data for ``response_name`` is fetched with ``data_for_key``
    and split into one single-row dataframe per row of the result. When
    ``realization_index`` is given, only that row is used. Only the first
    fetched observation is compared against. ``summary_misfits`` is forwarded
    unchanged to ``calculate_misfits_from_pandas``.

    Raises ``ValueError`` when the response has no observation keys or when
    no observations could be fetched for them, and ``exc.UnprocessableError``
    (chained to the original exception) when the computation itself fails.
    """
    ensemble_name = get_name("ensemble", ensemble_id)
    dataframe = data_for_key(res, ensemble_name, response_name)
    if realization_index is not None:
        # Keep a one-row frame so the iteration below handles both cases.
        dataframe = pd.DataFrame(dataframe.loc[realization_index]).T

    response_dict = {
        index: pd.DataFrame(data).T for index, data in dataframe.iterrows()
    }

    obs_keys = res.observation_keys(response_name)
    # Validate before fetching: there is nothing to look up without keys.
    if not obs_keys:
        raise ValueError(f"No observations for key {response_name}")
    obs = observations_for_obs_keys(res, ensemble_name, obs_keys)
    if not obs:
        raise ValueError(f"Cant fetch observations for key {response_name}")
    o = obs[0]

    def parse_index(x):
        # Integer axis entries are tried first; anything int() rejects with a
        # ValueError is handed to `parse` (presumably a date parser -- confirm).
        try:
            return int(x)
        except ValueError:
            return parse(x)

    observation_df = pd.DataFrame(
        data={"values": o["values"], "errors": o["errors"]},
        index=[parse_index(x) for x in o["x_axis"]],
    )

    try:
        result_df = calculate_misfits_from_pandas(
            response_dict, observation_df, summary_misfits
        )
    except Exception as misfits_exc:
        raise exc.UnprocessableError(
            f"Unable to compute misfits: {misfits_exc}"
        ) from misfits_exc

    return Response(
        content=result_df.to_csv().encode(),
        media_type="text/csv",
    )
def test_misfits_observations_match_response_values():
    """Responses equal to the observed values must give zero misfits."""
    responses = _get_dummy_response_df()
    observations = _get_dummy_observation_df()

    # Overwrite the observed positions of realization 0 with the observed
    # values themselves.
    observed_values = observations["values"].values
    responses.loc[0, observation["x_axis"]] = observed_values

    result = calculate_misfits_from_pandas(
        {0: responses}, observations, summary_misfits=False
    )

    expected = np.zeros(3)
    computed = result.values.flatten()
    assert_array_almost_equal(expected, computed, decimal=4)
def test_misfits_increasing_observation_error():
    """Increasing observation errors must give lower absolute misfits."""
    responses = _get_dummy_response_df()
    observations = _get_dummy_observation_df()

    abs_misfits_per_round = []
    for _ in range(3):
        # Each round raises every observation error by one.
        observations["errors"] += 1
        round_df = calculate_misfits_from_pandas(
            {0: responses}, observations, summary_misfits=False
        )
        # Univariate misfits are signed, so compare magnitudes only.
        abs_misfits_per_round.append(round_df.abs().values.flatten())

    # Every later round (larger error) must be strictly below the previous one.
    for previous, current in zip(abs_misfits_per_round, abs_misfits_per_round[1:]):
        assert_array_less(current, previous)
def test_misfits_increasing_response_values():
    """Responses further from the observed values must give higher misfits."""
    responses = _get_dummy_response_df()
    observations = _get_dummy_observation_df()

    abs_misfits_per_offset = []
    for offset in range(3):
        # Place the responses at the observed values shifted by a constant.
        shifted_values = observations["values"] + offset
        responses.loc[0, observation["x_axis"]] = shifted_values
        offset_df = calculate_misfits_from_pandas(
            {0: responses}, observations, summary_misfits=False
        )
        # Univariate misfits are signed, so compare magnitudes only.
        abs_misfits_per_offset.append(offset_df.abs().values.flatten())

    # Every larger offset must be strictly above the previous one.
    for previous, current in zip(abs_misfits_per_offset, abs_misfits_per_offset[1:]):
        assert_array_less(previous, current)