async def get_response_misfits(
    *,
    db: Session = Depends(get_db),
    ensemble_id: UUID,
    response_name: str,
    realization_index: Optional[int] = None,
    summary_misfits: bool = False,
) -> Response:
    """
    Compute univariate misfits for response(s).

    Fetches all matrix records named `response_name` in the given ensemble
    that have observations attached (optionally narrowed to one realization),
    compares them against the first attached observation, and returns the
    misfits as CSV.

    Raises exc.UnprocessableError when the misfit computation fails.
    """
    response_query = (
        db.query(ds.Record)
        # SQLAlchemy requires `!= None` (not `is not None`) to generate
        # an `IS NOT NULL` clause here.
        .filter(ds.Record.observations != None)
        .join(ds.RecordInfo)
        .filter_by(
            name=response_name,
            record_type=ds.RecordType.f64_matrix,
        )
        .join(ds.Ensemble)
        .filter_by(id=ensemble_id)
    )
    if realization_index is not None:
        responses = [
            response_query.filter(
                ds.Record.realization_index == realization_index
            ).one()
        ]
    else:
        responses = response_query.all()

    observation_df = None
    response_dict = {}
    for response in responses:
        data_df = pd.DataFrame(response.f64_matrix.content)
        labels = response.f64_matrix.labels
        if labels is not None:
            data_df.columns = labels[0]
            data_df.index = labels[1]
        response_dict[response.realization_index] = data_df
        if observation_df is None:
            # currently we expect only a single observation object, while
            # later in the future this might change
            obs = response.observations[0]
            observation_df = pd.DataFrame(
                data={"values": obs.values, "errors": obs.errors},
                index=obs.x_axis,
            )
    try:
        result_df = calculate_misfits_from_pandas(
            response_dict, observation_df, summary_misfits
        )
    except Exception as misfits_exc:
        # Chain the original exception for debuggability (consistent with
        # the facade-based implementation of this endpoint).
        raise exc.UnprocessableError(
            f"Unable to compute misfits: {misfits_exc}"
        ) from misfits_exc
    return Response(
        content=result_df.to_csv().encode(),
        media_type="text/csv",
    )
async def get_response_misfits(
    *,
    res: LibresFacade = Depends(get_res),
    ensemble_id: UUID,
    response_name: str,
    realization_index: Optional[int] = None,
    summary_misfits: bool = False,
) -> Response:
    """
    Compute univariate misfits for response(s) via the LibresFacade.

    Builds one single-row DataFrame per realization for `response_name`
    (optionally only `realization_index`), fetches the first matching
    observation, and returns the misfits as CSV.

    Raises:
        ValueError: if no observations exist (or cannot be fetched) for
            `response_name`.
        exc.UnprocessableError: if the misfit computation fails.
    """
    ensemble_name = get_name("ensemble", ensemble_id)
    dataframe = data_for_key(res, ensemble_name, response_name)
    if realization_index is not None:
        # Keep a one-row DataFrame so the iteration below still applies.
        dataframe = pd.DataFrame(dataframe.loc[realization_index]).T

    response_dict = {}
    for index, data in dataframe.iterrows():
        data_df = pd.DataFrame(data).T
        response_dict[index] = data_df

    obs_keys = res.observation_keys(response_name)
    # Fail fast on an empty key list *before* fetching observations —
    # previously the fetch ran first and the emptiness check was dead-ish.
    if not obs_keys:
        raise ValueError(f"No observations for key {response_name}")
    obs = observations_for_obs_keys(res, ensemble_name, obs_keys)
    if not obs:
        raise ValueError(f"Cant fetch observations for key {response_name}")
    o = obs[0]

    def parse_index(x):
        # x_axis entries may be integer indices or date strings; try the
        # cheap int conversion first, fall back to date parsing.
        try:
            return int(x)
        except ValueError:
            return parse(x)

    observation_df = pd.DataFrame(
        data={"values": o["values"], "errors": o["errors"]},
        index=[parse_index(x) for x in o["x_axis"]],
    )
    try:
        result_df = calculate_misfits_from_pandas(
            response_dict, observation_df, summary_misfits
        )
    except Exception as misfits_exc:
        raise exc.UnprocessableError(
            f"Unable to compute misfits: {misfits_exc}"
        ) from misfits_exc
    return Response(
        content=result_df.to_csv().encode(),
        media_type="text/csv",
    )
async def post_record_observations(
    *,
    db: Session = Depends(get_db),
    record: ds.Record = Depends(get_record_by_name),
    observation_ids: List[UUID] = Body(...),
) -> None:
    """
    Attach the observations with the given ids to `record` and commit.

    Raises exc.UnprocessableError if no matching observations are found.
    """
    matching = (
        db.query(ds.Observation)
        .filter(ds.Observation.id.in_(observation_ids))
        .all()
    )
    if not matching:
        raise exc.UnprocessableError(f"Observations {observation_ids} not found!")
    record.observations = matching
    db.commit()
def _get_record_dataframe(
    record: ds.Record,
    realization_index: Optional[int],
    label: Optional[str],
) -> pd.DataFrame:
    """
    Build a pandas DataFrame from an f64-matrix record, optionally narrowed
    to one realization and/or one labeled column.

    Raises:
        exc.ExpectationError: if the record is not an f64 matrix.
        exc.UnprocessableError: if `label` is given but not among the
            record's column labels.
    """
    type_ = record.record_info.record_type
    if type_ != ds.RecordType.f64_matrix:
        raise exc.ExpectationError("Non matrix record not supported")

    labels = record.f64_matrix.labels
    content_is_labeled = labels is not None
    label_specified = label is not None
    if content_is_labeled and label_specified and label not in labels[0]:
        raise exc.UnprocessableError(f"Record label '{label}' not found!")

    # A record holds either a single realization (record.realization_index
    # is set) or the whole ensemble; only slice ensemble-wide content.
    if realization_index is None or record.realization_index is not None:
        matrix_content = record.f64_matrix.content
    elif record.realization_index is None:
        matrix_content = record.f64_matrix.content[realization_index]
    # Normalize a 1D row into a one-row 2D matrix. Use the builtin `list`
    # here — isinstance() with typing.List is deprecated.
    if not isinstance(matrix_content[0], list):
        matrix_content = [matrix_content]

    if content_is_labeled and label_specified:
        lbl_idx = labels[0].index(label)
        data = pd.DataFrame([[c[lbl_idx]] for c in matrix_content])
        data.columns = [label]
    elif content_is_labeled:
        data = pd.DataFrame(matrix_content)
        data.columns = labels[0]
    else:
        data = pd.DataFrame(matrix_content)

    # Set data index for labeled content
    if content_is_labeled:
        if record.realization_index is not None:
            data.index = [record.realization_index]
        elif realization_index is not None:
            data.index = [realization_index]
        else:
            data.index = labels[1]

    return data
def new_record_matrix(
    *,
    db: Session = Depends(get_db),
    record: ds.Record = Depends(new_record),
    prior: Optional[str] = None,
) -> ds.Record:
    """
    Finalize a new record as an f64 matrix: classify it as parameter,
    response, or other, and optionally attach a named prior.

    Raises exc.UnprocessableError if `prior` is given for a non-parameter
    record.
    """
    info = record.record_info
    ensemble = info.ensemble

    if record.name in ensemble.parameter_names:
        record_class = ds.RecordClass.parameter
    elif record.name in ensemble.response_names:
        record_class = ds.RecordClass.response
    else:
        record_class = ds.RecordClass.other

    if prior is not None:
        # Priors only make sense for parameter records.
        if record_class is not ds.RecordClass.parameter:
            raise exc.UnprocessableError(
                "Priors can only be specified for parameter records"
            )
        info.prior = db.query(ds.Prior).filter_by(name=prior).one()

    info.record_class = record_class
    info.record_type = ds.RecordType.f64_matrix
    return record
async def post_ensemble_record_matrix(
    *,
    db: Session = Depends(get_db),
    record: ds.Record = Depends(new_record_matrix),
    content_type: str = Header("application/json"),
    request: Request,
) -> js.RecordOut:
    """
    Assign an n-dimensional float matrix, encoded in JSON, to the given
    `name` record.

    Supported Content-Types: application/json, application/x-numpy,
    text/csv (and its deprecated alias application/x-dataframe), and
    application/x-parquet.

    Raises exc.UnprocessableError on unparseable content, unknown
    Content-Type, or an ensemble-wide parameter matrix of dimension < 2.
    """
    if content_type == "application/x-dataframe":
        logger.warning(
            "Content-Type with 'application/x-dataframe' is deprecated. Use 'text/csv' instead."
        )
        content_type = "text/csv"

    labels = None

    try:
        if content_type == "application/json":
            content = np.array(await request.json(), dtype=np.float64)
        elif content_type == "application/x-numpy":
            from numpy.lib.format import read_array

            stream = io.BytesIO(await request.body())
            content = read_array(stream)
        elif content_type == "text/csv":
            stream = io.BytesIO(await request.body())
            df = pd.read_csv(stream, index_col=0, float_precision="round_trip")
            content = df.values
            labels = [
                [str(v) for v in df.columns.values],
                [str(v) for v in df.index.values],
            ]
        elif content_type == "application/x-parquet":
            stream = io.BytesIO(await request.body())
            df = pd.read_parquet(stream)
            content = df.values
            labels = [
                [v for v in df.columns.values],
                [v for v in df.index.values],
            ]
        else:
            # Unknown Content-Type: funnel into the shared error below.
            raise ValueError()
    except ValueError as err:
        if record.realization_index is None:
            message = f"Ensemble-wide record '{record.name}' needs to be a matrix"
        else:
            message = f"Forward-model record '{record.name}' for realization {record.realization_index} needs to be a matrix"
        raise exc.UnprocessableError(message) from err

    # Require that the dimensionality of an ensemble-wide parameter matrix is at least 2
    if (
        record.realization_index is None
        and record.record_class is ds.RecordClass.parameter
    ):
        if content.ndim <= 1:
            raise exc.UnprocessableError(
                f"Ensemble-wide parameter record '{record.name}' for ensemble '{record.record_info.ensemble.id}' "
                "must have dimensionality of at least 2"
            )

    matrix_obj = ds.F64Matrix(content=content.tolist(), labels=labels)
    record.f64_matrix = matrix_obj
    # Rejoined the `return` with its expression — it was severed across
    # lines in the original, leaving a bare `return` and a dead statement.
    return _create_record(db, record)