def test_create_from_aggregated_csv_file_smry_csv(
    testdata_folder: Path, tmp_path: Path
) -> None:
    """An aggregated smry.csv should yield a single 'iter-0' provider."""
    factory = EnsembleTableProviderFactory(
        tmp_path, backing_type=BACKING_TYPE_TO_TEST, allow_storage_writes=True
    )
    csv_path = testdata_folder / "reek_test_data" / "aggregated_data" / "smry.csv"
    providerset = factory.create_provider_set_from_aggregated_csv_file(csv_path)

    assert providerset.ensemble_names() == ["iter-0"]
    provider = providerset.ensemble_provider("iter-0")

    column_names = provider.column_names()
    assert len(column_names) == 17
    assert column_names[0] == "DATE"
    assert column_names[16] == "YEARS"
    assert len(provider.realizations()) == 40

    # Without a realization filter, all 40 realizations are returned
    valdf = provider.get_column_data(["YEARS"])
    assert list(valdf.columns) == ["REAL", "YEARS"]
    assert valdf["REAL"].nunique() == 40

    # With a realization filter, only the requested three come back
    valdf = provider.get_column_data(["YEARS"], [0, 39, 10])
    assert list(valdf.columns) == ["REAL", "YEARS"]
    assert valdf["REAL"].nunique() == 3
def test_create_from_per_realization_csv_file(
    testdata_folder: Path, tmp_path: Path
) -> None:
    """Per-realization rft.csv files should load into one provider per iteration."""
    ensembles: Dict[str, str] = {
        "iter-0": str(testdata_folder / "01_drogon_ahm/realization-*/iter-0"),
        "iter-3": str(testdata_folder / "01_drogon_ahm/realization-*/iter-3"),
    }
    csvfile = "share/results/tables/rft.csv"

    factory = EnsembleTableProviderFactory(
        tmp_path, backing_type=BACKING_TYPE_TO_TEST, allow_storage_writes=True
    )
    providerset = factory.create_provider_set_from_per_realization_csv_file(
        ensembles, csvfile
    )
    assert providerset.ensemble_names() == ["iter-0", "iter-3"]

    provider = providerset.ensemble_provider("iter-0")
    assert len(provider.column_names()) == 13
    assert len(provider.realizations()) == 100

    # Fetch a single column for a single realization and verify its contents
    valdf = provider.get_column_data(["CONIDX"], [2])
    assert valdf.shape == (218, 2)
    assert list(valdf.columns) == ["REAL", "CONIDX"]
    assert valdf["REAL"].unique() == [2]
    assert valdf["CONIDX"].nunique() == 24
    assert sorted(valdf["CONIDX"].unique()) == list(range(1, 25))
def __init__(
    self,
    app: Dash,
    webviz_settings: WebvizSettings,
    ensembles: Optional[list] = None,
    rel_file_pattern: str = "share/results/unsmry/*.arrow",
    statistics_file: str = "share/results/tables/gridpropstatistics.csv",
    surface_renaming: Optional[dict] = None,
    time_index: str = "monthly",
    column_keys: Optional[list] = None,
    csvfile_statistics: Optional[Path] = None,
    csvfile_smry: Optional[Path] = None,
):
    """Set up the plugin's data models from one of two input modes.

    * ``ensembles`` mode: summary data is read from per-realization arrow
      files matching ``rel_file_pattern`` and property statistics from
      per-realization ``statistics_file`` csv files.
    * aggregated mode: ``csvfile_statistics`` (and optionally
      ``csvfile_smry``) provide pre-aggregated csv data.

    Raises:
        ValueError: if neither mode is configured, or (outside portable
            run mode) if arrow summary files are missing.
    """
    super().__init__()
    self.theme: WebvizConfigTheme = webviz_settings.theme
    self.ensembles = ensembles
    self._surface_folders: Union[dict, None] = None
    self._vmodel: Optional[
        Union[SimulationTimeSeriesModel, ProviderTimeSeriesDataModel]
    ] = None

    # Portable apps get legacy (pre-arrow / aggregated-csv) fallbacks below
    run_mode_portable = WEBVIZ_INSTANCE_INFO.run_mode == WebvizRunMode.PORTABLE
    table_provider = EnsembleTableProviderFactory.instance()

    if ensembles is not None:
        # Resolve ensemble names to scratch paths via shared settings
        ensemble_paths = {
            ensemble_name: webviz_settings.shared_settings["scratch_ensembles"][
                ensemble_name
            ]
            for ensemble_name in ensembles
        }
        resampling_frequency = Frequency(time_index)
        provider_factory = EnsembleSummaryProviderFactory.instance()
        try:
            provider_set = {
                ens: provider_factory.create_from_arrow_unsmry_presampled(
                    str(ens_path), rel_file_pattern, resampling_frequency
                )
                for ens, ens_path in ensemble_paths.items()
            }
            self._vmodel = ProviderTimeSeriesDataModel(
                provider_set=provider_set, column_keys=column_keys
            )
            property_df = create_df_from_table_provider(
                table_provider.create_provider_set_from_per_realization_csv_file(
                    ensemble_paths, statistics_file
                )
            )
        except ValueError as error:
            message = (
                f"Some/all ensembles are missing arrow files at {rel_file_pattern}.\n"
                "If no arrow files have been generated with `ERT` using `ECL2CSV`, "
                "the commandline tool `smry2arrow_batch` can be used to generate arrow "
                "files for an ensemble"
            )
            if not run_mode_portable:
                raise ValueError(message) from error

            # NOTE: this part below is to ensure backwards compatibility for portable app's
            # created before the arrow support. It should be removed in the future.
            emodel: EnsembleSetModel = (
                caching_ensemble_set_model_factory.get_or_create_model(
                    ensemble_paths=ensemble_paths,
                    time_index=time_index,
                    column_keys=column_keys,
                )
            )
            self._vmodel = SimulationTimeSeriesModel(
                dataframe=emodel.get_or_load_smry_cached()
            )
            property_df = emodel.load_csv(csv_file=Path(statistics_file))

        # Surface folders are derived from the path segment preceding
        # "realization" -- assumes the standard FMU folder layout.
        self._surface_folders = {
            ens: Path(ens_path.split("realization")[0])
            / "share/results/maps"
            / ens
            for ens, ens_path in ensemble_paths.items()
        }
    else:
        if csvfile_statistics is None:
            raise ValueError(
                "If not 'ensembles', then csvfile_statistics must be provided"
            )
        # NOTE: the try/except is for backwards compatibility with existing portable app's.
        # It should be removed in the future together with the support of aggregated csv-files
        try:
            property_df = create_df_from_table_provider(
                table_provider.create_provider_set_from_aggregated_csv_file(
                    csvfile_statistics
                )
            )
        except FileNotFoundError:
            if not run_mode_portable:
                raise
            property_df = read_csv(csvfile_statistics)
        if csvfile_smry is not None:
            try:
                smry_df = create_df_from_table_provider(
                    table_provider.create_provider_set_from_aggregated_csv_file(
                        csvfile_smry
                    )
                )
            except FileNotFoundError:
                if not run_mode_portable:
                    raise
                smry_df = read_csv(csvfile_smry)
            self._vmodel = SimulationTimeSeriesModel(dataframe=smry_df)

    self._pmodel = PropertyStatisticsModel(dataframe=property_df, theme=self.theme)
    self._surface_renaming = surface_renaming if surface_renaming else {}
    self._surface_table = generate_surface_table(
        statistics_dframe=self._pmodel.dataframe,
        ensembles=self._pmodel.ensembles,
        surface_folders=self._surface_folders,
        surface_renaming=self._surface_renaming,
    )
    self.set_callbacks(app)
def __init__(
    self,
    app: Dash,
    webviz_settings: WebvizSettings,
    csvfile: str = None,
    ensembles: list = None,
    aggregated_csvfile: Path = None,
    aggregated_parameterfile: Path = None,
    observation_file: Path = None,
    observation_group: str = "general",
    remap_observation_keys: Dict[str, str] = None,
    remap_observation_values: Dict[str, str] = None,
    colors: Dict = None,
    initial_data: Dict = None,
    initial_layout: Dict = None,
):
    """Build table/parameter provider sets and supporting models.

    Input is either per-realization (``ensembles`` + ``csvfile``) or
    pre-aggregated (``aggregated_csvfile`` + ``aggregated_parameterfile``).

    Raises:
        ValueError: if neither input combination is supplied.
    """
    super().__init__()
    provider = EnsembleTableProviderFactory.instance()
    self._initial_data = initial_data if initial_data else {}
    self._initial_layout = initial_layout if initial_layout else {}
    if ensembles is not None and csvfile is not None:
        # Resolve ensemble names to scratch paths via shared settings
        ensembles_dict: Dict[str, str] = {
            ens_name: webviz_settings.shared_settings["scratch_ensembles"][ens_name]
            for ens_name in ensembles
        }
        self._parameterproviderset = (
            provider.create_provider_set_from_per_realization_parameter_file(
                ensembles_dict
            )
        )
        self._tableproviderset = (
            provider.create_provider_set_from_per_realization_csv_file(
                ensembles_dict, csvfile
            )
        )
        self._ensemble_names = ensembles
    # NOTE(review): left operand is a truthiness check while the right uses
    # "is not None" -- presumably intended to be symmetric; confirm.
    elif aggregated_csvfile and aggregated_parameterfile is not None:
        self._tableproviderset = (
            provider.create_provider_set_from_aggregated_csv_file(aggregated_csvfile)
        )
        self._parameterproviderset = (
            provider.create_provider_set_from_aggregated_csv_file(
                aggregated_parameterfile
            )
        )
        self._ensemble_names = self._tableproviderset.ensemble_names()
    else:
        raise ValueError(
            "Specify either ensembles and csvfile or aggregated_csvfile "
            "and aggregated_parameterfile"
        )

    # Union of column names across all ensembles, for parameters and data
    all_parameters: list = [
        self._parameterproviderset.ensemble_provider(ens).column_names()
        for ens in self._ensemble_names
    ]
    self._parameter_names: list = list(set().union(*all_parameters))
    all_data_columns: list = [
        self._tableproviderset.ensemble_provider(ens).column_names()
        for ens in self._ensemble_names
    ]
    self._data_column_names: list = list(set().union(*all_data_columns))

    # Concatenate all parameter data into one dataframe tagged by ENSEMBLE
    dfs = []
    for ens in self._ensemble_names:
        df = self._parameterproviderset.ensemble_provider(ens).get_column_data(
            column_names=self._parameterproviderset.ensemble_provider(
                ens
            ).column_names()
        )
        df["ENSEMBLE"] = ens
        dfs.append(df)
    parameterdf = pd.concat(dfs)
    self._realizations = sorted(list(parameterdf["REAL"].unique()))
    self._parameter_filter = ParameterFilter(
        self.uuid("parameter-filter"), parameterdf
    )

    # Optional observations overlay
    self._observationfile = observation_file
    self._observationmodel = (
        ObservationModel(
            get_path(self._observationfile),
            observation_group,
            remap_observation_keys,
            remap_observation_values,
        )
        if self._observationfile
        else None
    )
    WEBVIZ_ASSETS.add(
        Path(webviz_subsurface.__file__).parent
        / "_assets"
        / "js"
        / "clientside_functions.js"
    )
    # Default per-ensemble colors, with optional user overrides
    self._colors: Dict = unique_colors(self._ensemble_names, webviz_settings.theme)
    if colors is not None:
        self._colors.update(colors)
    self.set_callbacks(app)
def __init__(
    self,
    app,
    webviz_settings: WebvizSettings,
    parameter_csv: Path = None,
    response_csv: Path = None,
    ensembles: list = None,
    rel_file_pattern: str = "share/results/unsmry/*.arrow",
    response_file: str = None,
    response_filters: dict = None,
    response_ignore: list = None,
    response_include: list = None,
    column_keys: list = None,
    sampling: str = "monthly",
    aggregation: str = "sum",
    corr_method: str = "pearson",
):
    """Load parameter and response data for correlation analysis.

    Input is either aggregated csv files (``parameter_csv`` + ``response_csv``)
    or per-realization ensemble data (``ensembles`` plus ``response_file``, or
    presampled arrow summary files matching ``rel_file_pattern``).

    Raises:
        ValueError: if both ``response_ignore`` and ``response_include`` are
            given, or if the two input modes are mixed or both missing.
    """
    super().__init__()
    self.parameter_csv = parameter_csv if parameter_csv else None
    self.response_csv = response_csv if response_csv else None
    self.response_file = response_file if response_file else None
    self.response_filters = response_filters if response_filters else {}
    self.column_keys = column_keys
    self._sampling = Frequency(sampling)
    self.corr_method = corr_method
    self.aggregation = aggregation
    if response_ignore and response_include:
        # Fixed typo in user-facing message ("Incorrent" -> "Incorrect")
        raise ValueError(
            'Incorrect argument. Either provide "response_include", '
            '"response_ignore" or neither'
        )
    if parameter_csv and response_csv:
        if ensembles or response_file:
            raise ValueError(
                'Incorrect arguments. Either provide "csv files" or '
                '"ensembles and response_file".'
            )
        parameterdf = read_csv(self.parameter_csv)
        self.responsedf = read_csv(self.response_csv)
    elif ensembles:
        # Resolve ensemble names to scratch paths via shared settings
        self.ens_paths = {
            ens: webviz_settings.shared_settings["scratch_ensembles"][ens]
            for ens in ensembles
        }
        table_provider_factory = EnsembleTableProviderFactory.instance()
        parameterdf = create_df_from_table_provider(
            table_provider_factory.create_provider_set_from_per_realization_parameter_file(
                self.ens_paths
            )
        )
        if self.response_file:
            self.responsedf = load_csv(
                ensemble_paths=self.ens_paths,
                csv_file=response_file,
                ensemble_set_name="EnsembleSet",
            )
        else:
            smry_provider_factory = EnsembleSummaryProviderFactory.instance()
            provider_set = {
                ens_name: smry_provider_factory.create_from_arrow_unsmry_presampled(
                    ens_path, rel_file_pattern, self._sampling
                )
                for ens_name, ens_path in self.ens_paths.items()
            }
            # Summary responses are filtered down to a single date
            self.response_filters["DATE"] = "single"
            self.responsedf = create_df_from_summary_provider(
                provider_set,
                self.column_keys,
            )
    else:
        raise ValueError(
            'Incorrect arguments. Either provide "csv files" or "ensembles and response_file".'
        )
    pmodel = ParametersModel(
        dataframe=parameterdf, keep_numeric_only=True, drop_constants=True
    )
    self.parameterdf = pmodel.dataframe
    self.parameter_columns = pmodel.parameters
    parresp.check_runs(self.parameterdf, self.responsedf)
    parresp.check_response_filters(self.responsedf, self.response_filters)
    # Only select numerical responses
    self.response_columns = parresp.filter_numerical_columns(
        df=self.responsedf,
        column_ignore=response_ignore,
        column_include=response_include,
        filter_columns=self.response_filters.keys(),
    )
    self.theme = webviz_settings.theme
    self.set_callbacks(app)
def __init__(
    self,
    app: Dash,
    webviz_settings: WebvizSettings,
    csvfile: str = None,
    ensemble: str = None,
    aggregated_csvfile: Path = None,
    aggregated_parameterfile: Path = None,
    initial_response: str = None,
    single_value_selectors: List[str] = None,
    multi_value_selectors: List[str] = None,
):
    """Set up tornado-plot data for a single ensemble.

    Input is either per-realization (``ensemble`` + ``csvfile``) or
    pre-aggregated (``aggregated_csvfile`` + ``aggregated_parameterfile``);
    aggregated input must contain exactly one ensemble.

    Raises:
        ValueError: if neither input combination is supplied, or the
            aggregated csv file contains multiple ensembles.
        KeyError: if the design-matrix columns 'SENSNAME'/'SENSCASE' are
            missing from the parameter data.
    """
    super().__init__()
    self._single_filters = single_value_selectors if single_value_selectors else []
    self._multi_filters = multi_value_selectors if multi_value_selectors else []
    provider = EnsembleTableProviderFactory.instance()
    if ensemble is not None and csvfile is not None:
        # Resolve the single ensemble name to its scratch path
        ensemble_dict: Dict[str, str] = {
            ensemble: webviz_settings.shared_settings["scratch_ensembles"][ensemble]
        }
        self._parameterproviderset = (
            provider.create_provider_set_from_per_realization_parameter_file(
                ensemble_dict
            )
        )
        self._tableproviderset = (
            provider.create_provider_set_from_per_realization_csv_file(
                ensemble_dict, csvfile
            )
        )
        self._ensemble_name = ensemble
    # Fixed asymmetric condition: both operands now checked with "is not None"
    elif aggregated_csvfile is not None and aggregated_parameterfile is not None:
        self._tableproviderset = (
            provider.create_provider_set_from_aggregated_csv_file(aggregated_csvfile)
        )
        self._parameterproviderset = (
            provider.create_provider_set_from_aggregated_csv_file(
                aggregated_parameterfile
            )
        )
        if len(self._tableproviderset.ensemble_names()) != 1:
            raise ValueError(
                "Csv file has multiple ensembles. "
                "This plugin only supports a single ensemble"
            )
        self._ensemble_name = self._tableproviderset.ensemble_names()[0]
    else:
        raise ValueError(
            "Specify either ensembles and csvfile or aggregated_csvfile "
            "and aggregated_parameterfile"
        )
    try:
        design_matrix_df = self._parameterproviderset.ensemble_provider(
            self._ensemble_name
        ).get_column_data(column_names=["SENSNAME", "SENSCASE"])
    except KeyError as exc:
        # Fixed grammar in message ("is missing" -> "are missing")
        raise KeyError(
            "Required columns 'SENSNAME' and 'SENSCASE' are missing "
            f"from {self._ensemble_name}. Cannot calculate tornado plots"
        ) from exc
    design_matrix_df["ENSEMBLE"] = self._ensemble_name
    design_matrix_df["SENSTYPE"] = design_matrix_df.apply(
        lambda row: find_sens_type(row.SENSCASE), axis=1
    )
    self._tornado_widget = TornadoWidget(
        realizations=design_matrix_df, app=app, webviz_settings=webviz_settings
    )
    # Responses are all table columns except the configured filter columns
    self._responses: List[str] = self._tableproviderset.ensemble_provider(
        self._ensemble_name
    ).column_names()
    if self._single_filters:
        self._responses = [
            response
            for response in self._responses
            if response not in self._single_filters
        ]
    if self._multi_filters:
        self._responses = [
            response
            for response in self._responses
            if response not in self._multi_filters
        ]
    self._initial_response: str = (
        initial_response if initial_response else self._responses[0]
    )
    self.set_callbacks(app)