def test_create_from_ensemble_csv(testdata_folder: Path, tmp_path: Path) -> None:
    """A provider created from an aggregated ensemble CSV exposes the expected
    vector names, realizations, dates and vector dataframes."""
    factory = EnsembleSummaryProviderFactory(tmp_path, allow_storage_writes=True)
    csv_filename = (
        testdata_folder / "reek_test_data" / "aggregated_data" / "smry_hm.csv"
    )
    provider = factory.create_from_ensemble_csv_file(csv_filename, "iter-0")

    vector_names = provider.vector_names()
    assert len(vector_names) == 473
    assert vector_names[0] == "BPR:15,28,1"
    assert vector_names[472] == "YEARS"

    assert len(provider.realizations()) == 10

    dates = provider.dates(None)
    assert len(dates) == 38
    assert isinstance(dates[0], datetime.datetime)
    assert dates[0] == datetime.datetime(2000, 1, 1)
    assert dates[-1] == datetime.datetime(2003, 2, 1)

    # Query FOPR across all realizations
    fopr_df = provider.get_vectors_df(["FOPR"], None)
    assert fopr_df.shape == (380, 3)
    assert fopr_df.columns.tolist() == ["DATE", "REAL", "FOPR"]
    assert fopr_df["REAL"].nunique() == 10
    assert isinstance(fopr_df["DATE"][0], datetime.datetime)

    # Query FOPR for a single realization
    fopr_df = provider.get_vectors_df(["FOPR"], None, [1])
    assert fopr_df.shape == (38, 3)
    assert fopr_df.columns.tolist() == ["DATE", "REAL", "FOPR"]
    assert fopr_df["REAL"].nunique() == 1
def test_create_from_per_realization_csv_file(
    testdata_folder: Path, tmp_path: Path
) -> None:
    """A provider created from per-realization CSV files exposes the expected
    vector names, realizations and vector dataframes."""
    # Fabricate a per-realization ensemble layout from the aggregated test CSV
    _split_aggr_csv_into_per_real(
        str(testdata_folder / "reek_test_data/aggregated_data/smry.csv"),
        str(tmp_path / "fake_data"),
    )

    factory = EnsembleSummaryProviderFactory(tmp_path, allow_storage_writes=True)
    provider = factory.create_from_per_realization_csv_file(
        str(tmp_path / "fake_data/realization-*/iter-0"), "smry.csv"
    )

    vector_names = provider.vector_names()
    assert len(vector_names) == 16
    assert vector_names[0] == "FGIP"
    assert vector_names[15] == "YEARS"

    assert len(provider.realizations()) == 10

    # Query FOPR across all realizations
    fopr_df = provider.get_vectors_df(["FOPR"], None)
    assert fopr_df.shape == (380, 3)
    assert fopr_df.columns.tolist() == ["DATE", "REAL", "FOPR"]
    assert fopr_df["REAL"].nunique() == 10
    assert isinstance(fopr_df["DATE"][0], datetime.datetime)

    # Query FOPR for a single realization
    fopr_df = provider.get_vectors_df(["FOPR"], None, [1])
    assert fopr_df.shape == (38, 3)
    assert fopr_df.columns.tolist() == ["DATE", "REAL", "FOPR"]
    assert fopr_df["REAL"].nunique() == 1
def create_presampled_provider_set_from_paths(
    name_path_dict: Dict[str, Path],
    rel_file_pattern: str,
    presampling_frequency: Frequency,
) -> ProviderSet:
    """Create set of providers without lazy resampling, but with specified frequency,
    from dictionary of ensemble name and corresponding arrow file paths

    `Input:`
    * name_path_dict: Dict[str, Path] - ensemble name as key and arrow file path as value
    * rel_file_pattern: str - specify a relative (per realization) file pattern to find
    the wanted .arrow files within each realization
    * presampling_frequency: Frequency - Frequency to sample input data in factory with,
    during import.

    `Return:`
    Provider set with ensemble summary providers with presampled data according to
    specified presampling frequency.
    """
    # TODO: Make presampling_frequency: Optional[Frequency] when allowing raw data for plugin
    factory = EnsembleSummaryProviderFactory.instance()
    providers: Dict[str, EnsembleSummaryProvider] = {
        ensemble_name: factory.create_from_arrow_unsmry_presampled(
            str(arrow_path), rel_file_pattern, presampling_frequency
        )
        for ensemble_name, arrow_path in name_path_dict.items()
    }
    return ProviderSet(providers)
def test_create_from_arrow_unsmry_lazy(testdata_folder: Path, tmp_path: Path) -> None:
    """Lazy arrow provider supports resampling and exposes the expected vector
    names, monthly dates, realizations and vector dataframes."""
    ensemble_path = str(testdata_folder / "01_drogon_ahm/realization-*/iter-0")

    # Used to generate test results
    # _dump_smry_to_csv_using_fmu(ensemble_path, "monthly", "expected_smry.csv")

    factory = EnsembleSummaryProviderFactory(tmp_path, allow_storage_writes=True)
    provider = factory.create_from_arrow_unsmry_lazy(
        ens_path=ensemble_path, rel_file_pattern="share/results/unsmry/*.arrow"
    )

    assert provider.supports_resampling()
    assert provider.vector_metadata("FOPT") is not None
    assert len(provider.vector_names()) == 931

    dates = provider.dates(Frequency.MONTHLY)
    assert len(dates) == 31
    assert isinstance(dates[0], datetime.datetime)
    assert dates[0] == datetime.datetime(2018, 1, 1)
    assert dates[-1] == datetime.datetime(2020, 7, 1)

    realizations = provider.realizations()
    assert len(realizations) == 100
    assert realizations[0] == 0
    assert realizations[-1] == 99

    # Monthly FOPR across all realizations
    fopr_df = provider.get_vectors_df(["FOPR"], Frequency.MONTHLY)
    assert fopr_df.shape == (3100, 3)
    assert fopr_df.columns.tolist() == ["DATE", "REAL", "FOPR"]
    assert fopr_df["DATE"].nunique() == 31
    assert fopr_df["REAL"].nunique() == 100
    assert isinstance(fopr_df["DATE"][0], datetime.datetime)

    # Monthly FOPR for a single realization
    fopr_df = provider.get_vectors_df(["FOPR"], Frequency.MONTHLY, [5])
    assert fopr_df.shape == (31, 3)
    assert fopr_df.columns.tolist() == ["DATE", "REAL", "FOPR"]
    assert fopr_df["DATE"].nunique() == 31
    assert fopr_df["REAL"].nunique() == 1
    assert fopr_df["REAL"][0] == 5
def test_arrow_unsmry_lazy_vector_metadata(
    testdata_folder: Path, tmp_path: Path
) -> None:
    """Spot-check metadata for a field rate vector, a historical well total
    vector and a unit-less field vector."""
    factory = EnsembleSummaryProviderFactory(tmp_path, allow_storage_writes=True)
    provider = factory.create_from_arrow_unsmry_lazy(
        ens_path=str(testdata_folder / "01_drogon_ahm/realization-*/iter-0"),
        rel_file_pattern="share/results/unsmry/*.arrow",
    )

    # vector name -> (unit, is_total, is_rate, is_historical, keyword, wgname, get_num)
    expected_metadata = {
        "FOPR": ("SM3/DAY", False, True, False, "FOPR", None, 0),
        "WOPTH:A6": ("SM3", True, False, True, "WOPTH", "A6", 11),
        "FWCT": ("", False, True, False, "FWCT", None, 0),
    }
    for vector_name, expected_fields in expected_metadata.items():
        meta: Optional[VectorMetadata] = provider.vector_metadata(vector_name)
        assert meta is not None
        assert (
            meta.unit,
            meta.is_total,
            meta.is_rate,
            meta.is_historical,
            meta.keyword,
            meta.wgname,
            meta.get_num,
        ) == expected_fields
def __init__( self, app: dash.Dash, webviz_settings: WebvizSettings, ensembles: list, gruptree_file: str = "share/results/tables/gruptree.csv", rel_file_pattern: str = "share/results/unsmry/*.arrow", time_index: str = "yearly", ): super().__init__() assert time_index in [ "monthly", "yearly", ], "time_index must be monthly or yearly" self._ensembles = ensembles self._gruptree_file = gruptree_file if ensembles is None: raise ValueError('Incorrect argument, must provide "ensembles"') sampling = Frequency(time_index) self._ensemble_paths: Dict[str, Path] = { ensemble_name: webviz_settings.shared_settings["scratch_ensembles"][ensemble_name] for ensemble_name in ensembles } provider_factory = EnsembleSummaryProviderFactory.instance() self._group_tree_data: Dict[str, EnsembleGroupTreeData] = {} sampling = Frequency(time_index) for ens_name, ens_path in self._ensemble_paths.items(): provider: EnsembleSummaryProvider = ( provider_factory.create_from_arrow_unsmry_presampled( str(ens_path), rel_file_pattern, sampling)) self._group_tree_data[ens_name] = EnsembleGroupTreeData( provider, GruptreeModel(ens_name, ens_path, gruptree_file)) self.set_callbacks(app)
def create_lazy_provider_set_from_paths(
    name_path_dict: Dict[str, Path],
    rel_file_pattern: str,
) -> ProviderSet:
    """Create set of providers with lazy (on-demand) resampling/interpolation,
    from dictionary of ensemble name and corresponding arrow file paths

    `Input:`
    * name_path_dict: Dict[str, Path] - ensemble name as key and arrow file path as value
    * rel_file_pattern: str - specify a relative (per realization) file pattern to find
    the wanted .arrow files within each realization

    `Return:`
    Provider set with ensemble summary providers with lazy (on-demand)
    resampling/interpolation
    """
    factory = EnsembleSummaryProviderFactory.instance()
    providers: Dict[str, EnsembleSummaryProvider] = {
        ensemble_name: factory.create_from_arrow_unsmry_lazy(
            str(arrow_path), rel_file_pattern
        )
        for ensemble_name, arrow_path in name_path_dict.items()
    }
    return ProviderSet(providers)
def __init__(
    self,
    app: Dash,
    webviz_settings: WebvizSettings,
    ensembles: Optional[list] = None,
    rel_file_pattern: str = "share/results/unsmry/*.arrow",
    statistics_file: str = "share/results/tables/gridpropstatistics.csv",
    surface_renaming: Optional[dict] = None,
    time_index: str = "monthly",
    column_keys: Optional[list] = None,
    csvfile_statistics: Optional[Path] = None,
    csvfile_smry: Optional[Path] = None,
):
    """Load property-statistics and summary data either from ensembles (arrow
    files, with a legacy fmu-ensemble fallback for old portable apps) or from
    aggregated CSV files, then build the plugin's models and surface table.

    Either `ensembles` or `csvfile_statistics` must be provided.
    """
    super().__init__()
    self.theme: WebvizConfigTheme = webviz_settings.theme
    self.ensembles = ensembles
    self._surface_folders: Union[dict, None] = None
    # Time-series model; may remain None in the csv-based branch when no
    # csvfile_smry is given.
    self._vmodel: Optional[
        Union[SimulationTimeSeriesModel, ProviderTimeSeriesDataModel]
    ] = None
    # Portable apps get best-effort fallbacks instead of hard failures below.
    run_mode_portable = WEBVIZ_INSTANCE_INFO.run_mode == WebvizRunMode.PORTABLE
    table_provider = EnsembleTableProviderFactory.instance()

    if ensembles is not None:
        # Ensemble-based mode: resolve ensemble names to scratch paths.
        ensemble_paths = {
            ensemble_name: webviz_settings.shared_settings["scratch_ensembles"][
                ensemble_name
            ]
            for ensemble_name in ensembles
        }
        resampling_frequency = Frequency(time_index)
        provider_factory = EnsembleSummaryProviderFactory.instance()
        try:
            # Preferred path: presampled summary providers from arrow files.
            provider_set = {
                ens: provider_factory.create_from_arrow_unsmry_presampled(
                    str(ens_path), rel_file_pattern, resampling_frequency
                )
                for ens, ens_path in ensemble_paths.items()
            }
            self._vmodel = ProviderTimeSeriesDataModel(
                provider_set=provider_set, column_keys=column_keys
            )
            property_df = create_df_from_table_provider(
                table_provider.create_provider_set_from_per_realization_csv_file(
                    ensemble_paths, statistics_file
                )
            )
        except ValueError as error:
            message = (
                f"Some/all ensembles are missing arrow files at {rel_file_pattern}.\n"
                "If no arrow files have been generated with `ERT` using `ECL2CSV`, "
                "the commandline tool `smry2arrow_batch` can be used to generate arrow "
                "files for an ensemble"
            )
            # In non-portable mode missing arrow files are a hard error.
            if not run_mode_portable:
                raise ValueError(message) from error

            # NOTE: this part below is to ensure backwards compatibility for portable app's
            # created before the arrow support. It should be removed in the future.
            emodel: EnsembleSetModel = (
                caching_ensemble_set_model_factory.get_or_create_model(
                    ensemble_paths=ensemble_paths,
                    time_index=time_index,
                    column_keys=column_keys,
                )
            )
            self._vmodel = SimulationTimeSeriesModel(
                dataframe=emodel.get_or_load_smry_cached()
            )
            property_df = emodel.load_csv(csv_file=Path(statistics_file))

        # Surface folders live next to the realizations:
        # <ensemble root>/share/results/maps/<ensemble name>
        self._surface_folders = {
            ens: Path(ens_path.split("realization")[0])
            / "share/results/maps"
            / ens
            for ens, ens_path in ensemble_paths.items()
        }
    else:
        # CSV-based mode: requires at least the statistics file.
        if csvfile_statistics is None:
            raise ValueError(
                "If not 'ensembles', then csvfile_statistics must be provided"
            )
        # NOTE: the try/except is for backwards compatibility with existing portable app's.
        # It should be removed in the future together with the support of aggregated csv-files
        try:
            property_df = create_df_from_table_provider(
                table_provider.create_provider_set_from_aggregated_csv_file(
                    csvfile_statistics
                )
            )
        except FileNotFoundError:
            if not run_mode_portable:
                raise
            # Legacy portable app: fall back to reading the csv directly.
            property_df = read_csv(csvfile_statistics)

        if csvfile_smry is not None:
            try:
                smry_df = create_df_from_table_provider(
                    table_provider.create_provider_set_from_aggregated_csv_file(
                        csvfile_smry
                    )
                )
            except FileNotFoundError:
                if not run_mode_portable:
                    raise
                # Legacy portable app: fall back to reading the csv directly.
                smry_df = read_csv(csvfile_smry)
            self._vmodel = SimulationTimeSeriesModel(dataframe=smry_df)

    self._pmodel = PropertyStatisticsModel(dataframe=property_df, theme=self.theme)

    self._surface_renaming = surface_renaming if surface_renaming else {}
    self._surface_table = generate_surface_table(
        statistics_dframe=self._pmodel.dataframe,
        ensembles=self._pmodel.ensembles,
        surface_folders=self._surface_folders,
        surface_renaming=self._surface_renaming,
    )
    self.set_callbacks(app)
def __init__(
    self,
    app,
    webviz_settings: WebvizSettings,
    parameter_csv: Optional[Path] = None,
    response_csv: Optional[Path] = None,
    ensembles: Optional[list] = None,
    rel_file_pattern: str = "share/results/unsmry/*.arrow",
    response_file: Optional[str] = None,
    response_filters: Optional[dict] = None,
    response_ignore: Optional[list] = None,
    response_include: Optional[list] = None,
    column_keys: Optional[list] = None,
    sampling: str = "monthly",
    aggregation: str = "sum",
    corr_method: str = "pearson",
):
    """Load parameter and response data either from aggregated CSV files
    (`parameter_csv` + `response_csv`) or from ensembles (per-realization
    parameter files plus either `response_file` or presampled arrow summary
    data), then set up the parameter/response models and callbacks.

    Raises ValueError when both `response_ignore` and `response_include` are
    given, or when neither csv files nor ensembles are provided, or when csv
    files are combined with ensembles/response_file.
    """
    super().__init__()
    self.parameter_csv = parameter_csv if parameter_csv else None
    self.response_csv = response_csv if response_csv else None
    self.response_file = response_file if response_file else None
    self.response_filters = response_filters if response_filters else {}
    self.column_keys = column_keys
    self._sampling = Frequency(sampling)
    self.corr_method = corr_method
    self.aggregation = aggregation

    if response_ignore and response_include:
        # Fixed typo in user-facing message ("Incorrent" -> "Incorrect").
        raise ValueError(
            'Incorrect argument. Either provide "response_include", '
            '"response_ignore" or neither'
        )

    if parameter_csv and response_csv:
        # Aggregated csv mode is mutually exclusive with ensemble mode.
        if ensembles or response_file:
            raise ValueError(
                'Incorrect arguments. Either provide "csv files" or '
                '"ensembles and response_file".'
            )
        parameterdf = read_csv(self.parameter_csv)
        self.responsedf = read_csv(self.response_csv)
    elif ensembles:
        self.ens_paths = {
            ens: webviz_settings.shared_settings["scratch_ensembles"][ens]
            for ens in ensembles
        }
        table_provider_factory = EnsembleTableProviderFactory.instance()
        parameterdf = create_df_from_table_provider(
            table_provider_factory.create_provider_set_from_per_realization_parameter_file(
                self.ens_paths
            )
        )
        if self.response_file:
            # Responses come from a per-realization csv file.
            self.responsedf = load_csv(
                ensemble_paths=self.ens_paths,
                csv_file=response_file,
                ensemble_set_name="EnsembleSet",
            )
        else:
            # Responses come from presampled arrow summary data.
            smry_provider_factory = EnsembleSummaryProviderFactory.instance()
            provider_set = {
                ens_name: smry_provider_factory.create_from_arrow_unsmry_presampled(
                    ens_path, rel_file_pattern, self._sampling
                )
                for ens_name, ens_path in self.ens_paths.items()
            }
            # Summary responses are per date, so a single-date filter is forced.
            self.response_filters["DATE"] = "single"
            self.responsedf = create_df_from_summary_provider(
                provider_set,
                self.column_keys,
            )
    else:
        raise ValueError(
            'Incorrect arguments. Either provide "csv files" or "ensembles and response_file".'
        )

    pmodel = ParametersModel(
        dataframe=parameterdf, keep_numeric_only=True, drop_constants=True
    )
    self.parameterdf = pmodel.dataframe
    self.parameter_columns = pmodel.parameters

    parresp.check_runs(self.parameterdf, self.responsedf)
    parresp.check_response_filters(self.responsedf, self.response_filters)

    # Only select numerical responses
    self.response_columns = parresp.filter_numerical_columns(
        df=self.responsedf,
        column_ignore=response_ignore,
        column_include=response_include,
        filter_columns=self.response_filters.keys(),
    )

    self.theme = webviz_settings.theme
    self.set_callbacks(app)