Example #1
    def __init__(
        self,
        name: str,
        provider: EnsembleSummaryProvider,
        vectors: List[str],
        expressions: Optional[List[ExpressionInfo]] = None,
        resampling_frequency: Optional[Frequency] = None,
        relative_date: Optional[datetime.datetime] = None,
    ) -> None:
        # Initialize base class
        super().__init__(provider.realizations())

        self._name = name
        self._provider = provider
        self._provider_vectors = [
            vector for vector in vectors if vector in self._provider.vector_names()
        ]
        self._per_interval_and_per_day_vectors = [
            vector
            for vector in vectors
            if is_per_interval_or_per_day_vector(vector)
            and get_cumulative_vector_name(vector) in provider.vector_names()
        ]
        self._vector_calculator_expressions = (
            get_selected_expressions(expressions, vectors)
            if expressions is not None
            else []
        )
        self._resampling_frequency = (
            resampling_frequency if self._provider.supports_resampling() else None
        )
        self._relative_date = relative_date
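
A minimal sketch of the vector categorization step above, using plain string prefixes as stand-ins for is_per_interval_or_per_day_vector and get_cumulative_vector_name (the "PER_DAY_"/"PER_INTVL_" prefix convention is an assumption for illustration, not taken from the plugin):

from typing import Dict, List

def _is_per_interval_or_per_day(vector: str) -> bool:
    # Assumed prefix convention, for illustration only
    return vector.startswith(("PER_DAY_", "PER_INTVL_"))

def _cumulative_name(vector: str) -> str:
    # Assumed mapping back to the cumulative vector, for illustration only
    return vector.replace("PER_DAY_", "").replace("PER_INTVL_", "")

def categorize(vectors: List[str], provider_vector_names: List[str]) -> Dict[str, List[str]]:
    return {
        "provider_vectors": [v for v in vectors if v in provider_vector_names],
        "per_interval_and_per_day_vectors": [
            v
            for v in vectors
            if _is_per_interval_or_per_day(v)
            and _cumulative_name(v) in provider_vector_names
        ],
    }

# categorize(["FOPR", "PER_DAY_FOPT"], ["FOPR", "FOPT"])
# -> {"provider_vectors": ["FOPR"], "per_interval_and_per_day_vectors": ["PER_DAY_FOPT"]}
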
Example #2
    def __init__(
        self,
        name: str,
        provider_pair: Tuple[EnsembleSummaryProvider, EnsembleSummaryProvider],
        vectors: List[str],
        expressions: Optional[List[ExpressionInfo]] = None,
        resampling_frequency: Optional[Frequency] = None,
        relative_date: Optional[datetime.datetime] = None,
    ) -> None:
        if len(provider_pair) != 2:
            raise ValueError(
                'Expected input argument "provider_pair" to have two providers! '
                f"Got {len(provider_pair)}."
            )
        self._provider_a = provider_pair[0]
        self._provider_b = provider_pair[1]

        # Initialize base class
        _intersected_realizations = [
            elm for elm in self._provider_a.realizations()
            if elm in self._provider_b.realizations()
        ]
        super().__init__(_intersected_realizations)

        self._name = name
        if (
            self._provider_a.supports_resampling()
            != self._provider_b.supports_resampling()
        ):
            raise ValueError(
                "Ensemble A and B must have the same resampling support! "
                f"Ensemble A supports resampling: {self._provider_a.supports_resampling()} "
                f"and Ensemble B supports resampling: {self._provider_b.supports_resampling()}"
            )

        # Intersection of vectors in providers
        _accessor_vectors = [
            elm for elm in self._provider_a.vector_names()
            if elm in self._provider_b.vector_names()
        ]

        # Categorize vector types among the vectors in argument
        self._provider_vectors = [
            vector for vector in vectors if vector in _accessor_vectors
        ]
        self._per_interval_and_per_day_vectors = [
            vector for vector in vectors
            if is_per_interval_or_per_day_vector(vector)
            and get_cumulative_vector_name(vector) in _accessor_vectors
        ]
        self._vector_calculator_expressions = (
            get_selected_expressions(expressions, vectors)
            if expressions is not None
            else []
        )

        # Set resampling frequency
        self._resampling_frequency = (
            resampling_frequency
            if self._provider_a.supports_resampling()
            and self._provider_b.supports_resampling()
            else None
        )

        self._relative_date = relative_date
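
For reference, a small, self-contained sketch of the intersection logic used above for both realizations and vector names (plain Python, not plugin code); a set lookup keeps it linear while preserving provider A's ordering:

from typing import List, TypeVar

T = TypeVar("T")

def intersect_ordered(first: List[T], second: List[T]) -> List[T]:
    # Keep only elements present in both lists, in the order of the first list
    second_set = set(second)
    return [elm for elm in first if elm in second_set]

# intersect_ordered([0, 1, 2, 3], [1, 3, 5]) -> [1, 3]
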
Example #3
    def _user_download_data(
        data_requested: Union[int, None],
        vectors: List[str],
        selected_ensembles: List[str],
        visualization_value: str,
        resampling_frequency_value: str,
        selected_realizations: List[int],
        statistics_calculated_from_value: str,
        delta_ensembles: List[DeltaEnsemble],
        vector_calculator_expressions: List[ExpressionInfo],
    ) -> Union[EncodedFile, str]:
        """Callback to download data based on selections

        Retrieve vector data based on selected visualizations and filtered realizations

        NOTE:
        * Does not group based on "Group By" - data is stored per vector
        * All statistics included - no filtering on statistics selections
        * No history vector
        * No observation data
        """
        if data_requested is None:
            raise PreventUpdate

        if not isinstance(selected_ensembles, list):
            raise TypeError("ensembles should always be of type list")

        if vectors is None:
            vectors = initial_selected_vectors

        # Retrieve the selected expressions
        selected_expressions = get_selected_expressions(
            vector_calculator_expressions, vectors
        )

        # Convert from string values to enum types
        visualization = VisualizationOptions(visualization_value)
        resampling_frequency = Frequency.from_string_value(resampling_frequency_value)
        statistics_from_option = StatisticsFromOptions(statistics_calculated_from_value)

        # Create dict of derived vectors accessors for selected ensembles
        derived_vectors_accessors: Dict[
            str, DerivedVectorsAccessor
        ] = create_derived_vectors_accessor_dict(
            ensembles=selected_ensembles,
            vectors=vectors,
            provider_set=input_provider_set,
            expressions=selected_expressions,
            delta_ensembles=delta_ensembles,
            resampling_frequency=resampling_frequency,
        )

        # Dict with vector name as key and dataframe data as value
        vector_dataframe_dict: Dict[str, pd.DataFrame] = {}

        # Get all realizations if statistics across all realizations are requested
        is_statistics_from_all_realizations = (
            statistics_from_option == StatisticsFromOptions.ALL_REALIZATIONS
            and visualization
            in [
                VisualizationOptions.FANCHART,
                VisualizationOptions.STATISTICS,
                VisualizationOptions.STATISTICS_AND_REALIZATIONS,
            ]
        )

        # Plotting per derived vectors accessor
        for ensemble, accessor in derived_vectors_accessors.items():
            # Realizations query for accessor
            # - Use None, i.e. no filter, if statistics across all realizations are needed
            # - Otherwise create a valid realizations query for the accessor:
            #   * List[int]: Filtered valid realizations, empty list if none are valid
            #   * None: Get all realizations, i.e. non-filtered query
            realizations_query = (
                None
                if is_statistics_from_all_realizations
                else accessor.create_valid_realizations_query(selected_realizations)
            )

            # If all selected realizations are invalid for accessor - empty list
            if realizations_query == []:
                continue

            # Retrieve vector data from the accessor
            vectors_df_list: List[pd.DataFrame] = []
            if accessor.has_provider_vectors():
                vectors_df_list.append(
                    accessor.get_provider_vectors_df(realizations=realizations_query)
                )
            if accessor.has_interval_and_average_vectors():
                vectors_df_list.append(
                    accessor.create_interval_and_average_vectors_df(
                        realizations=realizations_query
                    )
                )
            if accessor.has_vector_calculator_expressions():
                vectors_df_list.append(
                    accessor.create_calculated_vectors_df(
                        realizations=realizations_query
                    )
                )

            # Append data for each vector
            for vectors_df in vectors_df_list:
                vector_names = [
                    elm for elm in vectors_df.columns if elm not in ["DATE", "REAL"]
                ]

                if visualization in [
                    VisualizationOptions.REALIZATIONS,
                    VisualizationOptions.STATISTICS_AND_REALIZATIONS,
                ]:
                    # NOTE: A query of all realizations should in theory not occur here
                    # unless explicitly requested
                    vectors_df_filtered = (
                        vectors_df
                        if realizations_query
                        else vectors_df[vectors_df["REAL"].isin(selected_realizations)]
                    )
                    for vector in vector_names:
                        vector_df = vectors_df_filtered[["DATE", "REAL", vector]]
                        row_count = vector_df.shape[0]
                        ensemble_name_list = [ensemble] * row_count
                        vector_df.insert(
                            loc=0, column="ENSEMBLE", value=ensemble_name_list
                        )

                        if vector.startswith(("AVG_", "INTVL_")):
                            vector_df["DATE"] = vector_df["DATE"].apply(
                                datetime_to_intervalstr, freq=resampling_frequency
                            )

                        vector_key = vector + "_realizations"
                        if vector_dataframe_dict.get(vector_key) is None:
                            vector_dataframe_dict[vector_key] = vector_df
                        else:
                            vector_dataframe_dict[vector_key] = pd.concat(
                                [vector_dataframe_dict[vector_key], vector_df],
                                ignore_index=True,
                                axis=0,
                            )

                if visualization in [
                    VisualizationOptions.STATISTICS,
                    VisualizationOptions.FANCHART,
                    VisualizationOptions.STATISTICS_AND_REALIZATIONS,
                ]:
                    vectors_statistics_df = create_vectors_statistics_df(vectors_df)

                    for vector in vector_names:
                        vector_statistics_df = vectors_statistics_df[["DATE", vector]]
                        row_count = vector_statistics_df.shape[0]
                        ensemble_name_list = [ensemble] * row_count
                        vector_statistics_df.insert(
                            loc=0, column="ENSEMBLE", value=ensemble_name_list
                        )

                        vector_key = vector + "_statistics"

                        if vector.startswith(("AVG_", "INTVL_")):
                            vector_statistics_df.loc[
                                :, ("DATE", "")
                            ] = vector_statistics_df.loc[:, ("DATE", "")].apply(
                                datetime_to_intervalstr, freq=resampling_frequency
                            )
                        if vector_dataframe_dict.get(vector_key) is None:
                            vector_dataframe_dict[vector_key] = vector_statistics_df
                        else:
                            vector_dataframe_dict[vector_key] = pd.concat(
                                [
                                    vector_dataframe_dict[vector_key],
                                    vector_statistics_df,
                                ],
                                ignore_index=True,
                                axis=0,
                            )

        # : is replaced with _ in filenames to stay within POSIX portable pathnames
        # (e.g. : is not valid in a Windows path)
        return WebvizPluginABC.plugin_data_compress(
            [
                {
                    "filename": f"{vector.replace(':', '_')}.csv",
                    "content": df.to_csv(index=False),
                }
                for vector, df in vector_dataframe_dict.items()
            ]
        )
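
A small, self-contained illustration of the final packaging step above: each accumulated dataframe becomes one CSV entry, with ":" replaced by "_" in the filename (the actual zipping/encoding is left to WebvizPluginABC.plugin_data_compress; the data below is made up):

import pandas as pd

vector_dataframe_dict = {
    "WOPR:OP_1_realizations": pd.DataFrame(
        {"ENSEMBLE": ["iter-0"], "DATE": ["2020-01-01"], "REAL": [0], "WOPR:OP_1": [100.0]}
    )
}

csv_entries = [
    {"filename": f"{vector.replace(':', '_')}.csv", "content": df.to_csv(index=False)}
    for vector, df in vector_dataframe_dict.items()
]
# csv_entries[0]["filename"] -> "WOPR_OP_1_realizations.csv"
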
Example #4
    def _update_vector_calculator_expressions_on_modal_close(
        is_modal_open: bool,
        new_expressions: List[ExpressionInfo],
        current_expressions: List[ExpressionInfo],
        current_selected_vectors: List[str],
        current_custom_vector_definitions: dict,
        graph_data_has_changed_counter: int,
    ) -> list:
        """Update vector calculator expressions, propagate expressions to VectorSelectors,
        update current selections and trigger re-rendering of the graph if necessary
        """
        if is_modal_open or (new_expressions == current_expressions):
            raise PreventUpdate

        # Create current selected expressions for comparison - Deep copy!
        current_selected_expressions = get_selected_expressions(
            current_expressions, current_selected_vectors
        )

        # Create new vector selector data - Deep copy!
        new_vector_selector_data = copy.deepcopy(vector_selector_base_data)
        add_expressions_to_vector_selector_data(
            new_vector_selector_data, new_expressions
        )

        # Create new selected vectors - from new expressions
        new_selected_vectors = _create_new_selected_vectors(
            current_selected_vectors,
            current_expressions,
            new_expressions,
            new_vector_selector_data,
        )

        # Get new selected expressions
        new_selected_expressions = get_selected_expressions(
            new_expressions, new_selected_vectors
        )

        # Get new custom vector definitions
        new_custom_vector_definitions = get_custom_vector_definitions_from_expressions(
            new_expressions
        )

        # Prevent updates if unchanged
        if new_custom_vector_definitions == current_custom_vector_definitions:
            new_custom_vector_definitions = dash.no_update

        if new_selected_vectors == current_selected_vectors:
            new_selected_vectors = dash.no_update

        # If selected expressions are edited, only bump the graph data trigger when needed,
        # i.e. when expression names, and thereby the VectorSelector selectedNodes, are unchanged.
        new_graph_data_has_changed_counter = dash.no_update
        if (
            new_selected_expressions != current_selected_expressions
            and new_selected_vectors == dash.no_update
        ):
            new_graph_data_has_changed_counter = graph_data_has_changed_counter + 1

        return [
            new_expressions,
            new_vector_selector_data,
            new_selected_vectors,
            new_custom_vector_definitions,
            new_graph_data_has_changed_counter,
        ]
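
The callback above leans on two Dash idioms: returning dash.no_update for outputs that should keep their current value, and bumping an integer trigger when the graph must re-render even though no other output changes (expression content edited, names unchanged). A reduced sketch of that decision, with illustrative arguments rather than real component ids:

import dash

def graph_trigger_value(
    expressions_changed: bool, selected_vectors_changed: bool, counter: int
):
    # Bump the trigger only when the edited expressions would otherwise go unnoticed,
    # i.e. the selected vector names (and thereby the VectorSelector output) are unchanged
    if expressions_changed and not selected_vectors_changed:
        return counter + 1
    return dash.no_update
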
Example #5
    def _update_graph(
        vectors: List[str],
        selected_ensembles: List[str],
        visualization_value: str,
        statistics_option_values: List[str],
        fanchart_option_values: List[str],
        trace_option_values: List[str],
        subplot_owner_options_value: str,
        resampling_frequency_value: str,
        selected_realizations: List[int],
        statistics_calculated_from_value: str,
        __graph_data_has_changed_trigger: int,
        delta_ensembles: List[DeltaEnsemble],
        vector_calculator_expressions: List[ExpressionInfo],
        ensemble_dropdown_options: List[dict],
    ) -> dict:
        """Callback to update all graphs based on selections

        * De-serialize from JSON serializable format to strongly typed and filtered format
        * Business logic:
            * Functionality with "strongly typed" and filtered input format - functions and
              classes
            * ProviderSet for EnsembleSummaryProviders, i.e. input_provider_set
            * DerivedEnsembleVectorsAccessor to access derived vector data from ensembles
              with a single provider or delta ensembles with two providers
            * GraphFigureBuilder to create a graph with subplots per vector or subplots per
              ensemble, using VectorSubplotBuilder and EnsembleSubplotBuilder, respectively
        * Create/build the property serialization in the FigureBuilder using the business
          logic data

        NOTE: __graph_data_has_changed_trigger is only used to trigger the callback when the
        graph data has changed and a re-render of the graph is necessary, e.g. when a selected
        expression from the VectorCalculator gets edited without changing the expression name,
        i.e. the VectorSelector selectedNodes remain unchanged.
        """
        if not isinstance(selected_ensembles, list):
            raise TypeError("ensembles should always be of type list")

        if vectors is None:
            vectors = initial_selected_vectors

        # Retrieve the selected expressions
        selected_expressions = get_selected_expressions(
            vector_calculator_expressions, vectors
        )

        # Convert from string values to enum types
        visualization = VisualizationOptions(visualization_value)
        statistics_options = [
            StatisticsOptions(elm) for elm in statistics_option_values
        ]
        fanchart_options = [FanchartOptions(elm) for elm in fanchart_option_values]
        trace_options = [TraceOptions(elm) for elm in trace_option_values]
        subplot_owner = SubplotGroupByOptions(subplot_owner_options_value)
        resampling_frequency = Frequency.from_string_value(resampling_frequency_value)
        all_ensemble_names = [option["value"] for option in ensemble_dropdown_options]
        statistics_from_option = StatisticsFromOptions(statistics_calculated_from_value)

        # Prevent update if realization filtering does not affect a pure statistics plot
        # TODO: Refactor code or create utility for getting trigger ID in a "cleaner" way?
        ctx = dash.callback_context.triggered
        trigger_id = ctx[0]["prop_id"].split(".")[0]
        if (
            trigger_id == get_uuid(LayoutElements.REALIZATIONS_FILTER_SELECTOR)
            and statistics_from_option is StatisticsFromOptions.ALL_REALIZATIONS
            and visualization
            in [
                VisualizationOptions.STATISTICS,
                VisualizationOptions.FANCHART,
            ]
        ):
            raise PreventUpdate

        # Create dict of derived vectors accessors for selected ensembles
        derived_vectors_accessors: Dict[
            str, DerivedVectorsAccessor
        ] = create_derived_vectors_accessor_dict(
            ensembles=selected_ensembles,
            vectors=vectors,
            provider_set=input_provider_set,
            expressions=selected_expressions,
            delta_ensembles=delta_ensembles,
            resampling_frequency=resampling_frequency,
        )

        # TODO: How to get metadata for calculated vector?
        vector_line_shapes: Dict[str, str] = {
            vector: get_simulation_line_shape(
                line_shape_fallback,
                vector,
                input_provider_set.vector_metadata(vector),
            )
            for vector in vectors
        }

        figure_builder: GraphFigureBuilderBase
        if subplot_owner is SubplotGroupByOptions.VECTOR:
            # Create unique colors based on all ensemble names to preserve consistent colors
            ensemble_colors = unique_colors(all_ensemble_names, theme)
            vector_titles = create_vector_plot_titles_from_provider_set(
                vectors, selected_expressions, input_provider_set
            )
            figure_builder = VectorSubplotBuilder(
                vectors,
                vector_titles,
                ensemble_colors,
                resampling_frequency,
                vector_line_shapes,
                theme,
            )
        elif subplot_owner is SubplotGroupByOptions.ENSEMBLE:
            vector_colors = unique_colors(vectors, theme)
            figure_builder = EnsembleSubplotBuilder(
                vectors,
                selected_ensembles,
                vector_colors,
                resampling_frequency,
                vector_line_shapes,
                theme,
            )
        else:
            raise PreventUpdate

        # Get all realizations if statistics across all realizations are requested
        is_statistics_from_all_realizations = (
            statistics_from_option == StatisticsFromOptions.ALL_REALIZATIONS
            and visualization
            in [
                VisualizationOptions.FANCHART,
                VisualizationOptions.STATISTICS,
                VisualizationOptions.STATISTICS_AND_REALIZATIONS,
            ]
        )

        # Plotting per derived vectors accessor
        for ensemble, accessor in derived_vectors_accessors.items():
            # Realizations query for accessor
            # - Use None, i.e. no filter, if statistics across all realizations are needed
            # - Otherwise create a valid realizations query for the accessor:
            #   * List[int]: Filtered valid realizations, empty list if none are valid
            #   * None: Get all realizations, i.e. non-filtered query
            realizations_query = (
                None
                if is_statistics_from_all_realizations
                else accessor.create_valid_realizations_query(selected_realizations)
            )

            # If all selected realizations are invalid for accessor - empty list
            if realizations_query == []:
                continue

            # TODO: Consider removing the list vectors_df_list and using pd.concat to obtain
            # one single dataframe with vector columns. NB: Assumes equal sampling rate
            # for each vector type - i.e. an equal number of rows in the dataframes

            # Retrieve vector data from the accessor
            vectors_df_list: List[pd.DataFrame] = []
            if accessor.has_provider_vectors():
                vectors_df_list.append(
                    accessor.get_provider_vectors_df(realizations=realizations_query)
                )
            if accessor.has_interval_and_average_vectors():
                vectors_df_list.append(
                    accessor.create_interval_and_average_vectors_df(
                        realizations=realizations_query
                    )
                )
            if accessor.has_vector_calculator_expressions():
                vectors_df_list.append(
                    accessor.create_calculated_vectors_df(
                        realizations=realizations_query
                    )
                )

            for vectors_df in vectors_df_list:
                if visualization == VisualizationOptions.REALIZATIONS:
                    # Show selected realizations - only filter the df if no realizations
                    # query was performed on the accessor
                    figure_builder.add_realizations_traces(
                        vectors_df
                        if realizations_query
                        else vectors_df[vectors_df["REAL"].isin(selected_realizations)],
                        ensemble,
                    )
                if visualization == VisualizationOptions.STATISTICS:
                    vectors_statistics_df = create_vectors_statistics_df(vectors_df)
                    figure_builder.add_statistics_traces(
                        vectors_statistics_df,
                        ensemble,
                        statistics_options,
                    )
                if visualization == VisualizationOptions.FANCHART:
                    vectors_statistics_df = create_vectors_statistics_df(vectors_df)
                    figure_builder.add_fanchart_traces(
                        vectors_statistics_df,
                        ensemble,
                        fanchart_options,
                    )
                if visualization == VisualizationOptions.STATISTICS_AND_REALIZATIONS:
                    # Configure line width and color scaling to more easily separate
                    # statistics traces from realization traces.
                    # Show selected realizations - only filter the df if no realizations
                    # query was performed on the accessor
                    figure_builder.add_realizations_traces(
                        vectors_df
                        if realizations_query
                        else vectors_df[vectors_df["REAL"].isin(selected_realizations)],
                        ensemble,
                        color_lightness_scale=150.0,
                    )
                    # Add statistics on top
                    vectors_statistics_df = create_vectors_statistics_df(vectors_df)
                    figure_builder.add_statistics_traces(
                        vectors_statistics_df,
                        ensemble,
                        statistics_options,
                        line_width=3,
                    )

        # Retrieve selected input providers
        selected_input_providers = ProviderSet(
            {
                name: provider
                for name, provider in input_provider_set.items()
                if name in selected_ensembles
            }
        )

        # Do not add observations if only delta ensembles are selected
        is_only_delta_ensembles = (
            len(selected_input_providers.names()) == 0
            and len(derived_vectors_accessors) > 0
        )
        if (
            observations
            and TraceOptions.OBSERVATIONS in trace_options
            and not is_only_delta_ensembles
        ):
            for vector in vectors:
                vector_observations = observations.get(vector)
                if vector_observations:
                    figure_builder.add_vector_observations(vector, vector_observations)

        # Add history trace
        # TODO: Improve when new history vector input format is in place
        if TraceOptions.HISTORY in trace_options:
            if (
                isinstance(figure_builder, VectorSubplotBuilder)
                and len(selected_input_providers.names()) > 0
            ):
                # Add history trace using first selected ensemble
                name = selected_input_providers.names()[0]
                provider = selected_input_providers.provider(name)
                vector_names = provider.vector_names()

                provider_vectors = [elm for elm in vectors if elm in vector_names]
                if provider_vectors:
                    history_vectors_df = create_history_vectors_df(
                        provider, provider_vectors, resampling_frequency
                    )
                    # TODO: Handle check of non-empty dataframe better?
                    if (
                        not history_vectors_df.empty
                        and "DATE" in history_vectors_df.columns
                    ):
                        figure_builder.add_history_traces(history_vectors_df)

            if isinstance(figure_builder, EnsembleSubplotBuilder):
                # Add history trace for each ensemble
                for name, provider in selected_input_providers.items():
                    vector_names = provider.vector_names()

                    provider_vectors = [elm for elm in vectors if elm in vector_names]
                    if provider_vectors:
                        history_vectors_df = create_history_vectors_df(
                            provider, provider_vectors, resampling_frequency
                        )
                        # TODO: Handle check of non-empty dataframe better?
                        if (
                            not history_vectors_df.empty
                            and "DATE" in history_vectors_df.columns
                        ):
                            figure_builder.add_history_traces(
                                history_vectors_df,
                                name,
                            )

        # Create legends when all data is added to figure
        figure_builder.create_graph_legends()

        return figure_builder.get_serialized_figure()
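
Both callbacks apply the same realization-filtering rule when adding realization data: if the accessor was queried without a realization filter (the query is None because statistics across all realizations were needed), the resulting dataframe is filtered on the user's selection afterwards. A minimal pandas sketch of that rule (illustrative helper, not plugin code):

from typing import List, Optional

import pandas as pd

def filter_realizations(
    vectors_df: pd.DataFrame,
    realizations_query: Optional[List[int]],
    selected_realizations: List[int],
) -> pd.DataFrame:
    # A non-empty query means the accessor already returned only the requested realizations
    if realizations_query:
        return vectors_df
    return vectors_df[vectors_df["REAL"].isin(selected_realizations)]
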