Beispiel #1
0
    def test_file_io(self):
        """Write a converted summary to disk, reload it and compare with the plot data.

        ``self.plt`` is converted with ``nex2ecl``, written inside a test area,
        reloaded through ``EclSum`` and then checked for: existence of the
        SMSPEC file, equal length, presence and values of ``WGPT:2`` and the
        list of dates.
        """
        with TestAreaContext('test_file_io'):
            converted = nex2ecl(self.plt,
                                'ECL_CASE',
                                format=False,
                                field_name='FIELD')
            converted.fwrite()
            smspec_path = os.path.join(os.getcwd(), 'ECL_CASE.SMSPEC')
            self.assertTrue(os.path.exists(smspec_path))

            reloaded = EclSum('ECL_CASE')
            self.assertEqual(len(converted), len(reloaded))
            self.assertIn('WGPT:2', reloaded)

            # WGPT:2 in the reloaded summary must equal the CGP values of
            # well "2" in the source plot.
            plt = self.plt
            well_2_cgp_rows = ((plt['classname'] == 'WELL')
                               & (plt['instancename'] == '2')
                               & (plt['varname'] == 'CGP'))
            loaded_wgpt2 = list(reloaded.get_values('WGPT:2'))
            expected_wgpt2 = plt.loc[well_2_cgp_rows]['value'].tolist()
            self.assertEqual(loaded_wgpt2, expected_wgpt2)

            # Dates are rebuilt from the plot's start date plus each unique
            # day offset.
            loaded_dates = reloaded.dates
            expected_dates = [
                plt.start_date + datetime.timedelta(days=offset)
                for offset in plt.time.unique()
            ]
            self.assertEqual(loaded_dates, expected_dates)
Beispiel #2
0
class FlowData(FromSource):
    """
    Flow data source class

    Args:
         input_case: Full path to eclipse case to load data from
         layers: List with definition of isolated layers, if present.

    """
    def __init__(
            self,
            input_case: Union[Path, str],
            layers: Tuple = (),
    ):
        """
        Open the simulation output that belongs to ``input_case``.

        Args:
            input_case: Path of the case. The ``.EGRID``, ``.UNRST`` and ``.INIT``
                files are located by replacing the suffix of this path.
            layers: Definition of isolated layers; stored unchanged.

        """
        super().__init__()

        self._input_case: Path = Path(input_case)
        self._eclsum = EclSum(str(self._input_case))
        self._grid = EclGrid(str(self._input_case.with_suffix(".EGRID")))
        self._restart = EclFile(str(self._input_case.with_suffix(".UNRST")))
        # The INIT file is opened once, through the grid-aware reader. An earlier
        # plain EclFile handle for the same file was overwritten immediately and
        # only cost an extra file load.
        self._init = EclInitFile(self._grid,
                                 str(self._input_case.with_suffix(".INIT")))
        self._wells = compdat.df(EclFiles(str(self._input_case)))
        self._layers = layers

    # pylint: disable=too-many-branches
    def _well_connections(self,
                          perforation_handling_strategy: str) -> pd.DataFrame:
        """
        Function to extract well connection coordinates from a Flow simulation including their
        opening and closure time. The output of this function will be filtered based on the
        configured perforation strategy.

        Args:
            perforation_handling_strategy: Strategy to be used when creating perforations.
            Valid options are bottom_point, top_point, multiple, time_avg_open_location and
            multiple_based_on_workovers.

        Returns:
            columns: WELL_NAME, X, Y, Z, DATE, OPEN, LAYER_ID

        """
        if len(self._layers) > 0 and self._grid.nz is not self._layers[-1][-1]:
            raise ValueError(
                f"Number of layers from config ({self._layers[-1][-1]}) is not equal to "
                f"number of layers from flow simulation ({self._grid.nz}).")

        new_items = []
        for _, row in self._wells.iterrows():
            X, Y, Z = self._grid.get_xyz(ijk=(row["I"] - 1, row["J"] - 1,
                                              row["K1"] - 1))
            if len(self._layers) > 0:
                for count, (i, j) in enumerate(self._layers):
                    if row["K1"] in range(i, j + 1):
                        layer_id = count
                        break
            else:
                layer_id = 0

            new_row = {
                "WELL_NAME": row["WELL"],
                "IJK": (
                    row["I"] - 1,
                    row["J"] - 1,
                    row["K1"] - 1,
                ),
                "X": X,
                "Y": Y,
                "Z": Z,
                "DATE": row["DATE"],
                "OPEN": bool(row["OP/SH"] == "OPEN"),
                "LAYER_ID": layer_id,
            }
            new_items.append(new_row)

        df = pd.DataFrame(
            new_items,
            columns=[
                "WELL_NAME", "IJK", "X", "Y", "Z", "DATE", "OPEN", "LAYER_ID"
            ],
        )
        df["DATE"] = pd.to_datetime(df["DATE"], format="%Y-%m-%d").dt.date

        try:
            perforation_strategy_method = getattr(
                perforation_strategy, perforation_handling_strategy)
        except AttributeError as attribute_error:
            raise NotImplementedError(
                f"The perforation handling strategy {perforation_handling_strategy} is unknown."
            ) from attribute_error

        return perforation_strategy_method(df).sort_values(["DATE"])

    def _well_logs(self) -> pd.DataFrame:
        """
        Function to extract well log information from a Flow simulation.

        Returns:
            columns: WELL_NAME, X, Y, Z, PERM (mD), PORO (-)

        """
        coords: List = []

        for well_name in self._wells["WELL"].unique():
            unique_connections = self._wells[self._wells["WELL"] ==
                                             well_name].drop_duplicates(
                                                 subset=["I", "J", "K1", "K2"])
            for _, connection in unique_connections.iterrows():
                ijk = (connection["I"] - 1, connection["J"] - 1,
                       connection["K1"] - 1)
                xyz = self._grid.get_xyz(ijk=ijk)

                perm_kw = self._init.iget_named_kw("PERMX", 0)
                poro_kw = self._init.iget_named_kw("PORO", 0)

                coords.append([
                    well_name,
                    *xyz,
                    perm_kw[self._grid.cell(i=ijk[0], j=ijk[1],
                                            k=ijk[2]).active_index],
                    poro_kw[self._grid.cell(i=ijk[0], j=ijk[1],
                                            k=ijk[2]).active_index],
                ])

        return pd.DataFrame(
            coords, columns=["WELL_NAME", "X", "Y", "Z", "PERM", "PORO"])

    def _production_data(self) -> pd.DataFrame:
        """
        Function to read production data for all producers and injectors from an
        Flow simulation. The simulation is required to write out the
        following vectors to the summary file: WOPR, WGPR, WWPR, WBHP, WTHP, WGIR, WWIR

        Returns:
            A DataFrame with a DateTimeIndex and the following columns:
                - date          equal to index
                - WELL_NAME     Well name as used in Flow
                - WOPR          Well Oil Production Rate
                - WGPR          Well Gas Production Rate
                - WWPR          Well Water Production Rate
                - WOPT          Well Cumulative Oil Production
                - WGPT          Well Cumulative Gas Production
                - WWPT          Well Cumulative Water Production
                - WBHP          Well Bottom Hole Pressure
                - WTHP          Well Tubing Head Pressure
                - WGIR          Well Gas Injection Rate
                - WWIR          Well Water Injection Rate
                - WSPR          Well Salt Production Rate
                - WSIR          Well Salt Injection Rate
                - WSPT          Well Cumulative Salt Production
                - WSIT          Well Cumulative Salt Injection
                - WTICHEA       Well Injection Temperature
                - WTPCHEA       Well Production Temperature
                - WSTAT         Well status (OPEN, SHUT, STOP)
                - TYPE          Well Type: "OP", "GP", "WI", "GI"
                - PHASE         Main producing/injecting phase fluid: "OIL", "GAS", "WATER"

        Todo:
            * Remove depreciation warning suppression when solved in LibEcl.
            * Improve robustness pf setting of Phase and Type.

        """
        keys = [
            "WOPR",
            "WGPR",
            "WWPR",
            "WOPT",
            "WGPT",
            "WWPT",
            "WBHP",
            "WTHP",
            "WGIR",
            "WWIR",
            "WGIT",
            "WWIT",
            "WSPR",
            "WSIR",
            "WSPT",
            "WSIT",
            "WTPCHEA",
            "WTICHEA",
            "WSTAT",
        ]

        df_production_data = pd.DataFrame()

        # Suppress a depreciation warning inside LibEcl
        warnings.simplefilter("ignore", category=DeprecationWarning)
        with warnings.catch_warnings():

            for well_name in self._eclsum.wells():
                df = pd.DataFrame()

                df["date"] = self._eclsum.report_dates
                df["date"] = pd.to_datetime(df["date"])
                df.set_index("date", inplace=True)

                for prod_key in keys:
                    try:
                        df[f"{prod_key}"] = self._eclsum.get_values(
                            f"{prod_key}:{well_name}", report_only=True)
                    except KeyError:
                        df[f"{prod_key}"] = np.nan

                # Set columns that have only exact zero values to np.nan
                df.loc[:, (df == 0).all(axis=0)] = np.nan

                df["WELL_NAME"] = well_name

                df["PHASE"] = None
                df.loc[df["WOPR"] > 0, "PHASE"] = "OIL"
                df.loc[(df["WOPR"] == 0) & (df["WWPR"] > 0), "PHASE"] = "WATER"
                df.loc[df["WWIR"] > 0, "PHASE"] = "WATER"
                df.loc[df["WGIR"] > 0, "PHASE"] = "GAS"
                df["TYPE"] = None
                df.loc[df["WOPR"] > 0, "TYPE"] = "OP"
                df.loc[(df["WOPR"] == 0) & (df["WWPR"] > 0), "TYPE"] = "WP"
                df.loc[df["WWIR"] > 0, "TYPE"] = "WI"
                df.loc[df["WGIR"] > 0, "TYPE"] = "GI"
                # make sure the correct well type is set also when the well is shut in
                df[["PHASE", "TYPE"]] = df[["PHASE",
                                            "TYPE"]].fillna(method="backfill")
                df[["PHASE", "TYPE"]] = df[["PHASE",
                                            "TYPE"]].fillna(method="ffill")

                df_production_data = df_production_data.append(df)

        if df_production_data["WSTAT"].isna().all():
            warnings.warn(
                "No WSTAT:* summary vectors in input case - setting default well status to OPEN."
            )
            wstat_default = "OPEN"
        else:
            wstat_default = "STOP"

        df_production_data["WSTAT"] = df_production_data["WSTAT"].map({
            0:
            wstat_default,
            1:
            "OPEN",  # Producer OPEN
            2:
            "OPEN",  # Injector OPEN
            3:
            "SHUT",
            4:
            "STOP",
            5:
            "SHUT",  # PSHUT
            6:
            "STOP",  # PSTOP
            np.nan:
            wstat_default,
        })

        # ensure that a type is assigned also if a well is never activated
        df_production_data[["PHASE", "TYPE"]] = df_production_data[[
            "PHASE", "TYPE"
        ]].fillna(method="backfill")
        df_production_data[["PHASE", "TYPE"
                            ]] = df_production_data[["PHASE", "TYPE"
                                                     ]].fillna(method="ffill")

        df_production_data["date"] = df_production_data.index
        df_production_data["date"] = pd.to_datetime(
            df_production_data["date"]).dt.date

        return df_production_data

    def _faults(self) -> pd.DataFrame:
        """
        Read the fault keyword table through ``faults.df`` and convert it to points.

        Every table row contributes two points: the cell named by (I, J, K) and the
        neighbouring cell one step away in the direction given by FACE.

        Returns:
            A dataframe with columns NAME, X, Y, Z with data for fault planes

        Raises:
            ValueError: If a FACE value is not one of the known face labels.

        """
        # Step (di, dj, dk) towards the neighbouring cell for every accepted FACE label.
        neighbour_step = {
            "X": (1, 0, 0), "X+": (1, 0, 0), "I": (1, 0, 0), "I+": (1, 0, 0),
            "Y": (0, 1, 0), "Y+": (0, 1, 0), "J": (0, 1, 0), "J+": (0, 1, 0),
            "Z": (0, 0, 1), "Z+": (0, 0, 1), "K": (0, 0, 1), "K+": (0, 0, 1),
            "X-": (-1, 0, 0), "I-": (-1, 0, 0),
            "Y-": (0, -1, 0), "J-": (0, -1, 0),
            "Z-": (0, 0, -1), "K-": (0, 0, -1),
        }

        fault_table = faults.df(EclFiles(self._input_case))

        points = []
        for _, row in fault_table.iterrows():
            # The table is one-based, grid lookups are zero-based.
            i, j, k = row["I"] - 1, row["J"] - 1, row["K"] - 1
            points.append((row["NAME"], i, j, k))

            step = neighbour_step.get(row["FACE"])
            if step is None:
                raise ValueError(
                    f"Could not interpret '{row['FACE']}' while reading the FAULTS keyword."
                )
            points.append((row["NAME"], i + step[0], j + step[1], k + step[2]))

        df_faults = pd.DataFrame.from_records(points,
                                              columns=["NAME", "I", "J", "K"])

        if not df_faults.empty:
            # Look up the coordinates of every point and spread them over X, Y and Z.
            xyz_per_point = df_faults.apply(
                lambda point: list(
                    self._grid.get_xyz(ijk=(point["I"], point["J"], point["K"]))
                ),
                axis=1,
            )
            df_faults[["X", "Y", "Z"]] = pd.DataFrame(
                xyz_per_point.values.tolist())

        return df_faults.drop(["I", "J", "K"], axis=1)

    def grid_cell_bounding_boxes(self, layer_id: int) -> np.ndarray:
        """
        Function to get the bounding box (x, y and z min + max) for all grid cells

        Args:
            layer_id: The FlowNet layer id to be used to create the bounding box.

        Returns:
            A (active grid cells x 6) numpy array with columns [ xmin, xmax, ymin, ymax, zmin, zmax ]
            filtered on layer_id if not None.
        """
        if self._layers:
            (k_min,
             k_max) = tuple(map(operator.sub, self._layers[layer_id], (1, 1)))
        else:
            (k_min, k_max) = (0, self._grid.nz)

        cells = [
            cell for cell in self._grid.cells(active=True)
            if (k_min <= cell.k <= k_max)
        ]
        xyz = np.empty((8 * len(cells), 3))

        for n_cell, cell in enumerate(cells):
            for n_corner, corner in enumerate(cell.corners):
                xyz[n_cell * 8 + n_corner, :] = corner

        xmin = xyz[:, 0].reshape(-1, 8).min(axis=1)
        xmax = xyz[:, 0].reshape(-1, 8).max(axis=1)
        ymin = xyz[:, 1].reshape(-1, 8).min(axis=1)
        ymax = xyz[:, 1].reshape(-1, 8).max(axis=1)
        zmin = xyz[:, 2].reshape(-1, 8).min(axis=1)
        zmax = xyz[:, 2].reshape(-1, 8).max(axis=1)

        return np.vstack([xmin, xmax, ymin, ymax, zmin, zmax]).T

    def _get_start_date(self):
        """Return the ``start_date`` attribute of the loaded summary data."""
        return self._eclsum.start_date

    def init(self, name: str) -> np.ndarray:
        """
        Array with 'name' regions.

        Args:
            name: Key that is looked up in the INIT data.

        Returns:
            Element 0 of what the INIT data holds under ``name``.
        """
        return self._init[name][0]

    def get_unique_regions(self, name: str) -> np.ndarray:
        """array with unique 'name' regions"""
        return np.unique(self._init[name][0])

    def get_well_connections(
            self, perforation_handling_strategy: str) -> pd.DataFrame:
        """
        Function to get dataframe with all well connection coordinates,
        filtered based on the perforation_handling_strategy.

        This is the public entry point; it forwards its argument unchanged to
        ``_well_connections``.

        Args:
            perforation_handling_strategy: Strategy to be used when creating perforations.
            Valid options are bottom_point, top_point, multiple,
            time_avg_open_location and multiple_based_on_workovers.

        Returns:
            Dataframe with all well connection coordinates,
            filtered based on the perforation_handling_strategy.
            Columns: WELL_NAME, X, Y, Z, DATE, OPEN, LAYER_ID
        """

        return self._well_connections(
            perforation_handling_strategy=perforation_handling_strategy)

    def bulk_volume_per_flownet_cell_based_on_voronoi_of_input_model(
            self, network: NetworkModel) -> np.ndarray:
        """Generate bulk volume distribution per grid cell in the FlowNet model based on the geometrical
        distribution of the volume in the original (full field) simulation model. I.e., the original model's
        volume will be distributed over the FlowNet's tubes by assigning original model grid cell
        volumes to the nearest FlowNet tube cell midpoint. Finally, the volume distributed to all cells in a tube
        will be summed and evenly redistributed over the tube.

        The mapping is done separately for every depth interval defined by
        ``network.volume_layering`` (plus a level at depth 0).

        Args:
            network: FlowNet network instance.

        Returns:
            An array with volumes per flownetcell.

        """
        # pylint: disable=too-many-locals

        # network.cell_midpoints is indexed as [axis][cell] below (index 2 is used as depth);
        # the transpose gives one row per FlowNet cell for the nearest-neighbour search.
        flownet_cell_midpoints = np.array(network.cell_midpoints).T
        model_cell_mid_points = np.array(
            [cell.coordinate for cell in self._grid.cells(active=True)])
        # Cell volume multiplied by the NTG value at the cell's active index.
        # NOTE(review): the NTG keyword is fetched again for every cell; it looks
        # loop-invariant and could be read once.
        model_cell_volume = [
            (cell.volume *
             self._init.iget_named_kw("NTG", 0)[cell.active_index])
            for cell in self._grid.cells(active=True)
        ]

        # Number of tubes and tube cells
        properties_per_cell = pd.DataFrame(
            pd.DataFrame(data=network.grid.index,
                         index=network.grid.model).index)
        number_of_tubes = properties_per_cell.groupby(["model"]).ngroups
        cell_volumes = np.zeros(len(properties_per_cell["model"].values))

        # Identify the index of the last (inactive) cell of each tube which will have 0 volume
        inactive_cells = np.zeros(number_of_tubes)
        for i in range(number_of_tubes):
            inactive_cells[i] = (properties_per_cell.reset_index().groupby(
                ["model"]).groups[i][-1])

        # depths should be a list of depths provided by the user. it may also be empty
        # (a copy is taken so that the network's own list is not modified)
        depths = network.volume_layering.copy()

        # Add 0 depth level and arrange from deep to shallow
        depths.append(0)
        depths = list(set(depths))
        depths.sort(reverse=True)

        # Perform mapping of volumes between two depth levels
        for index, depth in enumerate(depths):
            if index == 0:
                # Add a very deep dummy level
                depth_range = [1.0e10, depth]
            else:
                depth_range = [depths[index - 1], depth]

            tube_cell_volumes = np.zeros(len(flownet_cell_midpoints))

            # Identify cells located between the current lower and upper depths levels
            # (lower bound inclusive, upper bound exclusive)
            flownet_indices = [
                idx for idx, val in enumerate(network.cell_midpoints[2])
                if (depth_range[0] >= val > depth_range[1])
            ]
            model_indices = [
                idx for idx, val in enumerate(model_cell_mid_points[:, 2])
                if (depth_range[0] >= val > depth_range[1])
            ]

            # Determine nearest flow tube cell for each cell in the original model
            # NOTE(review): if no FlowNet cell lies in the current interval, the tree is
            # built from an empty selection - confirm that this cannot happen.
            tree = KDTree(flownet_cell_midpoints[flownet_indices, :])
            _, matched_indices = tree.query(
                model_cell_mid_points[model_indices], k=[1])

            # Assign each reservoir model volume to a flow tube
            # (matched indices refer to positions in flownet_indices, hence the double lookup)
            for idx, val in enumerate(matched_indices):
                tube_cell_volumes[flownet_indices[
                    val[0]]] += model_cell_volume[model_indices[idx]]

            # Compute the total volumes per tube section between the current depth levels
            properties_per_cell["distributed_volume"] = tube_cell_volumes
            tube_volumes = properties_per_cell.groupby(by="model").sum().values

            # Evenly distribute tube volumes over the tube cells between the current depth levels
            # (cells listed in inactive_cells are left out and keep a volume of 0)
            for tube in range(number_of_tubes):
                indices = [
                    i for i, x in enumerate(
                        network.grid.model.iloc[flownet_indices].values.tolist(
                        ))
                    if x == tube and flownet_indices[i] not in inactive_cells
                ]
                for _, idx in enumerate(indices):
                    cell_volumes[flownet_indices[
                        idx]] += tube_volumes[tube] / len(indices)

        return cell_volumes

    @property
    def faults(self) -> pd.DataFrame:
        """dataframe with all fault data; rebuilt by ``_faults`` on every access"""
        return self._faults()

    @property
    def production(self) -> pd.DataFrame:
        """dataframe with all production data; rebuilt by ``_production_data`` on every access"""
        return self._production_data()

    @property
    def well_logs(self) -> pd.DataFrame:
        """dataframe with all well logs; rebuilt by ``_well_logs`` on every access"""
        return self._well_logs()

    @property
    def grid(self) -> EclGrid:
        """the simulation grid object loaded from the ``.EGRID`` file"""
        return self._grid

    @property
    def layers(self) -> Union[Tuple[Tuple[int, int]], Tuple]:
        """Get the list of top and bottom k-indices of the original model that represent a FlowNet layer"""
        return self._layers
Beispiel #3
0
class FlowData(FromSource):
    """
    Flow data source class

    Args:
         input_case: Full path to eclipse case to load data from
         layers: List with definition of isolated layers, if present.
         perforation_handling_strategy: How to deal with perforations per well.
                                                 ('bottom_point', 'top_point', 'multiple')

    """
    def __init__(
        self,
        input_case: Union[Path, str],
        layers: Tuple = (),
        perforation_handling_strategy: str = "bottom_point",
    ):
        """
        Open the simulation output that belongs to ``input_case``.

        Args:
            input_case: Path of the case. The ``.EGRID``, ``.UNRST`` and ``.INIT``
                files are located by replacing the suffix of this path.
            layers: Definition of isolated layers; stored unchanged.
            perforation_handling_strategy: Name of the perforation strategy that is used
                when the well connections are requested.

        """
        super().__init__()

        self._input_case: Path = Path(input_case)
        self._eclsum = EclSum(str(self._input_case))
        self._grid = EclGrid(str(self._input_case.with_suffix(".EGRID")))
        self._restart = EclFile(str(self._input_case.with_suffix(".UNRST")))
        # The INIT file is opened once, through the grid-aware reader. An earlier
        # plain EclFile handle for the same file was overwritten immediately and
        # only cost an extra file load.
        self._init = EclInitFile(self._grid,
                                 str(self._input_case.with_suffix(".INIT")))
        self._wells = compdat.df(EclFiles(str(self._input_case)))
        self._layers = layers

        self._perforation_handling_strategy: str = perforation_handling_strategy

    # pylint: disable=too-many-branches
    def _well_connections(self) -> pd.DataFrame:
        """
        Function to extract well connection coordinates from a Flow simulation including their
        opening and closure time. The output of this function will be filtered based on the
        configured perforation strategy.

        Returns:
            columns: WELL_NAME, X, Y, Z, DATE, OPEN, LAYER_ID

        """
        if len(self._layers) > 0 and self._grid.nz is not self._layers[-1][-1]:
            raise ValueError(
                f"Number of layers from config ({self._layers[-1][-1]}) is not equal to "
                f"number of layers from flow simulation ({self._grid.nz}).")

        new_items = []
        for _, row in self._wells.iterrows():
            X, Y, Z = self._grid.get_xyz(ijk=(row["I"] - 1, row["J"] - 1,
                                              row["K1"] - 1))
            if len(self._layers) > 0:
                for count, (i, j) in enumerate(self._layers):
                    if row["K1"] in range(i, j + 1):
                        layer_id = count
                        break
            else:
                layer_id = 0

            new_row = {
                "WELL_NAME": row["WELL"],
                "IJK": (
                    row["I"] - 1,
                    row["J"] - 1,
                    row["K1"] - 1,
                ),
                "X": X,
                "Y": Y,
                "Z": Z,
                "DATE": row["DATE"],
                "OPEN": bool(row["OP/SH"] == "OPEN"),
                "LAYER_ID": layer_id,
            }
            new_items.append(new_row)

        df = pd.DataFrame(
            new_items,
            columns=[
                "WELL_NAME", "IJK", "X", "Y", "Z", "DATE", "OPEN", "LAYER_ID"
            ],
        )
        df["DATE"] = pd.to_datetime(df["DATE"], format="%Y-%m-%d").dt.date

        try:
            perforation_strategy_method = getattr(
                perforation_strategy, self._perforation_handling_strategy)
        except AttributeError as attribute_error:
            raise NotImplementedError(
                f"The perforation handling strategy {self._perforation_handling_strategy} is unknown."
            ) from attribute_error

        return perforation_strategy_method(df).sort_values(["DATE"])

    def _well_logs(self) -> pd.DataFrame:
        """
        Function to extract well log information from a Flow simulation.

        Returns:
            columns: WELL_NAME, X, Y, Z, PERM (mD), PORO (-)

        """
        coords: List = []

        for well_name in self._wells["WELL"].unique():
            unique_connections = self._wells[self._wells["WELL"] ==
                                             well_name].drop_duplicates(
                                                 subset=["I", "J", "K1", "K2"])
            for _, connection in unique_connections.iterrows():
                ijk = (connection["I"] - 1, connection["J"] - 1,
                       connection["K1"] - 1)
                xyz = self._grid.get_xyz(ijk=ijk)

                perm_kw = self._init.iget_named_kw("PERMX", 0)
                poro_kw = self._init.iget_named_kw("PORO", 0)

                coords.append([
                    well_name,
                    *xyz,
                    perm_kw[self._grid.cell(i=ijk[0], j=ijk[1],
                                            k=ijk[2]).active_index],
                    poro_kw[self._grid.cell(i=ijk[0], j=ijk[1],
                                            k=ijk[2]).active_index],
                ])

        return pd.DataFrame(
            coords, columns=["WELL_NAME", "X", "Y", "Z", "PERM", "PORO"])

    def _production_data(self) -> pd.DataFrame:
        """
        Function to read production data for all producers and injectors from an
        Flow simulation. The simulation is required to write out the
        following vectors to the summary file: WOPR, WGPR, WWPR, WBHP, WTHP, WGIR, WWIR

        Returns:
            A DataFrame with a DateTimeIndex and the following columns:
                - date          equal to index
                - WELL_NAME     Well name as used in Flow
                - WOPR          Well Oil Production Rate
                - WGPR          Well Gas Production Rate
                - WWPR          Well Water Production Rate
                - WOPT          Well Cumulative Oil Production
                - WGPT          Well Cumulative Gas Production Rate
                - WWPT          Well Cumulative Water Production Rate
                - WBHP          Well Bottom Hole Pressure
                - WTHP          Well Tubing Head Pressure
                - WGIR          Well Gas Injection Rate
                - WWIR          Well Water Injection Rate
                - WSTAT         Well status (OPEN, SHUT, STOP)
                - TYPE          Well Type: "OP", "GP", "WI", "GI"
                - PHASE         Main producing/injecting phase fluid: "OIL", "GAS", "WATER"

        Todo:
            * Remove depreciation warning suppression when solved in LibEcl.
            * Improve robustness pf setting of Phase and Type.

        """
        keys = [
            "WOPR",
            "WGPR",
            "WWPR",
            "WOPT",
            "WGPT",
            "WWPT",
            "WBHP",
            "WTHP",
            "WGIR",
            "WWIR",
            "WGIT",
            "WWIT",
            "WSTAT",
        ]

        df_production_data = pd.DataFrame()

        # Suppress a depreciation warning inside LibEcl
        warnings.simplefilter("ignore", category=DeprecationWarning)
        with warnings.catch_warnings():

            for well_name in self._eclsum.wells():
                df = pd.DataFrame()

                df["date"] = self._eclsum.report_dates
                df["date"] = pd.to_datetime(df["date"])
                df.set_index("date", inplace=True)

                for prod_key in keys:
                    try:
                        df[f"{prod_key}"] = self._eclsum.get_values(
                            f"{prod_key}:{well_name}", report_only=True)
                    except KeyError:
                        df[f"{prod_key}"] = np.nan

                # Set columns that have only exact zero values to np.nan
                df.loc[:, (df == 0).all(axis=0)] = np.nan

                df["WELL_NAME"] = well_name

                df["PHASE"] = None
                df.loc[df["WOPR"] > 0, "PHASE"] = "OIL"
                df.loc[df["WWIR"] > 0, "PHASE"] = "WATER"
                df.loc[df["WGIR"] > 0, "PHASE"] = "GAS"
                df["TYPE"] = None
                df.loc[df["WOPR"] > 0, "TYPE"] = "OP"
                df.loc[df["WWIR"] > 0, "TYPE"] = "WI"
                df.loc[df["WGIR"] > 0, "TYPE"] = "GI"
                # make sure the correct well type is set also when the well is shut in
                df[["PHASE", "TYPE"]] = df[["PHASE",
                                            "TYPE"]].fillna(method="backfill")
                df[["PHASE", "TYPE"]] = df[["PHASE",
                                            "TYPE"]].fillna(method="ffill")

                df_production_data = df_production_data.append(df)

        if df_production_data["WSTAT"].isna().all():
            warnings.warn(
                "No WSTAT:* summary vectors in input case - setting default well status to OPEN."
            )
            wstat_default = "OPEN"
        else:
            wstat_default = "STOP"

        df_production_data["WSTAT"] = df_production_data["WSTAT"].map({
            0:
            wstat_default,
            1:
            "OPEN",  # Producer OPEN
            2:
            "OPEN",  # Injector OPEN
            3:
            "SHUT",
            4:
            "STOP",
            5:
            "SHUT",  # PSHUT
            6:
            "STOP",  # PSTOP
            np.nan:
            wstat_default,
        })

        # ensure that a type is assigned also if a well is never activated
        df_production_data[["PHASE", "TYPE"]] = df_production_data[[
            "PHASE", "TYPE"
        ]].fillna(method="backfill")
        df_production_data[["PHASE", "TYPE"
                            ]] = df_production_data[["PHASE", "TYPE"
                                                     ]].fillna(method="ffill")

        df_production_data["date"] = df_production_data.index
        df_production_data["date"] = pd.to_datetime(
            df_production_data["date"]).dt.date

        return df_production_data

    def _faults(self) -> pd.DataFrame:
        """
        Read the fault keyword table through ``faults.df`` and convert it to points.

        Every table row contributes two points: the cell named by (I, J, K) and the
        neighbouring cell one step away in the direction given by FACE.

        Returns:
            A dataframe with columns NAME, X, Y, Z with data for fault planes

        Raises:
            ValueError: If a FACE value is not one of the known face labels.

        """
        # Step (di, dj, dk) towards the neighbouring cell for every accepted FACE label.
        neighbour_step = {
            "X": (1, 0, 0), "X+": (1, 0, 0),
            "Y": (0, 1, 0), "Y+": (0, 1, 0),
            "Z": (0, 0, 1), "Z+": (0, 0, 1),
            "X-": (-1, 0, 0),
            "Y-": (0, -1, 0),
            "Z-": (0, 0, -1),
        }

        fault_table = faults.df(EclFiles(self._input_case))

        points = []
        for _, row in fault_table.iterrows():
            # The table is one-based, grid lookups are zero-based.
            i, j, k = row["I"] - 1, row["J"] - 1, row["K"] - 1
            points.append((row["NAME"], i, j, k))

            step = neighbour_step.get(row["FACE"])
            if step is None:
                raise ValueError(
                    f"Could not interpret '{row['FACE']}' while reading the FAULTS keyword."
                )
            points.append((row["NAME"], i + step[0], j + step[1], k + step[2]))

        df_faults = pd.DataFrame.from_records(points,
                                              columns=["NAME", "I", "J", "K"])

        if not df_faults.empty:
            # Look up the coordinates of every point and spread them over X, Y and Z.
            xyz_per_point = df_faults.apply(
                lambda point: list(
                    self._grid.get_xyz(ijk=(point["I"], point["J"], point["K"]))
                ),
                axis=1,
            )
            df_faults[["X", "Y", "Z"]] = pd.DataFrame(
                xyz_per_point.values.tolist())

        return df_faults.drop(["I", "J", "K"], axis=1)

    def _grid_cell_bounding_boxes(self,
                                  layer_id: Optional[int] = None
                                  ) -> np.ndarray:
        """
        Function to get the bounding box (x, y and z min + max) for all grid cells

        Args:
            layer_id: The FlowNet layer id to be used to create the bounding box.

        Returns:
            A (active grid cells x 6) numpy array with columns [ xmin, xmax, ymin, ymax, zmin, zmax ]
            filtered on layer_id if not None.
        """
        if layer_id is not None:
            (k_min,
             k_max) = tuple(map(operator.sub, self._layers[layer_id], (1, 1)))
        else:
            (k_min, k_max) = (0, self._grid.nz)

        cells = [
            cell for cell in self._grid.cells(active=True)
            if (k_min <= cell.k <= k_max)
        ]
        xyz = np.empty((8 * len(cells), 3))

        for n_cell, cell in enumerate(cells):
            for n_corner, corner in enumerate(cell.corners):
                xyz[n_cell * 8 + n_corner, :] = corner

        xmin = xyz[:, 0].reshape(-1, 8).min(axis=1)
        xmax = xyz[:, 0].reshape(-1, 8).max(axis=1)
        ymin = xyz[:, 1].reshape(-1, 8).min(axis=1)
        ymax = xyz[:, 1].reshape(-1, 8).max(axis=1)
        zmin = xyz[:, 2].reshape(-1, 8).min(axis=1)
        zmax = xyz[:, 2].reshape(-1, 8).max(axis=1)

        return np.vstack([xmin, xmax, ymin, ymax, zmin, zmax]).T

    def _get_start_date(self):
        """Return the ``start_date`` attribute of the loaded summary data."""
        return self._eclsum.start_date

    def init(self, name: str) -> np.ndarray:
        """
        Array with 'name' regions.

        Args:
            name: Key that is looked up in the INIT data.

        Returns:
            Element 0 of what the INIT data holds under ``name``.
        """
        return self._init[name][0]

    def get_unique_regions(self, name: str) -> np.ndarray:
        """array with unique 'name' regions"""
        return np.unique(self._init[name][0])

    @property
    def grid_cell_bounding_boxes(self) -> np.ndarray:
        """Bounding boxes for all active grid cells (no layer filter); recomputed on every access"""
        return self._grid_cell_bounding_boxes()

    @property
    def faults(self) -> pd.DataFrame:
        """dataframe with all fault data; rebuilt by ``_faults`` on every access"""
        return self._faults()

    @property
    def production(self) -> pd.DataFrame:
        """dataframe with all production data; rebuilt by ``_production_data`` on every access"""
        return self._production_data()

    @property
    def well_connections(self) -> pd.DataFrame:
        """dataframe with all well connection coordinates; rebuilt by ``_well_connections`` on every access"""
        return self._well_connections()

    @property
    def well_logs(self) -> pd.DataFrame:
        """dataframe with all well logs; rebuilt by ``_well_logs`` on every access"""
        return self._well_logs()

    @property
    def grid(self) -> EclGrid:
        """the simulation grid object loaded from the ``.EGRID`` file"""
        return self._grid

    @property
    def layers(self) -> Union[Tuple[Tuple[int, int]], Tuple]:
        """Get the list of top and bottom k-indices of the original model that represent a FlowNet layer"""
        return self._layers
Beispiel #4
0
class FlowData(FromSource):
    """
    Flow data source class

    Args:
         input_case: Full path to eclipse case to load data from
         perforation_handling_strategy: How to deal with perforations per well.
                                                 ('bottom_point', 'top_point', 'multiple')

    """

    def __init__(
        self,
        input_case: Union[Path, str],
        perforation_handling_strategy: str = "bottom_point",
    ):
        """
        Load the summary, grid, restart and well data belonging to a case.

        Args:
            input_case: Path of the case; the ".EGRID" and ".UNRST" files are
                located by swapping the suffix of this path.
            perforation_handling_strategy: Stored as-is and consulted when the
                well coordinates are extracted.
        """
        super().__init__()

        case_path = Path(input_case)
        self._input_case: Path = case_path
        self._perforation_handling_strategy: str = perforation_handling_strategy

        self._eclsum = EclSum(str(case_path))
        self._grid = EclGrid(str(case_path.with_suffix(".EGRID")))
        self._restart = EclFile(str(case_path.with_suffix(".UNRST")))
        # The well information is built on top of the grid and restart file
        self._wells = WellInfo(
            self._grid,
            rst_file=self._restart,
            load_segment_information=True,
        )

    # pylint: disable=too-many-branches
    def _coordinates(self) -> pd.DataFrame:
        """
        Function to extract well coordinates from an Flow simulation.

        Returns:
            columns: WELL_NAME, X, Y, Z

        """

        def multi_xyz_append(append_obj_list):
            for global_conn in append_obj_list[1]:
                coords.append(
                    [append_obj_list[0], *self._grid.get_xyz(ijk=global_conn.ijk())]
                )

        coords: List = []

        for well_name in self._wells.allWellNames():
            global_conns = self._wells[well_name][0].globalConnections()
            coord_append = coords.append
            if self._perforation_handling_strategy == "bottom_point":
                xyz = self._grid.get_xyz(ijk=global_conns[-1].ijk())
            elif self._perforation_handling_strategy == "top_point":
                xyz = self._grid.get_xyz(ijk=global_conns[0].ijk())
            elif self._perforation_handling_strategy == "multiple":
                xyz = [global_conns]
                coord_append = multi_xyz_append
            elif self._perforation_handling_strategy == "time_avg_open_location":
                connection_open_time = {}

                for i, conn_status in enumerate(self._wells[well_name]):
                    time = datetime.datetime.strptime(
                        str(conn_status.simulationTime()), "%Y-%m-%d %H:%M:%S"
                    )
                    if i == 0:
                        prev_time = time

                    for connection in conn_status.globalConnections():
                        if connection.ijk() not in connection_open_time:
                            connection_open_time[connection.ijk()] = 0.0
                        elif connection.isOpen():
                            connection_open_time[connection.ijk()] += (
                                time - prev_time
                            ).total_seconds()
                        else:
                            connection_open_time[connection.ijk()] += 0.0

                    prev_time = time

                xyz_values = np.zeros((1, 3), dtype=np.float64)
                total_open_time = sum(connection_open_time.values())

                if total_open_time > 0:
                    for connection, open_time in connection_open_time.items():
                        xyz_values += np.multiply(
                            np.array(self._grid.get_xyz(ijk=connection)),
                            open_time / total_open_time,
                        )
                else:
                    for connection, open_time in connection_open_time.items():
                        xyz_values += np.divide(
                            np.array(self._grid.get_xyz(ijk=connection)),
                            len(connection_open_time.items()),
                        )

                xyz = tuple(*xyz_values)

            else:
                raise Exception(
                    f"perforation strategy {self._perforation_handling_strategy} unknown"
                )

            coord_append([well_name, *xyz])

        return pd.DataFrame(coords, columns=["WELL_NAME", "X", "Y", "Z"])

    def _production_data(self) -> pd.DataFrame:
        """
        Function to read production data for all producers and injectors from an
        Flow simulation. The simulation is required to write out the
        following vectors to the summary file: WOPR, WGPR, WWPR, WBHP, WTHP, WGIR, WWIR

        Returns:
            A DataFrame with a DateTimeIndex and the following columns:
                - date          equal to index
                - WELL_NAME     Well name as used in Flow
                - WOPR          Well Oil Production Rate
                - WGPR          Well Gas Production Rate
                - WWPR          Well Water Production Rate
                - WBHP          Well Bottom Hole Pressure
                - WTHP          Well Tubing Head Pressure
                - WGIR          Well Gas Injection Rate
                - WWIR          Well Water Injection Rate
                - WSTAT         Well status (OPEN, SHUT, STOP)
                - TYPE          Well Type: "OP", "GP", "WI", "GI"
                - PHASE         Main producing/injecting phase fluid: "OIL", "GAS", "WATER"

        Todo:
            * Remove depreciation warning suppression when solved in LibEcl.
            * Improve robustness pf setting of Phase and Type.

        """
        keys = ["WOPR", "WGPR", "WWPR", "WBHP", "WTHP", "WGIR", "WWIR", "WSTAT"]

        df_production_data = pd.DataFrame()

        start_date = self._get_start_date()

        # Suppress a depreciation warning inside LibEcl
        warnings.simplefilter("ignore", category=DeprecationWarning)
        with warnings.catch_warnings():

            for well_name in self._eclsum.wells():
                df = pd.DataFrame()

                df["date"] = self._eclsum.report_dates
                df["date"] = pd.to_datetime(df["date"])
                df.set_index("date", inplace=True)

                for prod_key in keys:
                    try:
                        df[f"{prod_key}"] = self._eclsum.get_values(
                            f"{prod_key}:{well_name}", report_only=True
                        )
                    except KeyError:
                        df[f"{prod_key}"] = np.nan

                # Find number of leading empty rows (with only nan or 0 values)
                zero = df.fillna(0).eq(0).all(1).sum()

                if zero < df.shape[0]:
                    # If there are no empty rows, prepend one for the start date
                    if zero == 0:
                        df1 = df.head(1)
                        as_list = df1.index.tolist()
                        idx = as_list.index(df1.index)
                        as_list[idx] = pd.to_datetime(start_date)
                        df1.index = as_list
                        df = pd.concat([df1, df])
                        for col in df.columns:
                            df[col].values[0] = 0
                        zero = 1

                    # Keep only the last empty row (well activation date)
                    df = df.iloc[max(zero - 1, 0) :]

                    # Assign well targets to the correct schedule dates
                    df = df.shift(-1)
                    # Make sure the row for the final date is not empty
                    df.iloc[-1] = df.iloc[-2]

                # Set columns that have only exact zero values to np.nan
                df.loc[:, (df == 0).all(axis=0)] = np.nan

                df["WELL_NAME"] = well_name

                df["PHASE"] = None
                df.loc[df["WOPR"] > 0, "PHASE"] = "OIL"
                df.loc[df["WWIR"] > 0, "PHASE"] = "WATER"
                df.loc[df["WGIR"] > 0, "PHASE"] = "GAS"
                df["TYPE"] = None
                df.loc[df["WOPR"] > 0, "TYPE"] = "OP"
                df.loc[df["WWIR"] > 0, "TYPE"] = "WI"
                df.loc[df["WGIR"] > 0, "TYPE"] = "GI"
                # make sure the correct well type is set also when the well is shut in
                df[["PHASE", "TYPE"]] = df[["PHASE", "TYPE"]].fillna(method="backfill")
                df[["PHASE", "TYPE"]] = df[["PHASE", "TYPE"]].fillna(method="ffill")

                df_production_data = df_production_data.append(df)

        if df_production_data["WSTAT"].isna().all():
            warnings.warn(
                "No WSTAT:* summary vectors in input case - setting default well status to OPEN."
            )
            wstat_default = "OPEN"
        else:
            wstat_default = "STOP"

        df_production_data["WSTAT"] = df_production_data["WSTAT"].map(
            {
                1: "OPEN",  # Producer OPEN
                2: "OPEN",  # Injector OPEN
                3: "SHUT",
                4: "STOP",
                5: "SHUT",  # PSHUT
                6: "STOP",  # PSTOP
                np.nan: wstat_default,
            }
        )

        # ensure that a type is assigned also if a well is never activated
        df_production_data[["PHASE", "TYPE"]] = df_production_data[
            ["PHASE", "TYPE"]
        ].fillna(method="backfill")
        df_production_data[["PHASE", "TYPE"]] = df_production_data[
            ["PHASE", "TYPE"]
        ].fillna(method="ffill")

        df_production_data["date"] = df_production_data.index
        df_production_data["date"] = pd.to_datetime(df_production_data["date"]).dt.date

        return df_production_data

    def _faults(self) -> pd.DataFrame:
        """
        Function to read fault plane data using ecl2df.

        Every row of the fault keyword data contributes two points: the cell
        given by its I, J, K values (each reduced by one) and the neighbouring
        cell on the side named by FACE. Both are translated to coordinates
        through the grid.

        Returns:
            A dataframe with columns NAME, X, Y, Z with data for fault planes.
            When there are no fault rows the dataframe is empty but still
            carries these four columns.

        Raises:
            ValueError: If a FACE value is not one of X, X+, X-, Y, Y+, Y-, Z, Z+, Z-.

        """
        # Index offset (di, dj, dk) of the neighbouring cell for each face
        neighbour_offsets = {
            "X": (1, 0, 0),
            "X+": (1, 0, 0),
            "Y": (0, 1, 0),
            "Y+": (0, 1, 0),
            "Z": (0, 0, 1),
            "Z+": (0, 0, 1),
            "X-": (-1, 0, 0),
            "Y-": (0, -1, 0),
            "Z-": (0, 0, -1),
        }

        eclfile = EclFiles(self._input_case)
        df_fault_keyword = faults.df(eclfile)

        cells = []
        for _, row in df_fault_keyword.iterrows():
            offset = neighbour_offsets.get(row["FACE"])
            if offset is None:
                raise ValueError(
                    f"Could not interpret '{row['FACE']}' while reading the FAULTS keyword."
                )

            # Shift the keyword indices down by one before the grid lookup
            # (presumably 1-based -> 0-based; confirm against ecl2df).
            i = row["I"] - 1
            j = row["J"] - 1
            k = row["K"] - 1

            cells.append((row["NAME"], i, j, k))
            cells.append((row["NAME"], i + offset[0], j + offset[1], k + offset[2]))

        # Coordinates are resolved only after all rows were validated
        records = [
            (name, *self._grid.get_xyz(ijk=(i, j, k))) for name, i, j, k in cells
        ]

        return pd.DataFrame.from_records(records, columns=["NAME", "X", "Y", "Z"])

    def _grid_cell_bounding_boxes(self) -> np.ndarray:
        """
        Function to get the bounding box (x, y and z min + max) for all grid cells

        Returns:
            A (active grid cells x 6) numpy array with columns [ xmin, xmax, ymin, ymax, zmin, zmax ]
        """
        xyz = np.empty((8 * self._grid.get_num_active(), 3))
        for active_index in range(self._grid.get_num_active()):
            for corner in range(0, 8):
                xyz[active_index * 8 + corner, :] = self._grid.get_cell_corner(
                    corner, active_index=active_index
                )

        xmin = xyz[:, 0].reshape(-1, 8).min(axis=1)
        xmax = xyz[:, 0].reshape(-1, 8).max(axis=1)
        ymin = xyz[:, 1].reshape(-1, 8).min(axis=1)
        ymax = xyz[:, 1].reshape(-1, 8).max(axis=1)
        zmin = xyz[:, 2].reshape(-1, 8).min(axis=1)
        zmax = xyz[:, 2].reshape(-1, 8).max(axis=1)

        return np.vstack([xmin, xmax, ymin, ymax, zmin, zmax]).T

    def _get_start_date(self):
        return self._eclsum.start_date

    @property
    def grid_cell_bounding_boxes(self) -> np.ndarray:
        """Boundingboxes for all gridcells"""
        return self._grid_cell_bounding_boxes()

    @property
    def faults(self) -> pd.DataFrame:
        """dataframe with all fault data"""
        return self._faults()

    @property
    def production(self) -> pd.DataFrame:
        """dataframe with all production data"""
        return self._production_data()

    @property
    def coordinates(self) -> pd.DataFrame:
        """dataframe with all coordinates"""
        return self._coordinates()