Example 1
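The listing below relies on third-party readers from libecl and ecl2df; a minimal import block it appears to assume is sketched here (module paths, and in particular the location of FromSource, are best guesses and not part of the original example).

# Sketch of the imports this example appears to need (assumed, not from the
# original listing): libecl exposes EclSum, EclGrid, EclFile and WellInfo under
# the "ecl" package, and ecl2df provides EclFiles plus the faults module.
import warnings
from pathlib import Path
from typing import List, Optional, Union

import numpy as np
import pandas as pd
from ecl.eclfile import EclFile
from ecl.grid import EclGrid
from ecl.summary import EclSum
from ecl.well import WellInfo
from ecl2df import EclFiles, faults

# FromSource is the abstract base class of the surrounding project; this import
# path is a placeholder and may differ in the actual code base.
from flownet.data.from_source import FromSource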
class EclipseData(FromSource):
    """
    Eclipse data source class

    Args:
         eclipse_case: Full path to the Eclipse case to load data from
         resample: Pandas resampling string
         perforation_handling_strategy: How to deal with perforations per well.
                                                 ('bottom_point', 'top_point', 'multiple')

    """

    def __init__(
        self,
        eclipse_case: Union[Path, str],
        resample: Optional[str] = None,
        perforation_handling_strategy: str = "bottom_point",
    ):
        super().__init__()

        self._eclipse_case: Path = Path(eclipse_case)
        self._eclsum = EclSum(str(self._eclipse_case))
        self._grid = EclGrid(str(self._eclipse_case.with_suffix(".EGRID")))
        self._restart = EclFile(str(self._eclipse_case.with_suffix(".UNRST")))
        self._wells = WellInfo(
            self._grid, rst_file=self._restart, load_segment_information=True
        )

        self._resample: Union[str, None] = resample
        self._perforation_handling_strategy: str = perforation_handling_strategy

    def _coordinates(self) -> pd.DataFrame:
        """
        Function to extract well coordinates from a Flow/Eclipse simulation.

        The case loaded at construction time must have both an EGRID and an
        UNRST file. The perforation handling strategy set on the instance
        ('bottom_point', 'top_point', 'multiple') controls how the perforations
        of each well are reduced to coordinates.

        Returns:
            columns: WELL_NAME, X, Y, Z

        """

        def multi_xyz_append(append_obj_list):
            # Append one coordinate row per global connection of a single well
            for global_conn in append_obj_list[1]:
                coords.append(
                    [append_obj_list[0], *self._grid.get_xyz(ijk=global_conn.ijk())]
                )

        coords: List = []

        for well_name in self._wells.allWellNames():
            global_conns = self._wells[well_name][0].globalConnections()
            coord_append = coords.append
            if self._perforation_handling_strategy == "bottom_point":
                xyz = self._grid.get_xyz(ijk=global_conns[-1].ijk())
            elif self._perforation_handling_strategy == "top_point":
                xyz = self._grid.get_xyz(ijk=global_conns[0].ijk())
            elif self._perforation_handling_strategy == "multiple":
                # Emit one coordinate row per connection instead of a single point
                xyz = [global_conns]
                coord_append = multi_xyz_append
            else:
                raise Exception(
                    f"perforation strategy {self._perforation_handling_strategy} unknown"
                )

            coord_append([well_name, *xyz])

        return pd.DataFrame(coords, columns=["WELL_NAME", "X", "Y", "Z"])

    def _production_data(self) -> pd.DataFrame:
        """
        Function to read production data for all producers and injectors from a
        Flow/Eclipse simulation. The simulation is required to write out the
        following vectors to the summary file: WOPR, WGPR, WWPR, WBHP, WTHP, WGIR, WWIR

        Returns:
            A DataFrame with a DateTimeIndex and the following columns:
                - date          equal to index
                - WELL_NAME     Well name as used in Eclipse
                - WOPR          Well Oil Production Rate
                - WGPR          Well Gas Production Rate
                - WWPR          Well Water Production Rate
                - WBHP          Well Bottom Hole Pressure
                - WTHP          Well Tubing Head Pressure
                - WGIR          Well Gas Injection Rate
                - WWIR          Well Water Injection Rate
                - WSTAT         Well status (OPEN, SHUT, STOP)
                - TYPE          Well Type: "OP", "GP", "WI", "GI"
                - PHASE         Main producing/injecting phase fluid: "OIL", "GAS", "WATER"

        Todo:
            * Remove deprecation warning suppression when solved in LibEcl.
            * Improve robustness of setting of Phase and Type.

        """
        keys = ["WOPR", "WGPR", "WWPR", "WBHP", "WTHP", "WGIR", "WWIR", "WSTAT"]

        df_production_data = pd.DataFrame()

        start_date = self._get_start_date()

        # Suppress a deprecation warning inside LibEcl
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=DeprecationWarning)

            for well_name in self._eclsum.wells():
                df = pd.DataFrame()

                df["date"] = self._eclsum.dates
                df["date"] = pd.to_datetime(df["date"])
                df.set_index("date", inplace=True)

                for prod_key in keys:
                    try:
                        df[f"{prod_key}"] = self._eclsum[
                            f"{prod_key}:{well_name}"
                        ].values
                    except KeyError:
                        df[f"{prod_key}"] = np.nan

                # Count rows with only NaN or zero values; for well summary
                # vectors these are in practice the rows before the well is active
                zero = df.fillna(0).eq(0).all(axis=1).sum()

                if zero < df.shape[0]:
                    # If there are no empty rows, prepend one for the start date
                    if zero == 0:
                        df1 = df.head(1)
                        as_list = df1.index.tolist()
                        # df1 has exactly one row, so replace its single index entry
                        as_list[0] = pd.to_datetime(start_date)
                        df1.index = as_list
                        df = pd.concat([df1, df])
                        for col in df.columns:
                            df[col].values[0] = 0
                        zero = 1

                    # Keep only the last empty row (well activation date)
                    df = df.iloc[max(zero - 1, 0) :]

                    # Assign well targets to the correct schedule dates
                    df = df.shift(-1)
                    # Make sure the row for the final date is not empty
                    df.iloc[-1] = df.iloc[-2]

                # Set columns that have only exact zero values to np.nan
                df.loc[:, (df == 0).all(axis=0)] = np.nan

                df["WELL_NAME"] = well_name
                df_production_data = pd.concat([df_production_data, df])

        df_production_data["PHASE"] = None
        df_production_data.loc[df_production_data["WOPR"] > 0, "PHASE"] = "OIL"
        df_production_data.loc[df_production_data["WWIR"] > 0, "PHASE"] = "WATER"
        df_production_data.loc[df_production_data["WGIR"] > 0, "PHASE"] = "GAS"

        df_production_data["WSTAT"] = df_production_data["WSTAT"].map(
            {
                1: "OPEN",  # Producer OPEN
                2: "OPEN",  # Injector OPEN
                3: "SHUT",
                4: "STOP",
                5: "SHUT",  # PSHUT
                6: "STOP",  # PSTOP
                np.nan: "STOP",
            }
        )

        df_production_data["TYPE"] = None
        df_production_data.loc[df_production_data["WOPR"] > 0, "TYPE"] = "OP"
        df_production_data.loc[df_production_data["WWIR"] > 0, "TYPE"] = "WI"
        df_production_data.loc[df_production_data["WGIR"] > 0, "TYPE"] = "GI"

        df_production_data[["PHASE", "TYPE"]] = df_production_data[
            ["PHASE", "TYPE"]
        ].fillna(method="backfill")

        df_production_data["date"] = df_production_data.index
        df_production_data["date"] = pd.to_datetime(df_production_data["date"]).dt.date

        return df_production_data

    def _faults(self) -> pd.DataFrame:
        """
        Function to read fault plane data using ecl2df.

        Returns:
            A dataframe with columns NAME, X, Y, Z with data for fault planes

        """
        eclfile = EclFiles(self._eclipse_case)
        df_fault_keyword = faults.df(eclfile)

        points = []
        for _, row in df_fault_keyword.iterrows():

            i = row["I"] - 1
            j = row["J"] - 1
            k = row["K"] - 1

            points.append((row["NAME"], i, j, k))

            if row["FACE"] == "X" or row["FACE"] == "X+":
                points.append((row["NAME"], i + 1, j, k))
            elif row["FACE"] == "Y" or row["FACE"] == "Y+":
                points.append((row["NAME"], i, j + 1, k))
            elif row["FACE"] == "Z" or row["FACE"] == "Z+":
                points.append((row["NAME"], i, j, k + 1))
            elif row["FACE"] == "X-":
                points.append((row["NAME"], i - 1, j, k))
            elif row["FACE"] == "Y-":
                points.append((row["NAME"], i, j - 1, k))
            elif row["FACE"] == "Z-":
                points.append((row["NAME"], i, j, k - 1))
            else:
                raise ValueError(
                    f"Could not interpret '{row['FACE']}' while reading the FAULTS keyword."
                )

        df_faults = pd.DataFrame.from_records(points, columns=["NAME", "I", "J", "K"])

        if not df_faults.empty:
            df_faults[["X", "Y", "Z"]] = pd.DataFrame(
                df_faults.apply(
                    lambda row: list(
                        self._grid.get_xyz(ijk=(row["I"], row["J"], row["K"]))
                    ),
                    axis=1,
                ).values.tolist()
            )

        return df_faults.drop(["I", "J", "K"], axis=1)

    def _get_start_date(self):
        return self._eclsum.start_date

    @property
    def faults(self) -> pd.DataFrame:
        """dataframe with all fault data"""
        return self._faults()

    @property
    def production(self) -> pd.DataFrame:
        """dataframe with all production data"""
        return self._production_data()

    @property
    def coordinates(self) -> pd.DataFrame:
        """dataframe with all coordinates"""
        return self._coordinates()
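
A minimal usage sketch for the class above; the case path is a placeholder and the selected columns are only illustrative.

# Hypothetical usage: the path below is a placeholder and must point to an
# Eclipse/Flow run that has SMSPEC/UNSMRY, EGRID and UNRST files on disk.
if __name__ == "__main__":
    eclipse_data = EclipseData(
        "/path/to/SOME_CASE",
        perforation_handling_strategy="multiple",
    )
    print(eclipse_data.coordinates.head())  # WELL_NAME, X, Y, Z per perforation
    print(eclipse_data.production[["WELL_NAME", "WOPR", "WSTAT", "TYPE"]].head())
    print(eclipse_data.faults.head())       # NAME, X, Y, Z of fault plane points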
Example 2
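As with the first example, the imports are not part of the listing; a sketch of what this class appears to need follows (module paths are assumptions, and FromSource comes from the surrounding project).

# Assumed imports for this example (paths are best guesses, not from the
# original listing); datetime is needed for the 'time_avg_open_location'
# perforation handling strategy further down.
import datetime
import warnings
from pathlib import Path
from typing import List, Union

import numpy as np
import pandas as pd
from ecl.eclfile import EclFile
from ecl.grid import EclGrid
from ecl.summary import EclSum
from ecl.well import WellInfo
from ecl2df import EclFiles, faults

from flownet.data.from_source import FromSource  # placeholder path, see example 1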
class FlowData(FromSource):
    """
    Flow data source class

    Args:
         input_case: Full path to the Eclipse case to load data from
         perforation_handling_strategy: How to deal with perforations per well.
                                                 ('bottom_point', 'top_point', 'multiple')

    """

    def __init__(
        self,
        input_case: Union[Path, str],
        perforation_handling_strategy: str = "bottom_point",
    ):
        super().__init__()

        self._input_case: Path = Path(input_case)
        self._eclsum = EclSum(str(self._input_case))
        self._grid = EclGrid(str(self._input_case.with_suffix(".EGRID")))
        self._restart = EclFile(str(self._input_case.with_suffix(".UNRST")))
        self._wells = WellInfo(
            self._grid, rst_file=self._restart, load_segment_information=True
        )

        self._perforation_handling_strategy: str = perforation_handling_strategy

    # pylint: disable=too-many-branches
    def _coordinates(self) -> pd.DataFrame:
        """
        Function to extract well coordinates from a Flow simulation.

        Returns:
            columns: WELL_NAME, X, Y, Z

        """

        def multi_xyz_append(append_obj_list):
            # Append one coordinate row per global connection of a single well
            for global_conn in append_obj_list[1]:
                coords.append(
                    [append_obj_list[0], *self._grid.get_xyz(ijk=global_conn.ijk())]
                )

        coords: List = []

        for well_name in self._wells.allWellNames():
            global_conns = self._wells[well_name][0].globalConnections()
            coord_append = coords.append
            if self._perforation_handling_strategy == "bottom_point":
                xyz = self._grid.get_xyz(ijk=global_conns[-1].ijk())
            elif self._perforation_handling_strategy == "top_point":
                xyz = self._grid.get_xyz(ijk=global_conns[0].ijk())
            elif self._perforation_handling_strategy == "multiple":
                # Emit one coordinate row per connection instead of a single point
                xyz = [global_conns]
                coord_append = multi_xyz_append
            elif self._perforation_handling_strategy == "time_avg_open_location":
                # Accumulate, per connection, the time it has been reported open
                connection_open_time = {}

                for i, conn_status in enumerate(self._wells[well_name]):
                    time = datetime.datetime.strptime(
                        str(conn_status.simulationTime()), "%Y-%m-%d %H:%M:%S"
                    )
                    if i == 0:
                        prev_time = time

                    for connection in conn_status.globalConnections():
                        if connection.ijk() not in connection_open_time:
                            connection_open_time[connection.ijk()] = 0.0
                        elif connection.isOpen():
                            connection_open_time[connection.ijk()] += (
                                time - prev_time
                            ).total_seconds()
                        else:
                            connection_open_time[connection.ijk()] += 0.0

                    prev_time = time

                xyz_values = np.zeros((1, 3), dtype=np.float64)
                total_open_time = sum(connection_open_time.values())

                if total_open_time > 0:
                    for connection, open_time in connection_open_time.items():
                        xyz_values += np.multiply(
                            np.array(self._grid.get_xyz(ijk=connection)),
                            open_time / total_open_time,
                        )
                else:
                    # No open time recorded: fall back to an unweighted average
                    for connection in connection_open_time:
                        xyz_values += np.divide(
                            np.array(self._grid.get_xyz(ijk=connection)),
                            len(connection_open_time),
                        )

                xyz = tuple(*xyz_values)

            else:
                raise Exception(
                    f"perforation strategy {self._perforation_handling_strategy} unknown"
                )

            coord_append([well_name, *xyz])

        return pd.DataFrame(coords, columns=["WELL_NAME", "X", "Y", "Z"])

    def _production_data(self) -> pd.DataFrame:
        """
        Function to read production data for all producers and injectors from a
        Flow simulation. The simulation is required to write out the
        following vectors to the summary file: WOPR, WGPR, WWPR, WBHP, WTHP, WGIR, WWIR

        Returns:
            A DataFrame with a DateTimeIndex and the following columns:
                - date          equal to index
                - WELL_NAME     Well name as used in Flow
                - WOPR          Well Oil Production Rate
                - WGPR          Well Gas Production Rate
                - WWPR          Well Water Production Rate
                - WBHP          Well Bottom Hole Pressure
                - WTHP          Well Tubing Head Pressure
                - WGIR          Well Gas Injection Rate
                - WWIR          Well Water Injection Rate
                - WSTAT         Well status (OPEN, SHUT, STOP)
                - TYPE          Well Type: "OP", "GP", "WI", "GI"
                - PHASE         Main producing/injecting phase fluid: "OIL", "GAS", "WATER"

        Todo:
            * Remove deprecation warning suppression when solved in LibEcl.
            * Improve robustness of setting of Phase and Type.

        """
        keys = ["WOPR", "WGPR", "WWPR", "WBHP", "WTHP", "WGIR", "WWIR", "WSTAT"]

        df_production_data = pd.DataFrame()

        start_date = self._get_start_date()

        # Suppress a deprecation warning inside LibEcl
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=DeprecationWarning)

            for well_name in self._eclsum.wells():
                df = pd.DataFrame()

                df["date"] = self._eclsum.report_dates
                df["date"] = pd.to_datetime(df["date"])
                df.set_index("date", inplace=True)

                for prod_key in keys:
                    try:
                        df[f"{prod_key}"] = self._eclsum.get_values(
                            f"{prod_key}:{well_name}", report_only=True
                        )
                    except KeyError:
                        df[f"{prod_key}"] = np.nan

                # Count rows with only NaN or zero values; for well summary
                # vectors these are in practice the rows before the well is active
                zero = df.fillna(0).eq(0).all(axis=1).sum()

                if zero < df.shape[0]:
                    # If there are no empty rows, prepend one for the start date
                    if zero == 0:
                        df1 = df.head(1)
                        as_list = df1.index.tolist()
                        # df1 has exactly one row, so replace its single index entry
                        as_list[0] = pd.to_datetime(start_date)
                        df1.index = as_list
                        df = pd.concat([df1, df])
                        for col in df.columns:
                            df[col].values[0] = 0
                        zero = 1

                    # Keep only the last empty row (well activation date)
                    df = df.iloc[max(zero - 1, 0) :]

                    # Assign well targets to the correct schedule dates
                    df = df.shift(-1)
                    # Make sure the row for the final date is not empty
                    df.iloc[-1] = df.iloc[-2]

                # Set columns that have only exact zero values to np.nan
                df.loc[:, (df == 0).all(axis=0)] = np.nan

                df["WELL_NAME"] = well_name

                df["PHASE"] = None
                df.loc[df["WOPR"] > 0, "PHASE"] = "OIL"
                df.loc[df["WWIR"] > 0, "PHASE"] = "WATER"
                df.loc[df["WGIR"] > 0, "PHASE"] = "GAS"
                df["TYPE"] = None
                df.loc[df["WOPR"] > 0, "TYPE"] = "OP"
                df.loc[df["WWIR"] > 0, "TYPE"] = "WI"
                df.loc[df["WGIR"] > 0, "TYPE"] = "GI"
                # make sure the correct well type is set also when the well is shut in
                df[["PHASE", "TYPE"]] = df[["PHASE", "TYPE"]].fillna(method="backfill")
                df[["PHASE", "TYPE"]] = df[["PHASE", "TYPE"]].fillna(method="ffill")

                df_production_data = pd.concat([df_production_data, df])

        if df_production_data["WSTAT"].isna().all():
            warnings.warn(
                "No WSTAT:* summary vectors in input case - setting default well status to OPEN."
            )
            wstat_default = "OPEN"
        else:
            wstat_default = "STOP"

        df_production_data["WSTAT"] = df_production_data["WSTAT"].map(
            {
                1: "OPEN",  # Producer OPEN
                2: "OPEN",  # Injector OPEN
                3: "SHUT",
                4: "STOP",
                5: "SHUT",  # PSHUT
                6: "STOP",  # PSTOP
                np.nan: wstat_default,
            }
        )

        # ensure that a type is assigned also if a well is never activated
        df_production_data[["PHASE", "TYPE"]] = df_production_data[
            ["PHASE", "TYPE"]
        ].fillna(method="backfill")
        df_production_data[["PHASE", "TYPE"]] = df_production_data[
            ["PHASE", "TYPE"]
        ].fillna(method="ffill")

        df_production_data["date"] = df_production_data.index
        df_production_data["date"] = pd.to_datetime(df_production_data["date"]).dt.date

        return df_production_data

    def _faults(self) -> pd.DataFrame:
        """
        Function to read fault plane data using ecl2df.

        Returns:
            A dataframe with columns NAME, X, Y, Z with data for fault planes

        """
        eclfile = EclFiles(self._input_case)
        df_fault_keyword = faults.df(eclfile)

        points = []
        for _, row in df_fault_keyword.iterrows():

            i = row["I"] - 1
            j = row["J"] - 1
            k = row["K"] - 1

            points.append((row["NAME"], i, j, k))

            if row["FACE"] == "X" or row["FACE"] == "X+":
                points.append((row["NAME"], i + 1, j, k))
            elif row["FACE"] == "Y" or row["FACE"] == "Y+":
                points.append((row["NAME"], i, j + 1, k))
            elif row["FACE"] == "Z" or row["FACE"] == "Z+":
                points.append((row["NAME"], i, j, k + 1))
            elif row["FACE"] == "X-":
                points.append((row["NAME"], i - 1, j, k))
            elif row["FACE"] == "Y-":
                points.append((row["NAME"], i, j - 1, k))
            elif row["FACE"] == "Z-":
                points.append((row["NAME"], i, j, k - 1))
            else:
                raise ValueError(
                    f"Could not interpret '{row['FACE']}' while reading the FAULTS keyword."
                )

        df_faults = pd.DataFrame.from_records(points, columns=["NAME", "I", "J", "K"])

        if not df_faults.empty:
            df_faults[["X", "Y", "Z"]] = pd.DataFrame(
                df_faults.apply(
                    lambda row: list(
                        self._grid.get_xyz(ijk=(row["I"], row["J"], row["K"]))
                    ),
                    axis=1,
                ).values.tolist()
            )

        return df_faults.drop(["I", "J", "K"], axis=1)

    def _grid_cell_bounding_boxes(self) -> np.ndarray:
        """
        Function to get the bounding box (x, y and z min + max) for all grid cells

        Returns:
            An (active grid cells x 6) numpy array with columns [xmin, xmax, ymin, ymax, zmin, zmax]
        """
        xyz = np.empty((8 * self._grid.get_num_active(), 3))
        for active_index in range(self._grid.get_num_active()):
            for corner in range(0, 8):
                xyz[active_index * 8 + corner, :] = self._grid.get_cell_corner(
                    corner, active_index=active_index
                )

        xmin = xyz[:, 0].reshape(-1, 8).min(axis=1)
        xmax = xyz[:, 0].reshape(-1, 8).max(axis=1)
        ymin = xyz[:, 1].reshape(-1, 8).min(axis=1)
        ymax = xyz[:, 1].reshape(-1, 8).max(axis=1)
        zmin = xyz[:, 2].reshape(-1, 8).min(axis=1)
        zmax = xyz[:, 2].reshape(-1, 8).max(axis=1)

        return np.vstack([xmin, xmax, ymin, ymax, zmin, zmax]).T

    def _get_start_date(self):
        return self._eclsum.start_date

    @property
    def grid_cell_bounding_boxes(self) -> np.ndarray:
        """Boundingboxes for all gridcells"""
        return self._grid_cell_bounding_boxes()

    @property
    def faults(self) -> pd.DataFrame:
        """dataframe with all fault data"""
        return self._faults()

    @property
    def production(self) -> pd.DataFrame:
        """dataframe with all production data"""
        return self._production_data()

    @property
    def coordinates(self) -> pd.DataFrame:
        """dataframe with all coordinates"""
        return self._coordinates()
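
A minimal usage sketch for FlowData; the case path is a placeholder and the printed selections are only illustrative.

# Hypothetical usage: the path below is a placeholder and must point to a Flow
# run that has SMSPEC/UNSMRY, EGRID and UNRST files on disk.
if __name__ == "__main__":
    flow_data = FlowData(
        "/path/to/SOME_CASE",
        perforation_handling_strategy="time_avg_open_location",
    )
    bboxes = flow_data.grid_cell_bounding_boxes    # shape (n_active_cells, 6)
    print(bboxes[:5])                              # xmin, xmax, ymin, ymax, zmin, zmax
    print(flow_data.coordinates.head())            # one time-averaged point per well
    print(flow_data.production.groupby("WELL_NAME")["TYPE"].first())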