Python df Beispiele, ecl2df.grid.df Python Beispiele

Beispiel #1

0

Datei anzeigen

def test_gridzonemap():
    """Verify how zonemaps end up in the grid dataframes.

    The zonemap should be merged in automatically by default, and the API
    must also accept a zonemap supplied directly as a dictionary."""
    eclfiles = EclFiles(DATAFILE)

    # zonemap=None: rely on whatever zonemap is picked up by default
    geometry_default = grid.gridgeometry2df(eclfiles, zonemap=None)
    default_zonemap = geometry_default["ZONE"]

    # An empty dictionary means no ZONE column at all
    grid_no_zone = grid.gridgeometry2df(eclfiles, zonemap={})
    assert "ZONE" not in grid_no_zone

    # grid.df() must treat the zonemap the same way as gridgeometry2df()
    df_default_zone = grid.df(eclfiles, zonemap=None)
    assert (df_default_zone["ZONE"] == default_zonemap).all()

    df_no_zone = grid.df(eclfiles, zonemap={})
    assert "ZONE" not in df_no_zone

    # A user-supplied zonemap naming only the first layer
    df_custom_zone = grid.gridgeometry2df(eclfiles, zonemap={1: "FIRSTLAYER"})
    assert "ZONE" in df_custom_zone
    first_layer = df_custom_zone[df_custom_zone["K"] == 1]
    assert set(first_layer["ZONE"].unique()) == {"FIRSTLAYER"}
    assert len(df_custom_zone) == len(grid_no_zone)

    # A zonemap that only refers to a layer index absent from the grid
    # should give a ZONE column consisting solely of nulls
    bogus_zonemap = {999999: "nonexistinglayer"}
    df_bogus_zones = grid.gridgeometry2df(eclfiles, zonemap=bogus_zonemap)
    assert pd.isnull(df_bogus_zones["ZONE"]).all()

    # Test a custom "subzone" map via direct usage of merge_zones on a
    # dataframe where ZONE already exists:
    subzonemap = {1: "SUBZONE1", 2: "SUBZONE2"}
    dframe = common.merge_zones(
        grid.df(eclfiles), subzonemap, zoneheader="SUBZONE", kname="K"
    )
    assert (dframe["ZONE"] == default_zonemap).all()
    for layer, subzone in subzonemap.items():
        layer_rows = dframe[dframe["K"] == layer]
        assert set(layer_rows["SUBZONE"].unique()) == {subzone}
    assert len(dframe) == len(grid_no_zone)

Beispiel #2

0

Datei anzeigen

def test_grid_df():
    """Check that grid.df() yields a dataframe holding both INIT vectors
    and cell coordinates"""
    grid_df = grid.df(EclFiles(DATAFILE))

    assert isinstance(grid_df, pd.DataFrame)
    assert not grid_df.empty

    expected_columns = (
        "PERMX",
        "PORO",
        "PORV",
        "I",
        "J",
        "K",
        "X",
        "Y",
        "Z",
        "VOLUME",
    )
    for column in expected_columns:
        assert column in grid_df

    # Check that PORV is sensible: summed over all cells, porosity times
    # bulk volume must match the reported pore volume within a small
    # relative tolerance.
    porv_mismatch = grid_df["PORO"] * grid_df["VOLUME"] - grid_df["PORV"]
    relative_error = abs(sum(porv_mismatch)) / sum(grid_df["PORV"])
    assert relative_error < 0.00001

Beispiel #3

0

Datei anzeigen

Datei: test_grid.py Projekt: lindjoha/ecl2df

def test_df():
    """Exercise grid.df() with different vector selections, restart dates
    and date layouts (stacked rows versus dates in column headers)"""
    eclfiles = EclFiles(REEK)

    # Calling without the eclfiles argument must raise:
    with pytest.raises(TypeError):
        # pylint: disable=no-value-for-parameter
        grid.df()

    default_df = grid.df(eclfiles)
    assert not default_df.empty
    assert "I" in default_df  # From GRID
    assert "PORO" in default_df  # From INIT
    assert "SOIL" not in default_df  # We do not get RST unless we ask for it.

    # A wildcard, given as a plain string or wrapped in a list:
    for wildcard in ("*", ["*"]):
        wildcard_df = grid.df(eclfiles, vectors=wildcard)
        assert "I" in wildcard_df  # From GRID
        assert "PORO" in wildcard_df  # From INIT
        assert "SOIL" not in wildcard_df  # No RST unless we ask for it.

    pressure_str_df = grid.df(eclfiles, vectors="PRESSURE")
    assert "I" in pressure_str_df
    assert "PRESSURE" not in pressure_str_df  # that vector is only in RST
    assert len(pressure_str_df) == 35817
    assert "VOLUME" in pressure_str_df

    pressure_list_df = grid.df(eclfiles, vectors=["PRESSURE"])
    assert "I" in pressure_list_df
    assert not pressure_list_df.empty
    assert "PRESSURE" not in pressure_list_df
    # Column count without any dynamic data, the baseline for checks below
    geometry_cols = len(pressure_list_df.columns)

    last_stacked = grid.df(
        eclfiles, vectors=["PRESSURE"], rstdates="last", stackdates=True
    )
    assert "PRESSURE" in last_stacked
    assert len(last_stacked.columns) == geometry_cols + 2
    assert "DATE" in last_stacked  # Present because of stackdates

    last_plain = grid.df(eclfiles, vectors="PRESSURE", rstdates="last")
    assert "PRESSURE" in last_plain
    assert len(last_plain.columns) == geometry_cols + 1

    last_dated = grid.df(
        eclfiles, vectors="PRESSURE", rstdates="last", dateinheaders=True
    )
    assert "PRESSURE" not in last_dated
    assert "PRESSURE@2001-08-01" in last_dated

    all_stacked = grid.df(
        eclfiles, vectors=["PORO", "PRESSURE"], rstdates="all", stackdates=True
    )
    assert "PRESSURE" in all_stacked
    assert len(all_stacked.columns) == geometry_cols + 3
    assert "DATE" in all_stacked
    assert len(all_stacked["DATE"].unique()) == 4
    assert not all_stacked.isna().any().any()

    # Check that all but the dynamic data has been repeated: after removing
    # the date-dependent columns, each date's slice must equal the first.
    static_per_date = [
        all_stacked[all_stacked["DATE"] == datestr]
        .drop(["PRESSURE", "DATE"], axis=1)
        .reset_index(drop=True)
        for datestr in ("2000-01-01", "2000-07-01", "2001-02-01", "2001-08-01")
    ]
    reference = static_per_date[0]
    for later in static_per_date[1:]:
        pd.testing.assert_frame_equal(reference, later)

    poro_df = grid.df(eclfiles, vectors="PORO")
    assert "I" in poro_df
    assert "PORO" in poro_df
    assert len(poro_df) == 35817
    assert "DATE" not in poro_df

    poro_all_dates = grid.df(eclfiles, vectors="PORO", rstdates="all")
    assert "I" in poro_all_dates
    assert "PORO" in poro_all_dates
    assert "DATE" not in poro_all_dates
    # (no RST columns, so no DATE info in the dataframe)
    # (warnings should be printed)

    poro_stacked = grid.df(
        eclfiles, vectors="PORO", rstdates="all", stackdates=True
    )
    assert "I" in poro_stacked
    assert "PORO" in poro_stacked
    assert "DATE" not in poro_stacked

Beispiel #4

0

Datei anzeigen

Datei: test_grid.py Projekt: lindjoha/ecl2df

def test_df2ecl(tmp_path):
    """Test if we are able to output include files for grid data

    Covers the string returned by grid.df2ecl() for integer and float
    dtypes, the nocomments switch, writing to a file on disk, rejection of
    unknown dtypes/keywords, and dataframes that carry restart data.

    Args:
        tmp_path: Temporary directory (pytest fixture) in which the
            include file is written.
    """
    eclfiles = EclFiles(REEK)
    grid_df = grid.df(eclfiles)

    fipnum_str = grid.df2ecl(grid_df, "FIPNUM", dtype=int)
    # dtype given as a string must be equivalent to the Python type:
    assert grid.df2ecl(grid_df, "FIPNUM", dtype="int", nocomments=True) == grid.df2ecl(
        grid_df, "FIPNUM", dtype=int, nocomments=True
    )
    with pytest.raises(ValueError, match="Wrong dtype argument foo"):
        grid.df2ecl(grid_df, "FIPNUM", dtype="foo")

    assert "FIPNUM" in fipnum_str
    assert "-- Output file printed by ecl2df.grid" in fipnum_str
    assert "35817 active cells" in fipnum_str  # (comment at the end)
    assert "35840 total cell count" in fipnum_str  # (comment at the end)
    assert len(fipnum_str) > 100

    fipnum_str_nocomment = grid.df2ecl(grid_df, "FIPNUM", dtype=int, nocomments=True)
    assert "--" not in fipnum_str_nocomment
    fipnum2_str = grid.df2ecl(
        grid_df, "FIPNUM", dtype=int, eclfiles=eclfiles, nocomments=True
    )
    # This would mean that we guessed the correct global size in the first run
    assert fipnum_str_nocomment == fipnum2_str

    float_fipnum_str = grid.df2ecl(grid_df, "FIPNUM", dtype=float)
    assert len(float_fipnum_str) > len(fipnum_str)  # lots of .0 in the string.

    fipsatnum_str = grid.df2ecl(grid_df, ["FIPNUM", "SATNUM"], dtype=int)
    assert "FIPNUM" in fipsatnum_str
    assert "SATNUM" in fipsatnum_str

    grid_df["FIPNUM"] = grid_df["FIPNUM"] * 3333
    fipnum_big_str = grid.df2ecl(grid_df, "FIPNUM", dtype=int)
    assert "3333" in fipnum_big_str
    assert len(fipnum_big_str) > len(fipnum_str)

    # Write the include file relative to the temporary directory, and
    # restore the working directory afterwards so the change of directory
    # does not leak into tests that run later in the same process.
    original_cwd = os.getcwd()
    os.chdir(tmp_path)
    try:
        grid.df2ecl(
            grid_df, ["PERMX", "PERMY", "PERMZ"], dtype=float, filename="perm.inc"
        )
        assert Path("perm.inc").is_file()
        # Context manager ensures the file handle is closed again
        with open("perm.inc") as inc_handle:
            incstring = inc_handle.readlines()
    finally:
        os.chdir(original_cwd)
    assert sum(1 for line in incstring if "PERM" in line) == 6

    assert grid.df2ecl(grid_df, ["PERMX"], dtype=float, nocomments=True) == grid.df2ecl(
        grid_df, ["PERMX"], dtype="float", nocomments=True
    )

    # NOTE(review): the expected-exception check below was commented out in
    # the original test, so this call is currently only required not to
    # crash. Confirm whether dtype=dict is supposed to raise ValueError.
    # with pytest.raises(ValueError, match="Wrong dtype argument"):
    grid.df2ecl(grid_df, ["PERMX"], dtype=dict)

    with pytest.raises(ValueError):
        grid.df2ecl(grid_df, ["PERMRR"])

    # Check when we have restart info included:
    gr_rst = grid.df(eclfiles, rstdates="all")
    fipnum_str_rst = grid.df2ecl(gr_rst, "FIPNUM", dtype=int, nocomments=True)
    assert fipnum_str_rst == fipnum_str_nocomment

    # When dates are stacked, there are NaN's  in the FIPNUM column,
    # which should be gracefully ignored.
    gr_rst_stacked = grid.df(eclfiles, rstdates="all", stackdates=True)
    fipnum_str_rst = grid.df2ecl(gr_rst_stacked, "FIPNUM", dtype=int, nocomments=True)
    assert fipnum_str_rst == fipnum_str_nocomment

    # dateinheaders here will be ignored due to stackdates:
    pd.testing.assert_frame_equal(
        gr_rst_stacked,
        grid.df(eclfiles, rstdates="all", stackdates=True, dateinheaders=True),
    )

Beispiel #5

0

Datei anzeigen

Datei: pillars.py Projekt: lindjoha/ecl2df

def df(
    eclfiles: EclFiles,
    region: str = None,
    rstdates: Optional[Union[str, datetime.date, List[datetime.date]]] = None,
    soilcutoff: float = 0.2,
    sgascutoff: float = 0.7,
    swatcutoff: float = 0.7,
    stackdates: bool = False,
) -> pd.DataFrame:
    """Produce a dataframe with pillar information

    This is the "main" function for Python API users. The result holds one
    row for each I-J combination (identified in the column PILLAR), and,
    when a region parameter is supplied, one row pr. pillar and region.

    PORV is the summed porevolume of the pillar (in the region),
    VOLUME is bulk volume, and PORO is porevolume weighted porosity.
    PERM columns contain unweighted value averages, use with caution.

    If a restart date is picked, then SWAT and SGAS will
    be used to compute volumes pr. phase, WATVOL, OILVOL and GASVOL. The
    columns with dynamic data will include the date in the column headers
    like SWAT@2009-01-01

    Args:
        eclfiles: The EclFiles object the grid data is extracted from.
        region: A parameter the pillars will be split
            on. Typically EQLNUM or FIPNUM. Set to empty string
            or None to avoid any region grouping. A region that is not
            found in the grid data is ignored with a warning.
        rstdates: Dates for which restart data
            is to be extracted. The string can
            be in ISO-format, or one of the mnemonics
            'first', 'last' or 'all'. It can also be a list
            of datetime.date.
        soilcutoff: If not None, an oil-water contact will
            be estimated pr. pillar, based on the deepest cell with
            SOIL above the given cutoff. Value is put in column OWC.
        sgascutoff: If not None, a gas contact will be
            estimated pr pillar, based on the deepest cell with
            SGAS above the given cutoff. Value is put in column GOC.
        swatcutoff: OWC or GWC is only computed for pillars
            where at least one cell is above this value.
        stackdates: If true, a column
            called DATE will be added and data for all restart
            dates will be added in a stacked manner.

    Returns:
        Dataframe aggregated over PILLAR (and over the region, when it is
        in use).
    """
    # Only ask for the vectors we need, in order to save memory and cputime:
    vectors = [region] if region else []
    vectors += ["POR*", "PERM*", "SWAT", "SGAS", "1OVERBO", "1OVERBG"]
    grid_df = grid.df(
        eclfiles, rstdates=rstdates, vectors=vectors, dateinheaders=True
    )

    # The third element from dates2rstindices is iterated below as the date
    # strings that appear after "@" in the column headers.
    rstdates_iso = grid.dates2rstindices(eclfiles, rstdates)[2]

    # Pillar label on the form "<I>-<J>"
    i_label = grid_df["I"].astype(str)
    j_label = grid_df["J"].astype(str)
    grid_df["PILLAR"] = i_label + "-" + j_label

    logger.info("Computing pillar statistics")
    groupbies = ["PILLAR"]
    if region and region in grid_df:
        groupbies.append(region)
        grid_df[region] = grid_df[region].astype(int)
    elif region:
        logger.warning("Region parameter %s not found, ignored", region)

    for datestr in rstdates_iso:
        logger.info("Dynamic volumes for %s", datestr)
        volumes = compute_volumes(grid_df, datestr=datestr)
        grid_df = pd.concat([grid_df, volumes], axis="columns", sort=False)

    # Select an aggregator for every column whose name, with any "@<date>"
    # suffix stripped, is known to AGGREGATORS:
    aggregators = {}
    for colname in grid_df:
        basename = colname.split("@")[0]
        if basename in AGGREGATORS:
            aggregators[colname] = AGGREGATORS[basename]

    # Group over PILLAR and possibly regions:
    aggregated = grid_df.groupby(groupbies).agg(aggregators)
    grouped = aggregated.reset_index()

    # Compute correct pillar averaged porosity (from bulk)
    if "PORV" in grouped and "VOLUME" in grouped:
        grouped["PORO"] = grouped["PORV"] / grouped["VOLUME"]

    # Compute contacts:
    for datestr in rstdates_iso:
        has_water = "SWAT@" + datestr in grid_df
        has_hydrocarbon = (
            "SOIL@" + datestr in grid_df or "SGAS@" + datestr in grid_df
        )
        if not (has_water and has_hydrocarbon):
            continue
        contacts = compute_pillar_contacts(
            grid_df,
            region=region,
            soilcutoff=soilcutoff,
            sgascutoff=sgascutoff,
            swatcutoff=swatcutoff,
            datestr=datestr,
        )
        if not contacts.empty:
            grouped = pd.merge(grouped, contacts, how="left")

    if not stackdates:
        return grouped
    return common.stack_on_colnames(
        grouped, sep="@", stackcolname="DATE", inplace=True
    )

Beispiel #6

0

Datei anzeigen

def test_df():
    """Exercise grid.df() with different vector selections, restart dates
    and date layouts (stacked rows versus dates in column headers)"""
    eclfiles = EclFiles(DATAFILE)

    # Calling without the eclfiles argument must raise:
    with pytest.raises(TypeError):
        # pylint: disable=no-value-for-parameter
        grid.df()

    default_df = grid.df(eclfiles)
    assert not default_df.empty
    assert "I" in default_df  # From GRID
    assert "PORO" in default_df  # From INIT
    assert "SOIL" not in default_df  # We do not get RST unless we ask for it.

    # A wildcard, given as a plain string or wrapped in a list:
    for wildcard in ("*", ["*"]):
        wildcard_df = grid.df(eclfiles, vectors=wildcard)
        assert "I" in wildcard_df  # From GRID
        assert "PORO" in wildcard_df  # From INIT
        assert "SOIL" not in wildcard_df  # No RST unless we ask for it.

    pressure_str_df = grid.df(eclfiles, vectors="PRESSURE")
    assert "I" in pressure_str_df
    assert "PRESSURE" not in pressure_str_df  # that vector is only in RST
    assert len(pressure_str_df) == 35817
    assert "VOLUME" in pressure_str_df

    pressure_list_df = grid.df(eclfiles, vectors=["PRESSURE"])
    assert "I" in pressure_list_df
    assert not pressure_list_df.empty
    assert "PRESSURE" not in pressure_list_df
    # Column count without any dynamic data, the baseline for checks below
    geometry_cols = len(pressure_list_df.columns)

    last_stacked = grid.df(
        eclfiles, vectors=["PRESSURE"], rstdates="last", stackdates=True
    )
    assert "PRESSURE" in last_stacked
    assert len(last_stacked.columns) == geometry_cols + 2
    assert "DATE" in last_stacked  # awaits stacking

    last_plain = grid.df(eclfiles, vectors="PRESSURE", rstdates="last")
    assert "PRESSURE" in last_plain
    assert len(last_plain.columns) == geometry_cols + 1

    last_dated = grid.df(
        eclfiles, vectors="PRESSURE", rstdates="last", dateinheaders=True
    )
    assert "PRESSURE" not in last_dated
    assert "PRESSURE@2001-08-01" in last_dated

    all_stacked = grid.df(
        eclfiles, vectors="PRESSURE", rstdates="all", stackdates=True
    )
    assert "PRESSURE" in all_stacked
    assert len(all_stacked.columns) == geometry_cols + 2
    assert "DATE" in all_stacked
    assert len(all_stacked["DATE"].unique()) == 4

    poro_df = grid.df(eclfiles, vectors="PORO")
    assert "I" in poro_df
    assert "PORO" in poro_df
    assert len(poro_df) == 35817
    assert "DATE" not in poro_df

    poro_all_dates = grid.df(eclfiles, vectors="PORO", rstdates="all")
    assert "I" in poro_all_dates
    assert "PORO" in poro_all_dates
    assert "DATE" not in poro_all_dates
    # (no RST columns, so no DATE info in the dataframe)
    # (warnings should be printed)

    poro_stacked = grid.df(
        eclfiles, vectors="PORO", rstdates="all", stackdates=True
    )
    assert "I" in poro_stacked
    assert "PORO" in poro_stacked
    assert "DATE" not in poro_stacked

Beispiel #7

0

Datei anzeigen

def test_df2ecl(tmpdir):
    """Test if we are able to output include files for grid data

    Covers the string returned by grid.df2ecl() for integer and float
    dtypes, the nocomments switch, writing to a file on disk, rejection of
    unknown keywords, and dataframes that carry restart data.

    Args:
        tmpdir: Temporary directory (pytest fixture) in which the include
            file is written.
    """
    eclfiles = EclFiles(DATAFILE)
    grid_df = grid.df(eclfiles)

    fipnum_str = grid.df2ecl(grid_df, "FIPNUM", dtype=int)
    assert "FIPNUM" in fipnum_str
    assert "-- Output file printed by ecl2df.grid" in fipnum_str
    assert "35817 active cells" in fipnum_str  # (comment at the end)
    assert "35840 total cell count" in fipnum_str  # (comment at the end)
    assert len(fipnum_str) > 100

    fipnum_str_nocomment = grid.df2ecl(grid_df,
                                       "FIPNUM",
                                       dtype=int,
                                       nocomments=True)
    assert "--" not in fipnum_str_nocomment
    fipnum2_str = grid.df2ecl(grid_df,
                              "FIPNUM",
                              dtype=int,
                              eclfiles=eclfiles,
                              nocomments=True)
    # This would mean that we guessed the correct global size in the first run
    assert fipnum_str_nocomment == fipnum2_str

    float_fipnum_str = grid.df2ecl(grid_df, "FIPNUM", dtype=float)
    assert len(float_fipnum_str) > len(fipnum_str)  # lots of .0 in the string.

    fipsatnum_str = grid.df2ecl(grid_df, ["FIPNUM", "SATNUM"], dtype=int)
    assert "FIPNUM" in fipsatnum_str
    assert "SATNUM" in fipsatnum_str

    grid_df["FIPNUM"] = grid_df["FIPNUM"] * 3333
    fipnum_big_str = grid.df2ecl(grid_df, "FIPNUM", dtype=int)
    assert "3333" in fipnum_big_str
    assert len(fipnum_big_str) > len(fipnum_str)

    # Only stay inside the temporary directory while the include file is
    # written and read back; as_cwd() restores the previous working
    # directory on exit, so the change does not leak into later tests.
    with tmpdir.as_cwd():
        grid.df2ecl(grid_df, ["PERMX", "PERMY", "PERMZ"],
                    dtype=float,
                    filename="perm.inc")
        assert os.path.exists("perm.inc")
        # Context manager ensures the file handle is closed again
        with open("perm.inc") as inc_handle:
            incstring = inc_handle.readlines()
    assert sum(1 for line in incstring if "PERM" in line) == 6

    with pytest.raises(ValueError):
        grid.df2ecl(grid_df, ["PERMRR"])

    # Check when we have restart info included:
    gr_rst = grid.df(eclfiles, rstdates="all")
    fipnum_str_rst = grid.df2ecl(gr_rst, "FIPNUM", dtype=int, nocomments=True)
    assert fipnum_str_rst == fipnum_str_nocomment

    # When dates are stacked, there are NaN's  in the FIPNUM column,
    # which should be gracefully ignored.
    gr_rst_stacked = grid.df(eclfiles, rstdates="all", stackdates=True)
    fipnum_str_rst = grid.df2ecl(gr_rst_stacked,
                                 "FIPNUM",
                                 dtype=int,
                                 nocomments=True)
    assert fipnum_str_rst == fipnum_str_nocomment