def test_stack_on_colname():
    """Test stacking of columns carrying an implicit double level in their
    names, where a separator string marks the split between name and date."""
    # Mixed static and dated columns:
    frame = pd.DataFrame(
        columns=["EQLNUM", "STATIC", "OWC@2000-01-01", "OWC@2020-01-01"],
        data=[[1, 1.2, 2000, 1900], [2, 1.3, 2100, 2050]],
    )
    result = common.stack_on_colnames(frame)
    assert "DATE" in result
    assert "OWC" in result
    assert len(result.columns) == 4
    assert len(result["DATE"].unique()) == 2
    assert len(result) == 4
    assert not result.isnull().sum().sum()

    # Two distinct dated quantities (OWC and GOC) over the same dates:
    frame = pd.DataFrame(
        columns=[
            "EQLNUM",
            "STATIC",
            "OWC@2000-01-01",
            "OWC@2020-01-01",
            "GOC@2000-01-01",
            "GOC@2020-01-01",
        ],
        data=[[1, 1.2, 2000, 1900, 1800, 1700], [2, 1.3, 2100, 2050, 2000, 1950]],
    )
    result = common.stack_on_colnames(frame)
    assert "DATE" in result
    assert "OWC" in result
    assert "GOC" in result
    assert len(result.columns) == 5
    assert len(result["DATE"].unique()) == 2
    assert len(result) == 4
    assert not result.isnull().sum().sum()

    # Only dated columns, nothing static:
    frame = pd.DataFrame(
        columns=["OWC@2000-01-01", "OWC@2020-01-01"],
        data=[[2000, 1900], [2100, 2050]],
    )
    result = common.stack_on_colnames(frame)
    assert "DATE" in result
    assert "OWC" in result
    assert len(result.columns) == 2
    assert len(result["DATE"].unique()) == 2
    assert len(result) == 4
    assert not result.isnull().sum().sum()

    # No dated columns at all; the frame should pass through unchanged:
    frame = pd.DataFrame(columns=["EQLNUM", "STATIC"], data=[[1, 1.2], [2, 1.3]])
    result = common.stack_on_colnames(frame)
    assert "DATE" not in result
    assert "OWC" not in result
    assert "EQLNUM" in result
    assert "STATIC" in result
    assert len(result.columns) == 2
    assert len(result) == 2
    assert not result.isnull().sum().sum()
def df(
    eclfiles: EclFiles,
    region: Optional[str] = None,
    rstdates: Optional[Union[str, datetime.date, List[datetime.date]]] = None,
    soilcutoff: float = 0.2,
    sgascutoff: float = 0.7,
    swatcutoff: float = 0.7,
    stackdates: bool = False,
) -> pd.DataFrame:
    """Produce a dataframe with pillar information.

    This is the "main" function for Python API users.
    Produces a dataframe with data for each I-J combination
    (in the column PILLAR), and if a region parameter is
    supplied, also pr. region.

    PORV is the summed porevolume of the pillar (in the region),
    VOLUME is bulk volume, and PORO is porevolume weighted porosity.
    PERM columns contain unweighted value averages, use with caution.

    If a restart date is picked, then SWAT and SGAS will
    be used to compute volumes pr. phase, WATVOL, OILVOL and GASVOL. The
    columns with dynamic data will include the date in the column headers
    like SWAT@2009-01-01

    Args:
        eclfiles: Object holding the set of Eclipse output files to
            extract grid and restart data from.
        region: A parameter the pillars will be split
            on. Typically EQLNUM or FIPNUM. Set to empty string
            or None to avoid any region grouping.
        rstdates: Dates for which restart data
            is to be extracted. The string can
            be in ISO-format, or one of the mnenomics
            'first', 'last' or 'all'. It can also be a list
            of datetime.date.
        soilcutoff: If not None, an oil-water contact will
            be estimated pr. pillar, based on the deepest cell with
            SOIL above the given cutoff. Value is put in column OWC.
        sgascutoff: If not None, a gas contact will be
            estimated pr pillar, based on the deepest cell with SGAS
            above the given cutoff. Value is put in column GOC.
        swatcutoff: OWC or GWC is only computed for pillars
            where at least one cell is above this value.
        stackdates: If true, a column
            called DATE will be added and data for all restart
            dates will be added in a stacked manner.

    Returns:
        pd.DataFrame: One row pr. pillar (and pr. region value when region
        grouping is active) with aggregated static and dynamic data.
    """
    # List of vectors we want, conservative in order to save memory and cputime:
    vectors = []
    if region:
        vectors.append(region)
    vectors.extend(["POR*", "PERM*", "SWAT", "SGAS", "1OVERBO", "1OVERBG"])

    grid_df = grid.df(eclfiles, rstdates=rstdates, vectors=vectors, dateinheaders=True)
    # The ISO-formatted dates actually resolved (e.g. 'first'/'last' expanded):
    rstdates_iso = grid.dates2rstindices(eclfiles, rstdates)[2]

    # A pillar is identified by its I-J coordinate pair:
    grid_df["PILLAR"] = grid_df["I"].astype(str) + "-" + grid_df["J"].astype(str)
    logger.info("Computing pillar statistics")
    groupbies = ["PILLAR"]
    if region:
        if region not in grid_df:
            logger.warning("Region parameter %s not found, ignored", region)
        else:
            groupbies.append(region)
            grid_df[region] = grid_df[region].astype(int)

    # Append per-date phase volume columns (WATVOL@..., OILVOL@..., ...):
    for datestr in rstdates_iso:
        logger.info("Dynamic volumes for %s", datestr)
        volumes = compute_volumes(grid_df, datestr=datestr)
        grid_df = pd.concat([grid_df, volumes], axis="columns", sort=False)

    # Pick an aggregator pr. column, keyed on the column name with any
    # "@<date>" suffix stripped; columns without an aggregator are dropped:
    aggregators = {
        key: AGGREGATORS[key.split("@")[0]]
        for key in grid_df
        if key.split("@")[0] in AGGREGATORS
    }

    # Group over PILLAR and possibly regions:
    grouped = (grid_df.groupby(groupbies).agg(aggregators)).reset_index()

    # Compute correct pillar averaged porosity (from bulk):
    if "PORV" in grouped and "VOLUME" in grouped:
        grouped["PORO"] = grouped["PORV"] / grouped["VOLUME"]

    # Compute contacts pr. date where saturation data is present:
    for datestr in rstdates_iso:
        if "SWAT@" + datestr in grid_df and (
            "SOIL@" + datestr in grid_df or "SGAS@" + datestr in grid_df
        ):
            contacts = compute_pillar_contacts(
                grid_df,
                region=region,
                soilcutoff=soilcutoff,
                sgascutoff=sgascutoff,
                swatcutoff=swatcutoff,
                datestr=datestr,
            )
            if not contacts.empty:
                grouped = pd.merge(grouped, contacts, how="left")

    if stackdates:
        return common.stack_on_colnames(
            grouped, sep="@", stackcolname="DATE", inplace=True
        )
    return grouped
def test_stack_on_colname():
    """Test stacking of columns carrying an implicit double level in their
    names, where a separator string marks the split between name and date."""
    frame = pd.DataFrame(
        columns=["EQLNUM", "STATIC", "OWC@2000-01-01", "OWC@2020-01-01"],
        data=[[1, 1.2, 2000, 1900], [2, 1.3, 2100, 2050]],
    )
    untouched = frame.copy()
    common.stack_on_colnames(frame, inplace=False)
    # With inplace=False the input frame must be left as-is:
    pd.testing.assert_frame_equal(frame, untouched)
    result = common.stack_on_colnames(frame)
    # The default invocation modified the input frame in the process:
    assert not frame.equals(untouched)
    assert "DATE" in result
    assert "OWC" in result
    assert len(result.columns) == 4
    assert len(result["DATE"].unique()) == 2
    assert len(result) == 4
    assert not result.isnull().sum().sum()

    # Two distinct dated quantities (OWC and GOC) over the same dates:
    frame = pd.DataFrame(
        columns=[
            "EQLNUM",
            "STATIC",
            "OWC@2000-01-01",
            "OWC@2020-01-01",
            "GOC@2000-01-01",
            "GOC@2020-01-01",
        ],
        data=[[1, 1.2, 2000, 1900, 1800, 1700], [2, 1.3, 2100, 2050, 2000, 1950]],
    )
    result = common.stack_on_colnames(frame)
    assert "DATE" in result
    assert "OWC" in result
    assert "GOC" in result
    assert len(result.columns) == 5
    assert len(result["DATE"].unique()) == 2
    assert len(result) == 4
    assert not result.isnull().sum().sum()

    # Only dated columns, nothing static:
    frame = pd.DataFrame(
        columns=["OWC@2000-01-01", "OWC@2020-01-01"],
        data=[[2000, 1900], [2100, 2050]],
    )
    result = common.stack_on_colnames(frame)
    assert "DATE" in result
    assert "OWC" in result
    assert len(result.columns) == 2
    assert len(result["DATE"].unique()) == 2
    assert len(result) == 4
    assert not result.isnull().sum().sum()

    # No dated columns at all; the frame should pass through unchanged:
    frame = pd.DataFrame(columns=["EQLNUM", "STATIC"], data=[[1, 1.2], [2, 1.3]])
    result = common.stack_on_colnames(frame)
    assert "DATE" not in result
    assert "OWC" not in result
    assert "EQLNUM" in result
    assert "STATIC" in result
    assert len(result.columns) == 2
    assert len(result) == 2
    assert not result.isnull().sum().sum()