def df(deck: Union[EclFiles, "opm.libopmcommon_python.Deck"]) -> pd.DataFrame:
    """Extract the records of all WCON* keywords in a deck as a dataframe.

    The loop over the deck is a state machine, as it has to keep track of
    the current date (set by DATES/START and advanced by TSTEP) in order to
    tag every record with the date at which it was encountered.

    Args:
        deck: An EclFiles object (its deck is fetched with ``get_ecldeck()``)
            or an already parsed deck.

    Returns:
        Dataframe with one row per record found in the keywords listed in
        WCONKEYS. Each row carries the parsed items plus the columns DATE
        (None if no date has been seen yet) and KEYWORD. An empty dataframe
        is returned if TSTEP is encountered before any date is known.
    """
    if isinstance(deck, EclFiles):
        deck = deck.get_ecldeck()

    wconrecords = []  # List of dicts of every line in input file
    date = None  # DATE columns will always be there, but can contain NaN

    for kword in deck:
        if kword.name in ["DATES", "START"]:
            for rec in kword:
                # Parse first, then log, so the message shows the new date
                # and not the one that was active before this record.
                date = parse_opmio_date_rec(rec)
                logger.info("Parsing at date %s", str(date))
        elif kword.name == "TSTEP":
            if not date:
                logger.critical("Can't use TSTEP when there is no start_date")
                return pd.DataFrame()
            for rec in kword:
                steplist = rec[0].get_raw_data_list()
                # Assuming not LAB units, then the unit is days.
                days = sum(steplist)
                date += datetime.timedelta(days=days)
                logger.info(
                    "Advancing %s days to %s through TSTEP", str(days), str(date)
                )
        elif kword.name in WCONKEYS:
            for rec in kword:  # Loop over the lines inside WCON* record
                rec_data = parse_opmio_deckrecord(rec, kword.name)
                rec_data["DATE"] = date
                rec_data["KEYWORD"] = kword.name
                wconrecords.append(rec_data)

    return pd.DataFrame(wconrecords)
def deck2dfs(
    deck: "opm.io.Deck",
    start_date: Optional[Union[str, datetime.date]] = None,
    unroll: bool = True,
) -> Dict[str, pd.DataFrame]:
    """Loop through the deck and pick up information found

    The loop over the deck is a state machine, as it has to pick up dates and
    potential information from the WELSPECS keyword.

    Args:
        deck: A deck representing the schedule
            Does not have to be a full Eclipse deck, an include file is sufficient
        start_date: The default date to use for events where the DATE or START
            keyword is not found in advance. Default: None
        unroll: Whether to unroll rows that cover a range,
            like K1 and K2 in COMPDAT and in WELSEGS. Defaults to True.

    Returns:
        Dictionary with dataframes for the keys COMPDAT, COMPSEGS, WELSEGS,
        WSEGSICD, WSEGAICD and WSEGVALV. An empty dictionary is returned if
        TSTEP is encountered while no date is known.

    Raises:
        ValueError: If I or J is defaulted (0) in a COMPDAT record for a
            named well that has not appeared in any preceding WELSPECS.
    """
    compdatrecords = []  # List of dicts of every line in input file
    compsegsrecords = []
    welopenrecords = []
    welsegsrecords = []
    wsegsicdrecords = []
    wsegaicdrecords = []
    wsegvalvrecords = []
    welspecs = {}
    date = start_date  # DATE column will always be there, but can contain NaN/None

    for idx, kword in enumerate(deck):  # pylint: disable=too-many-nested-blocks
        # Membership test; comparing the name to a tuple with == is never
        # true and would silently skip every DATES/START keyword.
        if kword.name in ("DATES", "START"):
            for rec in kword:
                date = parse_opmio_date_rec(rec)
                logger.info("Parsing at date %s", str(date))
        elif kword.name == "TSTEP":
            if not date:
                logger.critical("Can't use TSTEP when there is no start_date")
                return {}
            for rec in kword:
                steplist = parse_opmio_tstep_rec(rec)
                # Assuming not LAB units, then the unit is days.
                days = sum(steplist)
                assert isinstance(date, datetime.date)
                date += datetime.timedelta(days=days)
                logger.info(
                    "Advancing %s days to %s through TSTEP", str(days), str(date)
                )
        elif kword.name == "WELSPECS":
            # Information from WELSPECS are to be used in case
            # 0 or 1* is used in the I or J column in COMPDAT
            # Only the latest information pr. well is stored.
            for wellrec in kword:
                welspecs_rec_dict = parse_opmio_deckrecord(wellrec, "WELSPECS")
                welspecs[welspecs_rec_dict["WELL"]] = {
                    "I": welspecs_rec_dict["HEAD_I"],
                    "J": welspecs_rec_dict["HEAD_J"],
                }
        elif kword.name == "COMPDAT":
            for rec in kword:  # Loop over the lines inside COMPDAT record
                rec_data = parse_opmio_deckrecord(
                    rec, "COMPDAT", renamer=COMPDAT_RENAMER
                )
                rec_data["DATE"] = date
                rec_data["KEYWORD_IDX"] = idx
                if rec_data["WELL"] != "*":
                    # A value of 0 in I or J means "defaulted": look up the
                    # well head position stored from WELSPECS.
                    for axis in ("I", "J"):
                        if rec_data[axis] == 0:
                            if rec_data["WELL"] not in welspecs:
                                raise ValueError(
                                    "WELSPECS must be provided when "
                                    f"{axis} is defaulted in COMPDAT"
                                )
                            rec_data[axis] = welspecs[rec_data["WELL"]][axis]
                    compdatrecords.append(rec_data)
                else:
                    # Wildcard well: emit one copy of the record for every
                    # well known from WELSPECS, using that well's I and J.
                    for wellname, head in welspecs.items():
                        well_rec = rec_data.copy()
                        well_rec["WELL"] = wellname
                        well_rec["I"] = head["I"]
                        well_rec["J"] = head["J"]
                        compdatrecords.append(well_rec)
        elif kword.name == "WSEGSICD":
            for rec in kword:  # Loop over the lines inside WSEGSICD record
                rec_data = parse_opmio_deckrecord(
                    rec, "WSEGSICD", renamer=WSEG_RENAMER
                )
                rec_data["DATE"] = date
                rec_data["KEYWORD_IDX"] = idx
                wsegsicdrecords.append(rec_data)
        elif kword.name == "WSEGAICD":
            for rec in kword:  # Loop over the lines inside WSEGAICD record
                rec_data = parse_opmio_deckrecord(
                    rec, "WSEGAICD", renamer=WSEG_RENAMER
                )
                rec_data["DATE"] = date
                rec_data["KEYWORD_IDX"] = idx
                wsegaicdrecords.append(rec_data)
        elif kword.name == "WSEGVALV":
            for rec in kword:  # Loop over the lines inside WSEGVALV record
                rec_data = parse_opmio_deckrecord(rec, "WSEGVALV")
                rec_data["DATE"] = date
                rec_data["KEYWORD_IDX"] = idx
                wsegvalvrecords.append(rec_data)
        elif kword.name == "COMPSEGS":
            # The first record holds the well name, the remaining records
            # hold the actual data and are all tagged with that well.
            wellname = parse_opmio_deckrecord(
                kword[0], "COMPSEGS", itemlistname="records", recordindex=0
            )["WELL"]
            for recidx in range(1, len(kword)):
                rec = kword[recidx]
                rec_data = parse_opmio_deckrecord(
                    rec, "COMPSEGS", itemlistname="records", recordindex=1
                )
                rec_data["WELL"] = wellname
                rec_data["DATE"] = date
                compsegsrecords.append(rec_data)
        elif kword.name == "WELOPEN":
            for rec in kword:
                rec_data = parse_opmio_deckrecord(rec, "WELOPEN")
                rec_data["DATE"] = date
                rec_data["KEYWORD_IDX"] = idx
                if rec_data["STATUS"] not in ["OPEN", "SHUT", "STOP", "AUTO"]:
                    # Warn before overwriting, so that the message reports
                    # the offending status and not its replacement.
                    logger.warning(
                        "WELOPEN status %s is not a valid "
                        "COMPDAT state. Using 'SHUT' instead.",
                        rec_data["STATUS"],
                    )
                    rec_data["STATUS"] = "SHUT"
                welopenrecords.append(rec_data)
        elif kword.name == "WELSEGS":
            # First record contains meta-information for well
            # (opm deck returns default values for unspecified items.)
            welsegsdict = parse_opmio_deckrecord(
                kword[0], "WELSEGS", itemlistname="records", recordindex=0
            )
            # Loop over all subsequent records.
            for recidx in range(1, len(kword)):
                rec = kword[recidx]
                # WARNING: We assume that SEGMENT1 === SEGMENT2 (!!!) (if not,
                # we need to loop over a range just as for layer in compdat)
                rec_data = welsegsdict.copy()
                rec_data["DATE"] = date
                rec_data.update(
                    parse_opmio_deckrecord(
                        rec, "WELSEGS", itemlistname="records", recordindex=1
                    )
                )
                if "INFO_TYPE" in rec_data and rec_data["INFO_TYPE"] == "ABS":
                    rec_data["SEGMENT_MD"] = rec_data["SEGMENT_LENGTH"]
                welsegsrecords.append(rec_data)

    compdat_df = pd.DataFrame(compdatrecords)
    welopen_df = pd.DataFrame(welopenrecords)

    if unroll and not compdat_df.empty:
        compdat_df = unrolldf(compdat_df, "K1", "K2")

    if not welopen_df.empty:
        compdat_df = applywelopen(compdat_df, welopen_df)

    compsegs_df = pd.DataFrame(compsegsrecords)
    welsegs_df = pd.DataFrame(welsegsrecords)
    wsegsicd_df = pd.DataFrame(wsegsicdrecords)
    wsegaicd_df = pd.DataFrame(wsegaicdrecords)
    wsegvalv_df = pd.DataFrame(wsegvalvrecords)

    if unroll and not welsegs_df.empty:
        welsegs_df = unrolldf(welsegs_df, "SEGMENT1", "SEGMENT2")

    if unroll and not wsegsicd_df.empty:
        wsegsicd_df = unrolldf(wsegsicd_df, "SEGMENT1", "SEGMENT2")

    if unroll and not wsegaicd_df.empty:
        wsegaicd_df = unrolldf(wsegaicd_df, "SEGMENT1", "SEGMENT2")

    # KEYWORD_IDX is a bookkeeping column and is not part of the output.
    for frame in (compdat_df, wsegsicd_df, wsegaicd_df, wsegvalv_df):
        if "KEYWORD_IDX" in frame.columns:
            frame.drop(["KEYWORD_IDX"], axis=1, inplace=True)

    return {
        "COMPDAT": compdat_df,
        "COMPSEGS": compsegs_df,
        "WELSEGS": welsegs_df,
        "WSEGSICD": wsegsicd_df,
        "WSEGAICD": wsegaicd_df,
        "WSEGVALV": wsegvalv_df,
    }
def df(
    deck: Union[EclFiles, "opm.libopmcommon_python.Deck"],
    startdate: Optional[datetime.date] = None,
    welspecs: bool = True,
) -> pd.DataFrame:
    """Extract all group information from a deck
    and present as a Pandas Dataframe of all edges.

    Properties for nodes given in GRUPNET/NODEPROP will
    be added as extra columns.

    From WELSPECS, well names are extracted and added as nodes with an
    edge to its group.

    The gruptree is a time dependent property, with accumulative effects
    from new occurrences of GRUPNET, WELSPECS, BRANPROP and NODEPROP.

    Whenever the GRUPTREE or BRANPROP tree changes, the previous tree is
    copied and a new complete tree is added to the dataframe tagged with
    the new date.

    startdate is only relevant when START is not in the deck.

    Args:
        deck: opm.io Deck object or EclFiles
        startdate: Date to use until a DATES or START keyword is parsed.
        welspecs: If False, WELSPECS keywords are ignored.

    Returns:
        pd.DataFrame with one row pr edge. Empty dataframe if no
        information is found in deck.
    """
    date: Optional[datetime.date] = startdate

    if isinstance(deck, EclFiles):
        deck = deck.get_ecldeck()

    edgerecords = []  # list of dict of rows containing an edge.
    nodedatarecords = []

    # In order for the GRUPTREE/BRANPROP keywords to accumulate, we
    # store the edges as dictionaries indexed by the edge
    # (which is a tuple of child and parent).
    currentedges: Dict[str, Dict[Tuple[str, str], Dict[str, Any]]] = {
        "GRUPTREE": {},
        "BRANPROP": {},
    }
    # Same approach for the welspecs keywords
    wellspecsedges: Dict[Tuple[str, str], str] = {}
    # Node properties from GRUPNET/NODEPROP is stored in a dataframe
    # Note that it's not allowed to mix GRUPNET and NODEPROP in eclipse
    # so the dataframe will only contain columns from one of them
    nodedata: Dict[str, pd.DataFrame] = {
        "GRUPNET": pd.DataFrame(),
        "NODEPROP": pd.DataFrame(),
    }
    # Flags which will tell when a new network related keyword
    # has been encountered
    keywords = ["GRUPTREE", "BRANPROP", "WELSPECS", "GRUPNET", "NODEPROP"]
    found_keywords = {key: False for key in keywords}

    for kword in deck:
        if kword.name in ["DATES", "START", "TSTEP"]:
            # Whenever we encounter a new DATES, it means that
            # we have processed all the network keywords that
            # have occurred since the last date, so this is the chance
            # to dump the parsed data. Also we dump the *entire* tree
            # at every date with a change, not only the newfound edges.
            if any(found_keywords.values()):
                if date is None:
                    logger.warning("No date parsed, maybe you should pass --startdate")
                    logger.warning("Using 1900-01-01")
                    date = datetime.date(year=1900, month=1, day=1)
                edgerecords += _write_edgerecords(
                    currentedges, nodedata, wellspecsedges, found_keywords, date
                )
                found_keywords = {key: False for key in keywords}

            # Done dumping the data for the previous date, parse the fresh
            # date:
            if kword.name in ["DATES", "START"]:
                for rec in kword:
                    date = parse_opmio_date_rec(rec)
                    logger.debug("Parsing at date %s", str(date))
            elif kword.name == "TSTEP":
                assert date is not None
                for rec in kword:
                    steplist = parse_opmio_tstep_rec(rec)
                    # Assuming not LAB units, then the unit is days.
                    days = sum(steplist)
                    date += datetime.timedelta(days=days)
                    logger.info(
                        "Advancing %s days to %s through TSTEP", str(days), str(date)
                    )

        if kword.name in ["GRUPTREE", "BRANPROP"]:
            found_keywords[kword.name] = True
            # BRANPROP names its columns differently; rename them so both
            # keywords yield CHILD_GROUP/PARENT_GROUP.
            renamer = (
                {"DOWNTREE_NODE": "CHILD_GROUP", "UPTREE_NODE": "PARENT_GROUP"}
                if kword.name == "BRANPROP"
                else None
            )
            for edgerec in kword:
                edge_dict = parse_opmio_deckrecord(edgerec, kword.name, renamer=renamer)
                child = edge_dict.pop("CHILD_GROUP")
                parent = edge_dict.pop("PARENT_GROUP")
                # What remains in edge_dict after popping the two node
                # names is stored as the properties of this edge.
                currentedges[kword.name][(child, parent)] = edge_dict

        if kword.name == "WELSPECS" and welspecs:
            found_keywords["WELSPECS"] = True
            for wellrec in kword:
                wspc_dict = parse_opmio_deckrecord(wellrec, "WELSPECS")
                wellspecsedges[(wspc_dict["WELL"], wspc_dict["GROUP"])] = "WELSPECS"

        if kword.name in ["GRUPNET", "NODEPROP"]:
            found_keywords[kword.name] = True
            renamer = (
                {"PRESSURE": "TERMINAL_PRESSURE"} if kword.name == "NODEPROP" else None
            )
            for rec in kword:
                nodedatarecords.append(
                    parse_opmio_deckrecord(rec, kword.name, renamer=renamer)
                )
            # Rebuild the node table from all records seen so far, keeping
            # only the most recent record for each node name.
            nodedata[kword.name] = (
                pd.DataFrame(nodedatarecords)
                .drop_duplicates(subset="NAME", keep="last")
                .set_index("NAME")
            )

    # Ensure we also store any tree information found after the last DATE statement
    # NOTE(review): unlike the dump inside the loop, no 1900-01-01 fallback is
    # applied here when date is still None -- confirm this is intended.
    if any(found_keywords.values()):
        edgerecords += _write_edgerecords(
            currentedges, nodedata, wellspecsedges, found_keywords, date
        )

    dframe = pd.DataFrame(edgerecords)
    if "DATE" in dframe:
        dframe["DATE"] = pd.to_datetime(dframe["DATE"])

    # Remove rows with duplicate DATE, CHILD and KEYWORD
    # This happens with WELSPECS if both GRUPTREE and BRANPROP is defined
    # at the same timestep. And when a node is redirected to a new parent node
    # An empty frame has none of these columns, so there is nothing to do.
    if not dframe.empty:
        dframe = dframe.drop_duplicates(
            subset=["DATE", "CHILD", "KEYWORD"], keep="last"
        )
    return dframe
def df(
    deck: Union[EclFiles, "opm.libopmcommon_python.Deck"],
    startdate: Optional[datetime.date] = None,
    welspecs: bool = True,
) -> pd.DataFrame:
    """Extract all group information from a deck
    and present as a Pandas Dataframe of all edges.

    Numerical properties for nodes given in GRUPNET will be
    added as extra columns.

    From WELSPECS, well names are extracted and added as nodes with an
    edge to its group.

    The gruptree is a time dependent property, with accumulative effects
    from new occurrences of GRUPTREE or WELSPECS.

    Whenever the tree changes, the previous tree is copied and a new
    complete tree is added to the dataframe tagged with the new date.

    startdate is only relevant when START is not in the deck.

    Args:
        deck: opm.io Deck object or EclFiles
        startdate: Date to use until a DATES or START keyword is parsed.
        welspecs: If False, WELSPECS keywords are ignored.

    Returns:
        pd.DataFrame with one row pr edge. Empty dataframe if no
        information is found in deck, or if a TSTEP record sums to a
        non-positive number of days.
    """
    date: Optional[datetime.date] = startdate

    if isinstance(deck, EclFiles):
        deck = deck.get_ecldeck()

    gruptreerecords = []  # list of dict of rows containing an edge.
    grupnetrecords = []

    # In order for the GRUPTREE keywords to accumulate, we
    # store the edges as a dictionary indexed by the edge
    # (which is a tuple of child and parent).
    # The value of the dictionary is GRUPTREE or WELSPECS
    currentedges: Dict[tuple, str] = {}

    grupnet_df: pd.DataFrame = pd.DataFrame()

    found_gruptree = False  # Flags which will tell when a new GRUPTREE or
    found_welspecs = False  # WELSPECS have been encountered.
    found_grupnet = False  # GRUPNET has been encountered

    for kword in deck:
        if kword.name in ("DATES", "START", "TSTEP"):
            # Whenever we encounter a new DATES, it means that
            # we have processed all the GRUPTREE and WELSPECS that
            # have occurred since the last date, so this is the chance
            # to dump the parsed data. Also we dump the *entire* tree
            # at every date with a change, not only the newfound edges.
            if currentedges and (found_gruptree or found_welspecs or found_grupnet):
                if date is None:
                    logger.warning("No date parsed, maybe you should pass --startdate")
                    logger.warning("Using 1900-01-01")
                    date = datetime.date(year=1900, month=1, day=1)
                gruptreerecords += _currentedges_to_gruptreerecords(
                    currentedges, grupnet_df, date
                )
                found_gruptree = False
                found_welspecs = False
                found_grupnet = False

            # Done dumping the data for the previous date, parse the fresh
            # date:
            if kword.name in ("DATES", "START"):
                for rec in kword:
                    date = parse_opmio_date_rec(rec)
                    # Use the module logger like every other message here,
                    # rather than the root logger.
                    logger.info("Parsing at date %s", str(date))
            else:  # TSTEP, the only remaining name admitted by the outer test
                assert date is not None
                for rec in kword:
                    steplist = parse_opmio_tstep_rec(rec)
                    # Assuming not LAB units, then the unit is days.
                    days = sum(steplist)
                    if days <= 0:
                        logger.critical("Invalid TSTEP, summed to %s days", str(days))
                        return pd.DataFrame()
                    date += datetime.timedelta(days=days)
                    logger.info(
                        "Advancing %s days to %s through TSTEP", str(days), str(date)
                    )

        if kword.name == "GRUPTREE":
            found_gruptree = True
            for edgerec in kword:
                edge_dict = parse_opmio_deckrecord(edgerec, "GRUPTREE")
                currentedges[
                    (edge_dict["CHILD_GROUP"], edge_dict["PARENT_GROUP"])
                ] = "GRUPTREE"

        if kword.name == "WELSPECS" and welspecs:
            found_welspecs = True
            for wellrec in kword:
                wspc_dict = parse_opmio_deckrecord(wellrec, "WELSPECS")
                currentedges[(wspc_dict["WELL"], wspc_dict["GROUP"])] = "WELSPECS"

        if kword.name == "GRUPNET":
            found_grupnet = True
            for rec in kword:
                grupnetrecords.append(parse_opmio_deckrecord(rec, "GRUPNET"))
            # Rebuild the node table from all GRUPNET records seen so far,
            # keeping only the most recent record for each node name.
            grupnet_df = (
                pd.DataFrame(grupnetrecords)
                .drop_duplicates(subset="NAME", keep="last")
                .set_index("NAME")
            )

    # Ensure we also store any tree information found after the last DATE
    # statement. The condition mirrors the one used at date boundaries, so
    # that a GRUPNET appearing after the last date is not silently dropped.
    if currentedges and (found_gruptree or found_welspecs or found_grupnet):
        gruptreerecords += _currentedges_to_gruptreerecords(
            currentedges, grupnet_df, date
        )

    dframe = pd.DataFrame(gruptreerecords)
    if "DATE" in dframe:
        dframe["DATE"] = pd.to_datetime(dframe["DATE"])
    return dframe