def test_dump_csv_line(self):
    """Dump one CSV line for all F* summary vectors and check the file exists.

    Also verifies that requesting an unknown keyword raises KeyError
    instead of silently doing nothing.
    """
    ecl_sum_vector = EclSumKeyWordVector(self.ecl_sum)
    ecl_sum_vector.addKeywords("F*")

    with self.assertRaises(KeyError):
        ecl_sum_vector.addKeyword("MISSING")

    dtime = datetime.datetime(2002, 1, 1, 0, 0, 0)
    with TestAreaContext("EclSum/csv_dump"):
        test_file_name = self.createTestPath("dump.csv")
        # BUG FIX: the handle was previously never closed (resource leak,
        # potentially unflushed output); use a context manager instead.
        with copen(test_file_name, "w") as outputH:
            self.ecl_sum.dumpCSVLine(dtime, ecl_sum_vector, outputH)
        assert os.path.isfile(test_file_name)
def _load_smry_into_table(smry_filename: str) -> pa.Table:
    """Read data from a SMRY file into a PyArrow Table.

    The DATE column is stored as an Arrow timestamp with ms resolution,
    timestamp[ms].  All numeric columns are stored as 32 bit floats.
    Summary meta data is attached per field/column of the table's schema
    under the 'smry_meta' key.
    """
    eclsum = EclSum(smry_filename, include_restart=False, lazy_load=False)

    # Go via a set to prune out duplicate entries being returned by
    # EclSumKeyWordVector,
    # see: https://github.com/equinor/ecl/issues/816#issuecomment-865881283
    # BUG FIX: sort the pruned names so the column order of the exported
    # table is deterministic instead of depending on set iteration order.
    column_names: List[str] = sorted(
        set(EclSumKeyWordVector(eclsum, add_keywords=True))
    )

    # Exclude CPI columns from export
    org_col_count = len(column_names)
    column_names = [colname for colname in column_names if not _is_cpi_column(colname)]
    if len(column_names) != org_col_count:
        logger.info(
            f"Excluding {org_col_count - len(column_names)} CPI columns from export"
        )

    # Fetch the dates as a numpy array with ms resolution
    np_dates_ms = eclsum.numpy_dates

    smry_meta_dict = _create_smry_meta_dict(eclsum, column_names)

    # Datatypes to use for the DATE column and all the numeric columns
    dt_timestamp_ms = pa.timestamp("ms")
    dt_float32 = pa.float32()

    # Build schema for the table: DATE first, then one float field per vector
    field_list: List[pa.Field] = [pa.field("DATE", dt_timestamp_ms)]
    for colname in column_names:
        field_metadata = {b"smry_meta": json.dumps(smry_meta_dict[colname])}
        field_list.append(pa.field(colname, dt_float32, metadata=field_metadata))
    schema = pa.schema(field_list)

    # Extract all the summary vectors one by one through EclSum.numpy_vector()
    # instead of EclSum.pandas_frame(), since the latter throws an exception
    # if the SMRY data has timestamps beyond 2262,
    # see: https://github.com/equinor/ecl/issues/802
    column_arrays = [np_dates_ms]
    for colname in column_names:
        column_arrays.append(eclsum.numpy_vector(colname))

    return pa.table(column_arrays, schema=schema)
def test_kw_vector(self):
    """Exercise EclSumKeyWordVector: interpolation rows, iteration and copy().

    case2 deliberately lacks FGPT, so its dumped CSV line has an empty
    field at that position.
    """
    case1 = create_case()
    case2 = createEclSum(
        "CSV",
        [
            ("FOPR", None, 0, "SM3/DAY"),
            ("FOPT", None, 0, "SM3"),
            ("FWPT", None, 0, "SM3"),
        ],
        sim_length_days=100,
        num_report_step=10,
        num_mini_step=10,
        func_table={"FOPT": fopt, "FOPR": fopr, "FWPT": fgpt},
    )

    kw_list = EclSumKeyWordVector(case1)
    kw_list.add_keyword("FOPT")
    kw_list.add_keyword("FGPT")
    kw_list.add_keyword("FOPR")

    t = case1.getDataStartTime() + datetime.timedelta(days=43)
    data = case1.get_interp_row(kw_list, t)
    # BUG FIX: the expected values must follow the order the keywords were
    # added to kw_list (FOPT, FGPT, FOPR) -- the original compared every
    # entry against the FOPT interpolation.
    expected = [
        case1.get_interp("FOPT", date=t),
        case1.get_interp("FGPT", date=t),
        case1.get_interp("FOPR", date=t),
    ]
    for d1, d2 in zip(data, expected):
        self.assertFloatEqual(d1, d2)

    # Iterating the vector twice must yield the same keys in the same order.
    tmp = []
    for key in kw_list:
        tmp.append(key)
    for k1, k2 in zip(kw_list, tmp):
        self.assertEqual(k1, k2)

    kw_list2 = kw_list.copy(case2)
    self.assertIn("FOPT", kw_list2)
    self.assertIn("FOPR", kw_list2)
    self.assertIn("FGPT", kw_list2)
    data2 = case2.get_interp_row(kw_list2, t)
    self.assertEqual(len(data2), 3)
    self.assertEqual(data[0], data2[0])
    self.assertEqual(data[2], data2[2])

    with TestAreaContext("sum_vector"):
        with cwrap.open("f1.txt", "w") as f:
            case1.dumpCSVLine(t, kw_list, f)
        with cwrap.open("f2.txt", "w") as f:
            case2.dumpCSVLine(t, kw_list2, f)

        with open("f1.txt") as f:
            d1 = f.readline().split(",")
        with open("f2.txt") as f:
            d2 = f.readline().split(",")

        self.assertEqual(d1[0], d2[0])
        self.assertEqual(d1[2], d2[2])
        # case2 has no FGPT, so the copied vector dumps an empty field there.
        self.assertEqual(d2[1], "")
def _libecl_eclsum_pandas_frame(
    eclsum: EclSum,
    time_index: Optional[Union[List[dt.date], List[dt.datetime]]] = None,
    column_keys: Optional[List[str]] = None,
) -> pd.DataFrame:
    """Build a Pandas dataframe from an EclSum object.

    Temporarily copied from libecl to circumvent bug
    https://github.com/equinor/ecl/issues/802

    Args:
        eclsum: The summary object to extract vectors from.
        time_index: Optional dates/datetimes to interpolate the vectors
            onto; when None, the native report dates of eclsum are used.
        column_keys: Optional key patterns (wildcards allowed via
            add_keywords()); when None, all keywords are included.

    Raises:
        ValueError: If column_keys matches no keyword at all.
    """
    if column_keys is None:
        keywords = EclSumKeyWordVector(eclsum, add_keywords=True)
    else:
        keywords = EclSumKeyWordVector(eclsum)
        for key in column_keys:
            # NOTE: add_keywords (plural) is intentional -- it accepts
            # wildcard patterns, while add_keyword takes a literal key.
            keywords.add_keywords(key)

    if len(keywords) == 0:
        raise ValueError("No valid key")

    # pylint: disable=protected-access
    if time_index is None:
        time_index = eclsum.dates  # Changed from libecl
        data = np.zeros([len(time_index), len(keywords)])
        EclSum._init_pandas_frame(
            eclsum, keywords, data.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
        )
    else:
        time_points = eclsum._make_time_vector(time_index)
        data = np.zeros([len(time_points), len(keywords)])
        EclSum._init_pandas_frame_interp(
            eclsum,
            keywords,
            time_points,
            data.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
        )

    # Do not give datetime64[ms] to Pandas, it will try to convert it
    # to datetime64[ns] and error hard if it is out of bounds (year 2262)
    # BUG FIX: guard the sanity check so an empty time_index no longer
    # raises IndexError here.
    if time_index:
        assert isinstance(time_index[0], (dt.date, dt.datetime))

    frame = pd.DataFrame(
        index=time_index,
        columns=list(keywords),
        data=data,
    )
    # frame.index dtype is now either datetime64[ns] or datetime.datetime
    # (object), depending on whether the date range ended before 2262.
    return frame
def _extract_well_connection_status(filename: Path) -> pd.DataFrame:
    # pylint: disable=too-many-locals
    """Extracts well connection status history for each compdat connection that
    is included in the summary data on the form CPI:WELL,I,J,K.

    From the CPI time series it is possible to extract the status of the
    connection because it is 0 when the connection is SHUT and >0 when the
    connection is open.

    The output from this function is one row for every time a connection
    changes status. The earliest date for any connection will be OPEN, i.e
    a cell can not be SHUT before it has been OPEN. This means that any cells
    that are always SHUT will not be included in the export.
    """
    eclsum = EclSum(str(filename), include_restart=False, lazy_load=False)
    column_names: Set[str] = set(EclSumKeyWordVector(eclsum, add_keywords=True))
    np_dates_ms = eclsum.numpy_dates

    # CPI vectors are named CPI:<well>:<i>,<j>,<k>
    cpi_columns = [
        col
        for col in column_names
        if re.match("^CPI:[A-Z0-9_-]{1,8}:[0-9]+,[0-9]+,[0-9]+$", col)
    ]

    # PERF FIX: collect all rows in a plain list and build the DataFrame
    # once at the end -- growing the frame with df.loc[df.shape[0]] = ...
    # per row is quadratic in the number of status changes.
    rows: List[List] = []
    for col in cpi_columns:
        colsplit = col.split(":")
        well = colsplit[1]
        i, j, k = colsplit[2].split(",")
        vector = eclsum.numpy_vector(col)
        for date, status in _get_status_changes(np_dates_ms, vector):
            rows.append([date, well, i, j, k, status])

    return pd.DataFrame(rows, columns=["DATE", "WELL", "I", "J", "K", "OP/SH"])