Example #1
    def test_dump_csv_line(self):
        ecl_sum_vector = EclSumKeyWordVector(self.ecl_sum)
        ecl_sum_vector.addKeywords("F*")

        with self.assertRaises(KeyError):
            ecl_sum_vector.addKeyword("MISSING")

        dtime = datetime.datetime(2002, 1, 1, 0, 0, 0)
        with TestAreaContext("EclSum/csv_dump"):
            test_file_name = self.createTestPath("dump.csv")
            with copen(test_file_name, "w") as outputH:
                self.ecl_sum.dumpCSVLine(dtime, ecl_sum_vector, outputH)
            assert os.path.isfile(test_file_name)
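A short follow-up sketch (an illustration, not part of the original test; names as in the test above): dumpCSVLine writes a single comma-separated row with one value per keyword in the vector, so the dumped file can be read back with the standard csv module.

import csv

# Read back the line dumped above; test_file_name and ecl_sum_vector as in the test.
with open(test_file_name) as fin:
    row = next(csv.reader(fin))
assert len(row) == len(ecl_sum_vector)  # one field per keyword in the vector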
Example #2
def _load_smry_into_table(smry_filename: str) -> pa.Table:
    """
    Reads data from SMRY file into PyArrow Table.
    DATE column is stored as an Arrow timetamp with ms resolution, timestamp[ms]
    All numeric columns will be stored as 32 bit float
    Summary meta data will be attached per field/column of the table's schema under the
    'smry_meta' key
    """

    eclsum = EclSum(smry_filename, include_restart=False, lazy_load=False)

    # For now, we go via a set to prune out duplicate entries being returned by EclSumKeyWordVector,
    # see: https://github.com/equinor/ecl/issues/816#issuecomment-865881283
    column_names: List[str] = list(
        set(EclSumKeyWordVector(eclsum, add_keywords=True)))

    # Exclude CPI columns from export
    org_col_count = len(column_names)
    column_names = [
        colname for colname in column_names if not _is_cpi_column(colname)
    ]
    if len(column_names) != org_col_count:
        logger.info(
            f"Excluding {org_col_count - len(column_names)} CPI columns from export"
        )

    # Fetch the dates as a numpy array with ms resolution
    np_dates_ms = eclsum.numpy_dates

    smry_meta_dict = _create_smry_meta_dict(eclsum, column_names)

    # Datatypes to use for DATE column and all the numeric columns
    dt_timestamp_ms = pa.timestamp("ms")
    dt_float32 = pa.float32()

    # Build schema for the table
    field_list: List[pa.Field] = []
    field_list.append(pa.field("DATE", dt_timestamp_ms))
    for colname in column_names:
        field_metadata = {b"smry_meta": json.dumps(smry_meta_dict[colname])}
        field_list.append(
            pa.field(colname, dt_float32, metadata=field_metadata))

    schema = pa.schema(field_list)

    # Now extract all the summary vectors one by one
    # We do this through EclSum.numpy_vector() instead of EclSum.pandas_frame() since
    # the latter throws an exception if the SMRY data has timestamps beyond 2262,
    # see: https://github.com/equinor/ecl/issues/802
    column_arrays = [np_dates_ms]

    for colname in column_names:
        colvector = eclsum.numpy_vector(colname)
        column_arrays.append(colvector)

    table = pa.table(column_arrays, schema=schema)

    return table
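A minimal follow-up sketch (the case and vector names are assumptions for illustration): the per-column metadata attached above can be read back from the table's schema and survives an Arrow IPC/Feather round-trip.

import json
import pyarrow.feather as pf

table = _load_smry_into_table("MYCASE.SMSPEC")      # assumed file name
field = table.schema.field("FOPT")                  # assumed vector in the case
smry_meta = json.loads(field.metadata[b"smry_meta"])
pf.write_feather(table, "mycase.arrow")             # field metadata is preserved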
Example #3
    def test_kw_vector(self):
        case1 = create_case()
        case2 = createEclSum("CSV", [("FOPR", None, 0, "SM3/DAY"),
                                     ("FOPT", None, 0, "SM3"),
                                     ("FWPT", None, 0, "SM3")],
                             sim_length_days=100,
                             num_report_step=10,
                             num_mini_step=10,
                             func_table={"FOPT": fopt,
                                         "FOPR": fopr,
                                         "FWPT": fgpt})

        kw_list = EclSumKeyWordVector(case1)
        kw_list.add_keyword("FOPT")
        kw_list.add_keyword("FGPT")
        kw_list.add_keyword("FOPR")

        t = case1.getDataStartTime() + datetime.timedelta(days=43)
        data = case1.get_interp_row(kw_list, t)
        # Compare row values against per-keyword interpolation, in kw_list order
        for d1, d2 in zip(data, [case1.get_interp("FOPT", date=t),
                                 case1.get_interp("FGPT", date=t),
                                 case1.get_interp("FOPR", date=t)]):
            self.assertFloatEqual(d1, d2)

        tmp = []
        for key in kw_list:
            tmp.append(key)

        for k1, k2 in zip(kw_list, tmp):
            self.assertEqual(k1, k2)

        kw_list2 = kw_list.copy(case2)
        self.assertIn("FOPT", kw_list2)
        self.assertIn("FOPR", kw_list2)
        self.assertIn("FGPT", kw_list2)
        data2 = case2.get_interp_row(kw_list2, t)

        self.assertEqual(len(data2), 3)
        self.assertEqual(data[0], data2[0])
        self.assertEqual(data[2], data2[2])

        with TestAreaContext("sum_vector"):
            with cwrap.open("f1.txt", "w") as f:
                case1.dumpCSVLine(t, kw_list, f)

            with cwrap.open("f2.txt", "w") as f:
                case2.dumpCSVLine(t, kw_list2, f)

            with open("f1.txt") as f:
                d1 = f.readline().split(",")

            with open("f2.txt") as f:
                d2 = f.readline().split(",")

            self.assertEqual(d1[0], d2[0])
            self.assertEqual(d1[2], d2[2])
            self.assertEqual(d2[1], "")  # FGPT has no data in case2, so its cell is empty
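A small hedged sketch (assumes EclSum supports membership tests, key in case): copy() retains every key in kw_list2 even when the target case lacks the underlying vector, and the missing ones can be listed directly.

missing = [key for key in kw_list2 if key not in case2]
print(missing)  # expected: ["FGPT"] for the cases built above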
Example #4
def _libecl_eclsum_pandas_frame(
    eclsum: EclSum,
    time_index: Optional[Union[List[dt.date], List[dt.datetime]]] = None,
    column_keys: Optional[List[str]] = None,
) -> pd.DataFrame:
    """Build a Pandas dataframe from an EclSum object.

    Temporarily copied from libecl to circumvent a bug:

    https://github.com/equinor/ecl/issues/802
    """
    if column_keys is None:
        keywords = EclSumKeyWordVector(eclsum, add_keywords=True)
    else:
        keywords = EclSumKeyWordVector(eclsum)
        for key in column_keys:
            keywords.add_keywords(key)

    if len(keywords) == 0:
        raise ValueError("No valid key")

    # pylint: disable=protected-access
    if time_index is None:
        time_index = eclsum.dates  # Changed from libecl
        data = np.zeros([len(time_index), len(keywords)])
        EclSum._init_pandas_frame(
            eclsum, keywords, data.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
        )
    else:
        time_points = eclsum._make_time_vector(time_index)
        data = np.zeros([len(time_points), len(keywords)])
        EclSum._init_pandas_frame_interp(
            eclsum,
            keywords,
            time_points,
            data.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
        )

    # Do not give datetime64[ms] to Pandas, it will try to convert it
    # to datetime64[ns] and error hard if it is out of bounds (year 2262)
    assert isinstance(time_index[0], (dt.date, dt.datetime))
    frame = pd.DataFrame(
        index=time_index,
        columns=list(keywords),
        data=data,
    )

    # frame.index.dtype is now either datetime64[ns] or object (datetime.datetime),
    # depending on whether the date range ended before 2262.
    return frame
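A minimal sketch of the year-2262 limitation this copy works around (behaviour as under pandas 1.x; newer pandas may instead keep such dates as datetime64 with coarser resolution):

import datetime as dt
import pandas as pd

dates = [dt.datetime(2300, 1, 1), dt.datetime(2301, 1, 1)]
frame = pd.DataFrame(index=dates, data={"FOPT": [1.0, 2.0]})
print(frame.index.dtype)  # object: out-of-ns-bounds dates stay datetime.datetime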
Example #5
def _extract_well_connection_status(filename: Path) -> pd.DataFrame:
    # pylint: disable=too-many-locals
    """Exctracts well connection status history for each compdat connection that
    is included in the summary data on the form CPI:WELL,I,J,K.

    From the CPI time series it is possible to extract the status of the connection
    because it is 0 when the connection is SHUT and >0 when the connection is open.

    The output from this function is one row for every time a connection changes
    status. The earliest date for any connection will be OPEN, i.e a cell can not
    be SHUT before it has been OPEN. This means that any cells that are always SHUT
    will not be included in the export.
    """

    eclsum = EclSum(str(filename), include_restart=False, lazy_load=False)
    column_names: Set[str] = set(EclSumKeyWordVector(eclsum,
                                                     add_keywords=True))
    np_dates_ms = eclsum.numpy_dates

    cpi_columns = [
        col for col in column_names
        if re.match("^CPI:[A-Z0-9_-]{1,8}:[0-9]+,[0-9]+,[0-9]+$", col)
    ]
    df = pd.DataFrame(columns=["DATE", "WELL", "I", "J", "K", "OP/SH"])

    for col in cpi_columns:
        colsplit = col.split(":")
        well = colsplit[1]
        i, j, k = colsplit[2].split(",")

        vector = eclsum.numpy_vector(col)

        status_changes = _get_status_changes(np_dates_ms, vector)
        for date, status in status_changes:
            df.loc[df.shape[0]] = [date, well, i, j, k, status]

    return df
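The helper _get_status_changes is not shown in this example; below is a hypothetical sketch of what it could look like, given only the docstring's contract (one entry per status flip, never starting with SHUT).

import numpy as np
from typing import List, Tuple

def _get_status_changes_sketch(
    np_dates_ms: np.ndarray, cpi_vector: np.ndarray
) -> List[Tuple[np.datetime64, str]]:
    """Hypothetical reimplementation, not the original helper."""
    changes: List[Tuple[np.datetime64, str]] = []
    prev_open = False  # start closed, so the first emitted status is always OPEN
    for date, value in zip(np_dates_ms, cpi_vector):
        is_open = bool(value > 0)
        if is_open != prev_open:
            changes.append((date, "OPEN" if is_open else "SHUT"))
            prev_open = is_open
    return changes  # a connection that is always SHUT yields no rows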