def write_hdf5_and_create_xml(self):
    """Write dataframe data to hdf5 file and create xml for RESQML objects to represent dataframe."""

    # delegate the heavy lifting: writes the hdf5 data and creates xml for the mesh (and property)
    self._set_mesh_from_df()

    # ensure a string lookup exists mapping column index to column name
    if self.column_lookup is None:
        self.column_lookup = rqp.StringLookup(self.model,
                                              int_to_str_dict=dict(enumerate(self.df.columns)),
                                              title='dataframe columns')
        self.column_lookup_uuid = self.column_lookup.uuid
        col_node = self.column_lookup.create_xml()
    else:
        col_node = self.column_lookup.root
    if col_node is not None:
        # relate the mesh to the column name lookup table
        self.model.create_reciprocal_relationship(self.mesh.root, 'destinationObject', col_node, 'sourceObject')

    # optionally establish a string lookup mapping column index to unit of measure
    uom_node = None
    if self.uom_lookup is not None:
        uom_node = self.uom_lookup.root
    elif self.uom_list:
        self.uom_lookup = rqp.StringLookup(self.model,
                                           int_to_str_dict=dict(enumerate(self.uom_list)),
                                           title='dataframe units')
        self.uom_lookup_uuid = self.uom_lookup.uuid
        uom_node = self.uom_lookup.create_xml()
    if uom_node is not None:
        # relate the mesh to the units lookup table
        self.model.create_reciprocal_relationship(self.mesh.root, 'destinationObject', uom_node, 'sourceObject')
def lookup_from_cellio(line, model):
    """Create a StringLookup Object from a cell I/O row containing a categorical column name and details.

    Arguments:
       line: a string from a cell I/O file, containing the column (log) name, type and categorical information
       model: the model to add the StringTableLookup to

    Returns:
       uuid: the uuid of a StringTableLookup, either for a newly created table, or for an existing table
       if an identical one exists
    """
    lookup_dict = {}
    value, string = None, None
    # Generate a dictionary of values and strings
    # word layout appears to be: title, type-word (ignored), then alternating int value / string pairs
    # NOTE(review): assumes the line is well formed, ie. at least one word and pairs alternate — TODO confirm
    for i, word in enumerate(line.split()):
        if i == 0:
            title = word  # first word is the column (log) name
        elif not i < 2:  # i == 1 (the type word) is deliberately skipped
            if value is not None and string is not None:
                # a complete value/string pair has been gathered; commit it before starting the next
                lookup_dict[value] = string
                value, string = None, None
            if value is None:
                value = int(word)  # even-position word within the pairs: the integer key
            else:
                if i == len(line.split()) - 1:
                    # final word: commit immediately as there is no following word to trigger the commit above
                    lookup_dict[value] = word
                else:
                    string = word
    # Check if a StringLookupTable already exists in the model, with the same name and values
    for existing_uuid in model.uuids(obj_type='StringTableLookup'):
        table = rqp.StringLookup(parent_model=model, uuid=existing_uuid)
        if table.title == title:
            if table.str_dict == lookup_dict:
                return table.uuid  # If the exact table exists, reuse it by returning the uuid
    # If no matching StringLookupTable exists, make a new one and return the uuid
    lookup = rqp.StringLookup(parent_model=model, int_to_str_dict=lookup_dict, title=title)
    lookup.create_xml(add_as_part=True)
    return lookup.uuid
def example_model_with_prop_ts_rels(tmp_path):
    """Model with a grid (5x5x3) and properties.

    Properties:
    - Zone (discrete)
    - VPC (discrete)
    - Fault block (discrete)
    - Facies (discrete)
    - NTG (continuous)
    - POR (continuous)
    - SW (continuous) (recurrent)
    """
    # create a new empty model with an hdf5 external part, stored under the (pytest) tmp_path
    model_path = str(tmp_path / 'test_model.epc')
    model = Model(create_basics=True, create_hdf5_ext=True, epc_file=model_path, new_epc=True)
    model.store_epc(model.epc_file)

    # build a simple regular grid (nk=3, nj=5, ni=5) and persist its geometry
    grid = grr.RegularGrid(parent_model=model,
                           origin=(0, 0, 0),
                           extent_kji=(3, 5, 5),
                           crs_uuid=rqet.uuid_for_part_root(model.crs_root),
                           set_points_cached=True)
    grid.cache_all_geometry_arrays()
    grid.write_hdf5_from_caches(file=model.h5_file_name(file_must_exist=False), mode='w')
    grid.create_xml(ext_uuid=model.h5_uuid(),
                    title='grid',
                    write_geometry=True,
                    add_cell_length_properties=False)
    model.store_epc()

    # cell-by-cell property arrays, each shaped (nk, nj, ni) = (3, 5, 5)
    zone = np.ones(shape=(5, 5), dtype='int')
    zone_array = np.array([zone, zone + 1, zone + 2], dtype='int')  # zone varies by layer
    vpc = np.array([[1, 1, 1, 2, 2], [1, 1, 1, 2, 2], [1, 1, 1, 2, 2], [1, 1, 1, 2, 2], [1, 1, 1, 2, 2]],
                   dtype='int')
    vpc_array = np.array([vpc, vpc, vpc], dtype='int')
    facies = np.array([[1, 1, 1, 2, 2], [1, 1, 2, 2, 2], [1, 2, 2, 2, 3], [2, 2, 2, 3, 3], [2, 2, 3, 3, 3]],
                      dtype='int')
    facies_array = np.array([facies, facies, facies], dtype='int')
    perm = np.array([[1, 1, 1, 10, 10], [1, 1, 1, 10, 10], [1, 1, 1, 10, 10], [1, 1, 1, 10, 10],
                     [1, 1, 1, 10, 10]])
    perm_array = np.array([perm, perm, perm], dtype='float')
    fb = np.array([[1, 1, 1, 1, 1], [1, 1, 1, 1, 1], [1, 1, 1, 1, 1], [2, 2, 2, 2, 2], [2, 2, 2, 2, 2]],
                  dtype='int')
    fb_array = np.array([fb, fb, fb], dtype='int')
    ntg = np.array([[0, 0.5, 0, 0.5, 0], [0.5, 0, 0.5, 0, 0.5], [0, 0.5, 0, 0.5, 0], [0.5, 0, 0.5, 0, 0.5],
                    [0, 0.5, 0, 0.5, 0]])
    ntg1_array = np.array([ntg, ntg, ntg])
    ntg2_array = np.array([ntg + 0.1, ntg + 0.1, ntg + 0.1])  # second realisation variant
    por = np.array([[1, 1, 1, 1, 1], [0.5, 0.5, 0.5, 0.5, 0.5], [1, 1, 1, 1, 1], [0.5, 0.5, 0.5, 0.5, 0.5],
                    [1, 1, 1, 1, 1]])
    por1_array = np.array([por, por, por])
    por2_array = np.array([por - 0.1, por - 0.1, por - 0.1])  # second realisation variant
    sat = np.array([[1, 0.5, 1, 0.5, 1], [1, 0.5, 1, 0.5, 1], [1, 0.5, 1, 0.5, 1], [1, 0.5, 1, 0.5, 1],
                    [1, 0.5, 1, 0.5, 1]])
    sat1_array = np.array([sat, sat, sat])
    sat2_array = np.array([sat, sat, np.where(sat == 0.5, 0.75, sat)])  # SW changing over time
    sat3_array = np.array([
        np.where(sat == 0.5, 0.75, sat),
        np.where(sat == 0.5, 0.75, sat),
        np.where(sat == 0.5, 0.75, sat)
    ])

    collection = rqp.GridPropertyCollection()
    collection.set_grid(grid)

    # a three-timestamp time series (yearly) for the recurrent SW property
    ts = rqts.TimeSeries(parent_model=model, first_timestamp='2000-01-01Z')
    ts.extend_by_days(365)
    ts.extend_by_days(365)
    ts.create_xml()

    # string lookup for the categorical Facies property
    lookup = rqp.StringLookup(parent_model=model,
                              int_to_str_dict={
                                  1: 'channel',
                                  2: 'interbedded',
                                  3: 'shale'
                              })
    lookup.create_xml()
    model.store_epc()

    # Add non-varying properties
    for array, name, kind, discrete, facet_type, facet in zip(
        [zone_array, vpc_array, fb_array, perm_array], ['Zone', 'VPC', 'Fault block', 'Perm'],
        ['discrete', 'discrete', 'discrete', 'permeability rock'], [True, True, True, False],
        [None, None, None, 'direction'], [None, None, None, 'J']):
        collection.add_cached_array_to_imported_list(cached_array=array,
                                                     source_info='',
                                                     keyword=name,
                                                     discrete=discrete,
                                                     uom=None,
                                                     time_index=None,
                                                     null_value=None,
                                                     property_kind=kind,
                                                     facet_type=facet_type,
                                                     facet=facet,
                                                     realization=None)
    collection.write_hdf5_for_imported_list()
    collection.create_xml_for_imported_list_and_add_parts_to_model()

    # Add realisation varying properties
    for array, name, kind, rel in zip([ntg1_array, por1_array, ntg2_array, por2_array], ['NTG', 'POR', 'NTG', 'POR'],
                                      ['net to gross ratio', 'porosity', 'net to gross ratio', 'porosity'],
                                      [0, 0, 1, 1]):
        collection.add_cached_array_to_imported_list(cached_array=array,
                                                     source_info='',
                                                     keyword=name,
                                                     discrete=False,
                                                     uom=None,
                                                     time_index=None,
                                                     null_value=None,
                                                     property_kind=kind,
                                                     facet_type=None,
                                                     facet=None,
                                                     realization=rel)
    collection.write_hdf5_for_imported_list()
    collection.create_xml_for_imported_list_and_add_parts_to_model()

    # Add categorial property
    collection.add_cached_array_to_imported_list(cached_array=facies_array,
                                                 source_info='',
                                                 keyword='Facies',
                                                 discrete=True,
                                                 uom=None,
                                                 time_index=None,
                                                 null_value=None,
                                                 property_kind='discrete',
                                                 facet_type=None,
                                                 facet=None,
                                                 realization=None)
    collection.write_hdf5_for_imported_list()
    collection.create_xml_for_imported_list_and_add_parts_to_model(string_lookup_uuid=lookup.uuid)

    # Add time varying properties
    for array, ts_index in zip([sat1_array, sat2_array, sat3_array], [0, 1, 2]):
        collection.add_cached_array_to_imported_list(cached_array=array,
                                                     source_info='',
                                                     keyword='SW',
                                                     discrete=False,
                                                     uom=None,
                                                     time_index=ts_index,
                                                     null_value=None,
                                                     property_kind='saturation',
                                                     facet_type='what',
                                                     facet='water',
                                                     realization=None)
    collection.write_hdf5_for_imported_list()
    collection.create_xml_for_imported_list_and_add_parts_to_model(time_series_uuid=ts.uuid)
    model.store_epc()

    return model
def equivalent_uuid_for_part(self, part, immigrant_model=None, ignore_identical_part=False):
    """Returns uuid of an equivalent part in resident model, or None if no equivalent found.

    arguments:
       part (str): part name in the immigrant model for which a resident equivalent is sought
       immigrant_model (Model, optional): the model holding part; defaults to self.model
       ignore_identical_part (bool, default False): if True, a resident part with the same uuid as
          the immigrant part is skipped, rather than being returned as the equivalent

    returns:
       uuid of an equivalent part already resident in self.model, or None
    """

    def _high_level_object(model, uuid):
        # instantiate the resqpy object of class obj_type (captured from enclosing scope) for uuid;
        # shared by the immigrant and resident sides to avoid duplicating this dispatch ladder
        if obj_type.endswith('Interpretation') or obj_type.endswith('Feature'):
            return rqo.__dict__[obj_type](model, uuid=uuid)
        elif obj_type.endswith('Crs'):
            return rqc.Crs(model, uuid=uuid)
        elif obj_type == 'TimeSeries':
            return rqt.TimeSeries(model, uuid=uuid)
        elif obj_type == 'StringTableLookup':
            return rqp.StringLookup(model, uuid=uuid)
        elif obj_type == 'PropertyKind':
            return rqp.PropertyKind(model, uuid=uuid)
        else:
            raise Exception('code failure')

    if not part:
        return None
    if immigrant_model is None:
        immigrant_model = self.model
    immigrant_uuid = rqet.uuid_in_part_name(part)
    if immigrant_uuid in self.map:
        # equivalence already established previously
        return self.map[immigrant_uuid]
    obj_type = immigrant_model.type_of_part(part, strip_obj=True)
    if obj_type is None or obj_type not in consolidatable_list:
        return None
    resident_uuids = self.model.uuids(obj_type=obj_type)
    if resident_uuids is None or len(resident_uuids) == 0:
        # no resident parts of this class to compare against
        return None
    if not ignore_identical_part:
        for resident_uuid in resident_uuids:
            if bu.matching_uuids(resident_uuid, immigrant_uuid):
                # the identical part is already resident
                return resident_uuid
    immigrant_obj = _high_level_object(immigrant_model, immigrant_uuid)
    assert immigrant_obj is not None
    for resident_uuid in resident_uuids:
        if ignore_identical_part and bu.matching_uuids(resident_uuid, immigrant_uuid):
            continue
        resident_obj = _high_level_object(self.model, resident_uuid)
        assert resident_obj is not None
        if immigrant_obj == resident_obj:  # note: == operator overloaded with equivalence method for these classes
            # follow any chain of prior equivalences to the ultimate resident uuid
            while resident_uuid in self.map:
                resident_uuid = self.map[resident_uuid]
            self.map[immigrant_uuid] = resident_uuid
            return resident_uuid
    return None
def gather_ensemble(case_epc_list,
                    new_epc_file,
                    consolidate=True,
                    shared_grids=True,
                    shared_time_series=True,
                    create_epc_lookup=True):
    """Creates a composite resqml dataset by merging all parts from all models in list, assigning realization numbers.

    arguments:
       case_epc_list (list of strings): paths of individual realization epc files
       new_epc_file (string): path of new composite epc to be created (with paired hdf5 file)
       consolidate (boolean, default True): if True, simple parts are tested for equivalence and where similar
          enough a single shared object is established in the composite dataset
       shared_grids (boolean, default True): if True and consolidate is True, then grids are also consolidated
          with equivalence based on extent of grids (and citation titles if grid extents within the first case
          are not distinct); ignored if consolidate is False
       shared_time_series (boolean, default True): if True and consolidate is True, then time series are
          consolidated with equivalence based on title, without checking that timestamp lists are the same
       create_epc_lookup (boolean, default True): if True, a StringLookupTable is created to map from realization
          number to case epc path

    notes:
       property objects will have an integer realization number assigned, which matches the corresponding index
       into the case_epc_list;
       if consolidating with shared grids, then only properties will be gathered from realisations after the
       first and an exception will be raised if the grids are not matched between realisations
    """

    if not consolidate:
        shared_grids = False

    composite_model = rq.Model(new_epc_file, new_epc=True, create_basics=True, create_hdf5_ext=True)

    epc_lookup_dict = {}

    for r, case_epc in enumerate(case_epc_list):
        t_r_start = time()  # debug
        log.info(f'gathering realisation {r}: {case_epc}')
        epc_lookup_dict[r] = case_epc
        case_model = rq.Model(case_epc)
        if r == 0:  # first case
            log.info('first case')  # debug
            composite_model.copy_all_parts_from_other_model(case_model, realization=0, consolidate=consolidate)
            if shared_time_series:
                # remember the host time series titles for matching in subsequent cases
                host_ts_uuids = case_model.uuids(obj_type='TimeSeries')
                host_ts_titles = []
                for ts_uuid in host_ts_uuids:
                    host_ts_titles.append(case_model.title(uuid=ts_uuid))
            if shared_grids:
                # remember the host grid shapes (and titles, if shapes are not distinct) for matching
                host_grid_uuids = case_model.uuids(obj_type='IjkGridRepresentation')
                host_grid_shapes = []
                host_grid_titles = []
                title_match_required = False
                for grid_uuid in host_grid_uuids:
                    grid_root = case_model.root(uuid=grid_uuid)
                    host_grid_shapes.append(grr.extent_kji_from_root(grid_root))
                    host_grid_titles.append(rqet.citation_title_for_node(grid_root))
                if len(set(host_grid_shapes)) < len(host_grid_shapes):
                    log.warning(
                        'shapes of representative grids are not distinct, grid titles must match during ensemble gathering'
                    )
                    title_match_required = True
        else:  # subsequent cases
            log.info('subsequent case')  # debug
            composite_model.consolidation = None  # discard any previous mappings to limit dictionary growth
            if shared_time_series:
                for ts_uuid in case_model.uuids(obj_type='TimeSeries'):
                    ts_title = case_model.title(uuid=ts_uuid)
                    ts_index = host_ts_titles.index(ts_title)
                    host_ts_uuid = host_ts_uuids[ts_index]
                    composite_model.force_consolidation_uuid_equivalence(ts_uuid, host_ts_uuid)
            if shared_grids:
                log.info('shared grids')  # debug
                for grid_uuid in case_model.uuids(obj_type='IjkGridRepresentation'):
                    grid_root = case_model.root(uuid=grid_uuid)
                    grid_extent = grr.extent_kji_from_root(grid_root)
                    host_index = None
                    if grid_extent in host_grid_shapes:
                        if title_match_required:
                            case_grid_title = rqet.citation_title_for_node(grid_root)
                            # was: for host_grid_index in len(host_grid_uuids) — iterating over an int (TypeError)
                            for host_grid_index in range(len(host_grid_uuids)):
                                if grid_extent == host_grid_shapes[
                                        host_grid_index] and case_grid_title == host_grid_titles[host_grid_index]:
                                    host_index = host_grid_index
                                    break
                        else:
                            host_index = host_grid_shapes.index(grid_extent)
                    assert host_index is not None, 'failed to match grids when gathering ensemble'
                    composite_model.force_consolidation_uuid_equivalence(grid_uuid, host_grid_uuids[host_index])
                    grid_relatives = case_model.parts(related_uuid=grid_uuid)
                    t_props = 0.0
                    composite_h5_file_name = composite_model.h5_file_name()
                    composite_h5_uuid = composite_model.h5_uuid()
                    case_h5_file_name = case_model.h5_file_name()
                    # only gather property parts related to the matched grid
                    for part in grid_relatives:
                        if 'Property' in part:
                            t_p_start = time()
                            composite_model.copy_part_from_other_model(case_model,
                                                                       part,
                                                                       realization=r,
                                                                       consolidate=True,
                                                                       force=shared_time_series,
                                                                       self_h5_file_name=composite_h5_file_name,
                                                                       h5_uuid=composite_h5_uuid,
                                                                       other_h5_file_name=case_h5_file_name)
                            t_props += time() - t_p_start
                    log.info(f'time props: {t_props:.3f} sec')  # debug
            else:
                log.info('non shared grids')  # debug
                composite_model.copy_all_parts_from_other_model(case_model, realization=r, consolidate=consolidate)
        log.info(f'case time: {time() - t_r_start:.2f} secs')  # debug

    if create_epc_lookup and len(epc_lookup_dict):
        epc_lookup = rqp.StringLookup(composite_model, int_to_str_dict=epc_lookup_dict, title='ensemble epc table')
        epc_lookup.create_xml()

    composite_model.store_epc()

    log.info(f'{len(epc_lookup_dict)} realizations merged into ensemble {new_epc_file}')
def __init__(
        self,
        model,
        support_root=None,  # deprecated
        uuid=None,
        df=None,
        uom_list=None,
        realization=None,
        title='dataframe',
        column_lookup_uuid=None,
        uom_lookup_uuid=None,
        extra_metadata=None):
    """Create a new Dataframe object from either a previously stored property or a pandas dataframe.

    arguments:
       model (model.Model): the model to which the new Dataframe will be attached
       support_root (lxml.Element, DEPRECATED): use uuid instead
       uuid (uuid.UUID, optional): the uuid of an existing Grid2dRepresentation
          object acting as support for a dataframe property (or holding the dataframe as z values)
       df (pandas.DataFrame, optional): a dataframe from which the new Dataframe is to be created;
          if both uuid (or support_root) and df are supplied, realization must not be None and a new
          realization property will be created
       uom_list (list of str, optional): a list holding the units of measure for each
          column; if present, length of list must match number of columns in df; ignored if uuid or
          support_root is not None
       realization (int, optional): if present, the realization number of the RESQML property
          holding the dataframe
       title (str, default 'dataframe'): used as the citation title for the Mesh (and property);
          ignored if uuid or support_root is not None
       column_lookup_uuid (uuid, optional): if present, the uuid of a string lookup table holding
          the column names; if present, the contents and order of the table must match the columns
          in the dataframe; if absent, a new lookup table will be created; ignored if support_root
          is not None
       uom_lookup_uuid (uuid, optional): if present, the uuid of a string lookup table holding
          the units of measure for each column; if None and uom_list is present, a new table
          will be created; if both uom_list and uom_lookup_uuid are present, their contents
          must match; ignored if support_root is not None
       extra_metadata (dict, optional): if present, a dictionary of extra metadata items, str: str;
          ignored if uuid (or support_root) is not None

    returns:
       a newly created Dataframe object

    notes:
       when initialising from an existing RESQML object, the supporting mesh and its property should
       have been originally created using this class; when working with ensembles, each object of this
       class will only handle the data for one realization, though they may share a common support_root
    """

    assert uuid is not None or support_root is not None or df is not None
    assert (uuid is None and support_root is None) or df is None or realization is not None

    if uuid is None:
        if support_root is not None:
            warnings.warn("support_root parameter is deprecated, use uuid instead", DeprecationWarning)
            uuid = rqet.uuid_for_part_root(support_root)
    else:
        support_root = model.root_for_uuid(uuid)

    self.model = model
    self.df = None
    self.n_rows = self.n_cols = 0
    self.uom_list = None
    self.realization = realization
    self.title = title
    self.mesh = None  # only generated when needed for write_hdf5(), create_xml()
    self.pc = None  # property collection; only generated when needed for write_hdf5(), create_xml()
    self.column_lookup_uuid = column_lookup_uuid
    self.column_lookup = None  # string lookup table mapping column index (0 based) to column name
    self.uom_lookup_uuid = uom_lookup_uuid
    self.uom_lookup = None  # string lookup table mapping column index (0 based) to uom
    self.extra_metadata = extra_metadata

    if uuid is not None:
        # initialise from an existing supporting mesh (and optional realization properties)
        assert rqet.node_type(support_root) == 'obj_Grid2dRepresentation'
        self.mesh = rqs.Mesh(self.model, uuid=uuid)
        self.extra_metadata = self.mesh.extra_metadata
        assert 'dataframe' in self.extra_metadata and self.extra_metadata['dataframe'] == 'true'
        self.title = self.mesh.title
        self.n_rows, self.n_cols = self.mesh.nj, self.mesh.ni
        cl_uuid = self.model.uuid(obj_type='StringTableLookup', related_uuid=uuid, title='dataframe columns')
        assert cl_uuid is not None, 'column name lookup table not found for dataframe'
        self.column_lookup = rqp.StringLookup(self.model, uuid=cl_uuid)
        self.column_lookup_uuid = self.column_lookup.uuid
        assert self.column_lookup.length() == self.n_cols
        ul_uuid = self.model.uuid(obj_type='StringTableLookup', related_uuid=uuid, title='dataframe units')
        if ul_uuid is not None:
            self.uom_lookup = rqp.StringLookup(self.model, uuid=ul_uuid)
            self.uom_lookup_uuid = self.uom_lookup.uuid
            self.uom_list = self.uom_lookup.get_list()
        da = self.mesh.full_array_ref()[..., 2]  # dataframe data as 2D numpy array, defaulting to z values in mesh
        existing_pc = rqp.PropertyCollection(support=self.mesh)
        existing_count = 0 if existing_pc is None else existing_pc.number_of_parts()
        if df is None:  # existing data, either in mesh or property
            if existing_count > 0:  # use property data instead of z values
                if existing_count == 1:
                    if self.realization is not None:
                        assert existing_pc.realization_for_part(existing_pc.singleton()) == self.realization
                else:
                    assert self.realization is not None, 'no realization specified when accessing ensemble dataframe'
                da = existing_pc.single_array_ref(realization=self.realization)
                assert da is not None and da.ndim == 2 and da.shape == (self.n_rows, self.n_cols)
            else:
                assert realization is None
            self.df = pd.DataFrame(da, columns=self.column_lookup.get_list())
        else:  # both support_root and df supplied: add a new realisation
            if existing_count > 0:
                assert existing_pc.singleton(realization=self.realization) is None,  \
                    'dataframe realization already exists'
            self.df = df.copy()
            assert len(self.df) == self.n_rows
            # was comparing column count against n_rows — column count must match the mesh ni (n_cols)
            assert len(self.df.columns) == self.n_cols
    else:
        # initialise from a pandas dataframe alone
        assert df is not None, 'no dataframe (or support root) provided when instantiating DataFrame object'
        self.df = df.copy()
        # todo: check data type of columns – restrict to numerical data
        self.n_rows = len(self.df)
        self.n_cols = len(self.df.columns)
        if column_lookup_uuid is not None:
            self.column_lookup = rqp.StringLookup(self.model, uuid=column_lookup_uuid)
            assert self.column_lookup is not None
            assert self.column_lookup.length() == self.n_cols
            assert all(self.df.columns == self.column_lookup.get_list())  # exact match of column names required!
        if uom_lookup_uuid is not None:
            self.uom_lookup = rqp.StringLookup(self.model, uuid=uom_lookup_uuid)
            assert self.uom_lookup is not None
        if uom_list is not None:
            assert len(uom_list) == self.n_cols
            self.uom_list = uom_list.copy()
            if self.uom_lookup is not None:
                assert self.uom_list == self.uom_lookup.get_list()
        elif self.uom_lookup is not None:
            self.uom_list = self.uom_lookup.get_list()