def records(self):
    """
    The Borealis data in a dictionary of records, according to the
    site file format.

    Raises
    ------
    BorealisRestructureError
        Errors restructuring from arrays to site style file
    """
    if self.format.is_restructureable():
        try:
            records = self.format._array_to_site(self.arrays)
            BorealisUtilities.check_records(
                self.filename, records,
                self.format.site_single_element_types(),
                self.format.site_array_dtypes())
        except Exception as err:
            raise borealis_exceptions.BorealisRestructureError(
                'Arrays for {}: Error restructuring {} from array to site'
                ' style: {}'.format(self.filename, self.format.__name__,
                                    err)) from err
    else:
        raise borealis_exceptions.BorealisRestructureError(
            'Arrays for {}: File format {} not recognized as '
            'restructureable from site to array style or vice versa.'
            ''.format(self.filename, self.format.__name__))

    return records
def arrays(self):
    """
    The Borealis data in a dictionary of arrays, according to the
    restructured array file format.

    Raises
    ------
    BorealisRestructureError
        Errors in restructuring to arrays style file.
    """
    if self.format.is_restructureable():
        try:
            arrays = self.format._site_to_array(self.records)
            BorealisUtilities.check_arrays(
                self.filename, arrays,
                self.format.array_single_element_types(),
                self.format.array_array_dtypes(),
                self.format.unshared_fields())
        except Exception as err:
            raise borealis_exceptions.BorealisRestructureError(
                'Records for {}: Error restructuring {} from site to array'
                ' style: {}'
                ''.format(self.filename, self.format.__name__,
                          err)) from err
    else:
        raise borealis_exceptions.BorealisRestructureError(
            'Records for {}: File format {} not recognized as '
            'restructureable from site to array style'
            ''.format(self.filename, self.format.__name__))

    return arrays
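# Hedged usage sketch for the two properties above (illustration only, not
# part of this module). The reader class name, its argument order, and the
# filename/filetype strings below are assumptions about the surrounding
# package; the real entry point may differ. The intent shown is that
# .records yields site-styled, timestamped records while .arrays yields the
# same data restructured into stacked arrays.
#
#     from pydarnio import BorealisRead
#
#     reader = BorealisRead('path/to/borealis_file.hdf5', 'antennas_iq')
#     site_records = reader.records   # OrderedDict keyed by ms since epoch
#     array_data = reader.arrays      # dict of numpy arrays, one per field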
def _array_to_site(cls, data_dict: dict) -> OrderedDict:
    """
    Base function for converting array Borealis data to site format.

    Parameters
    ----------
    data_dict: dict
        dictionary of array restructured Borealis data.

    Returns
    -------
    new_data_dict
        An OrderedDict of timestamped records as if loaded from the
        original site file.

    See Also
    --------
    is_restructureable
    flatten_site_arrays
    shared_fields
    site_specific_fields_generate
    unshared_fields_dims_site

    Notes
    -----
    The results will differ based on the format class, as many of the
    class methods used inside this method should be specific to the
    format and updated in the child class. However, this is the
    process required for any restructuring, so this method itself
    should not be updated by the child class.
    """
    if not cls.is_restructureable():
        raise borealis_exceptions.BorealisRestructureError(
            'File format {} not recognized as '
            'restructureable from site to array style or vice versa.'
            ''.format(cls.__name__))

    timestamp_dict = OrderedDict()
    for record_num, seq_timestamp in \
            enumerate(data_dict["sqn_timestamps"]):
        # Format the dictionary key in the same way it is done in
        # datawrite on site: milliseconds since epoch of the first
        # sequence timestamp in the record.
        seq_datetime = datetime.utcfromtimestamp(seq_timestamp[0])
        epoch = datetime.utcfromtimestamp(0)
        key = str(int((seq_datetime - epoch).total_seconds() * 1000))

        timestamp_dict[key] = dict()

        # Populate the shared fields in each record.
        for field in cls.shared_fields():
            timestamp_dict[key][field] = data_dict[field]

        # Populate the site specific fields using the given functions
        # that take both the arrays data and the record number.
        for field in cls.site_specific_fields():
            timestamp_dict[key][field] = \
                cls.site_specific_fields_generate()[field](data_dict,
                                                           record_num)

        for field in cls.unshared_fields():
            if field in cls.single_element_types():
                datatype = cls.single_element_types()[field]
                # The field is not an array; there is a single element
                # per record, so unshared_fields_dims_site should give
                # an empty list.
                timestamp_dict[key][field] = datatype(
                    data_dict[field][record_num])
            else:  # field in array_dtypes
                datatype = cls.array_dtypes()[field]
                # Need to get the dims correct; they are not always
                # equal to the max dims stored in the arrays file.
                site_dims = [dimension_function(data_dict, record_num)
                             for dimension_function in
                             cls.unshared_fields_dims_site()[field]]
                index_slice = [slice(0, i) for i in site_dims]
                index_slice.insert(0, record_num)
                index_slice = tuple(index_slice)
                timestamp_dict[key][field] = data_dict[field][index_slice]

    timestamp_dict = cls.flatten_site_arrays(timestamp_dict)

    return timestamp_dict
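# A minimal sketch of the two techniques used in _array_to_site, with an
# invented field size and timestamp for illustration only. It shows (1) the
# milliseconds-since-epoch record key and (2) the slice tuple that trims a
# padded array row back down to the record's true dimensions.
#
#     import numpy as np
#     from datetime import datetime
#
#     seq_timestamp = 1577836800.5          # first sqn timestamp of a record
#     epoch = datetime.utcfromtimestamp(0)
#     dt = datetime.utcfromtimestamp(seq_timestamp)
#     key = str(int((dt - epoch).total_seconds() * 1000))  # '1577836800500'
#
#     padded = np.arange(12.0).reshape(2, 6)  # 2 records, padded max dim of 6
#     site_dims = [4]                         # this record only used 4 values
#     index_slice = tuple([0] + [slice(0, i) for i in site_dims])
#     record_values = padded[index_slice]     # shape (4,), padding dropped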
def _site_to_array(cls, data_dict: OrderedDict) -> dict:
    """
    Base function for converting site Borealis data to restructured
    array format.

    Parameters
    ----------
    data_dict: OrderedDict
        a dict of timestamped records loaded from an hdf5 Borealis
        site file

    Returns
    -------
    new_data_dict
        A dictionary containing the data from data_dict reformatted
        to be stored entirely in array style, or as one entry if the
        field does not change between records. This means that for
        fields that change between records, the first dimension in
        the array will equal num_records (these are called
        unshared_fields). For fields common to all records, there
        will only be the one value that applies (these are known as
        shared_fields).

    See Also
    --------
    is_restructureable
    reshape_site_arrays
    shared_fields
    array_specific_fields_generate
    unshared_fields_dims_array

    Notes
    -----
    The results will differ based on the format class, as many of the
    class methods used inside this method should be specific to the
    format and updated in the child class. However, this is the
    process required for any restructuring, so this method itself
    should not be updated by the child class.
    """
    if not cls.is_restructureable():
        raise borealis_exceptions.BorealisRestructureError(
            'File format {} not recognized as '
            'restructureable from site to array style or vice versa.'
            ''.format(cls.__name__))

    new_data_dict = dict()
    num_records = len(data_dict)

    # Some fields are linear in site style and need to be reshaped.
    data_dict = cls.reshape_site_arrays(data_dict)

    # Write the shared fields to the dictionary first; any record can
    # provide the value since it is the same in all of them.
    first_key = list(data_dict.keys())[0]
    for field in cls.shared_fields():
        new_data_dict[field] = data_dict[first_key][field]

    # Write the array specific fields using the given functions.
    for field in cls.array_specific_fields():
        new_data_dict[field] = cls.array_specific_fields_generate()[field](
            data_dict)

    # Write the unshared fields, initializing empty arrays to start.
    temp_array_dict = dict()

    # Get the array dims of the unshared fields arrays.
    field_dimensions = {}
    for field in cls.unshared_fields():
        dims = [dimension_function(data_dict)
                for dimension_function in
                cls.unshared_fields_dims_array()[field]]
        field_dimensions[field] = dims

    # All of these fields are to become arrays, with the number of
    # records as the first dimension.
    for field, dims in field_dimensions.items():
        array_dims = [num_records] + dims
        array_dims = tuple(array_dims)

        if field in cls.single_element_types():
            datatype = cls.single_element_types()[field]
        else:  # field in array_dtypes
            datatype = cls.array_dtypes()[field]
        empty_array = np.empty(array_dims, dtype=datatype)
        # Initialize all values to NaN; some indices may not be
        # filled due to dimensions that are max values (num sequences,
        # etc. can change between records).
        empty_array[:] = np.NaN
        temp_array_dict[field] = empty_array

    # Iterate through the records, filling the unshared fields.
    for rec_idx, k in enumerate(data_dict.keys()):
        for field in cls.unshared_fields():  # all unshared fields
            empty_array = temp_array_dict[field]
            if type(data_dict[first_key][field]) == np.ndarray:
                # Only fill the correct length; appended NaNs occur
                # for dims with a determined max value.
                data_buffer = data_dict[k][field]
                buffer_shape = data_buffer.shape
                index_slice = [slice(0, i) for i in buffer_shape]
                # Insert the record index at the start of the array's
                # slice list.
                index_slice.insert(0, rec_idx)
                index_slice = tuple(index_slice)
                # Place the data buffer in the correct place.
                empty_array[index_slice] = data_buffer
            else:
                # Not an array; num_records is the only dimension.
                empty_array[rec_idx] = data_dict[k][field]

    new_data_dict.update(temp_array_dict)

    return new_data_dict
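# A minimal sketch of the padding technique used in _site_to_array, with
# invented record shapes for illustration only: records of differing length
# are stacked into a NaN-initialized array whose trailing dims are the
# per-field maximums, and each record only overwrites the slice it fills.
#
#     import numpy as np
#
#     records = {'1000': np.array([1.0, 2.0, 3.0]),
#                '2000': np.array([4.0, 5.0])}
#     num_records, max_dim = len(records), 3
#     stacked = np.empty((num_records, max_dim), dtype=np.float64)
#     stacked[:] = np.NaN
#     for rec_idx, rec in enumerate(records.values()):
#         index_slice = tuple([rec_idx] + [slice(0, i) for i in rec.shape])
#         stacked[index_slice] = rec
#     # stacked -> [[1., 2., 3.], [4., 5., nan]]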