def read(self, num=None, unpack=True):
    """
    Read records from the file starting at the current position.

    Parameters
    ----------
    num : int, optional
        Number of records to read.  ``None`` (the default) reads every
        record remaining in the file, as determined from the record index.
        ``0`` (or a negative value) returns an empty list.

        NOTE(review): the previous implementation left ``reclist`` unbound
        for ``num=None`` (raising ``TypeError``/``NameError``); ``None`` is
        now defined as "read to end of file".
    unpack : bool, optional
        If ``True`` (default), unpack each data/station record after reading.

    Returns
    -------
    list
        ``TdlpackRecord`` / ``TdlpackStationRecord`` / ``TdlpackTrailerRecord``
        instances, in file order.
    """
    recs = []
    if num is None:
        # Read everything after the current record, per the index.
        num = len(self._index['size']) - self.recordnumber
    if num <= 0:
        return recs
    # Record numbers are 1-based; self._index arrays are 0-based.
    for n in range(self.recordnumber + 1, self.recordnumber + 1 + num):
        nn = n - 1  # zero-based position into self._index
        kwargs = {}
        self.seek(n)
        kwargs['ioctet'] = self._index['size'][nn]
        # Records are stored big-endian; read the raw packed 32-bit words.
        kwargs['ipack'] = np.frombuffer(
            self._filehandle.read(self._index['size'][nn]), dtype='>i4')
        rectype = self._index['type'][nn]
        if rectype == 'data':
            kwargs['reference_date'] = self._index['date'][nn]
            rec = pytdlpack.TdlpackRecord(**kwargs)
            if unpack:
                rec.unpack()
            recs.append(rec)
        elif rectype == 'station':
            # Station call-letter records need a byteswap before unpacking;
            # each station occupies NCHAR bytes.
            kwargs['ipack'] = kwargs['ipack'].byteswap()
            kwargs['number_of_stations'] = np.int32(kwargs['ioctet'] / pytdlpack.NCHAR)
            rec = pytdlpack.TdlpackStationRecord(**kwargs)
            if unpack:
                rec.unpack()
            recs.append(rec)
        elif rectype == 'trailer':
            recs.append(pytdlpack.TdlpackTrailerRecord(**kwargs))
        self.recordnumber = n
    return recs
# ----------------------------------------------------------------------------------------
# Example: build a gridded TDLPACK record and write it to both a sequential file and a
# random-access file.  NOTE: `nx` is defined earlier in the file (not visible here).
# ----------------------------------------------------------------------------------------
ny = 1597
date = 2019052900  # presumably YYYYMMDDHH — verify against pytdlpack docs
id = [4210008,10,24,0]  # 4-word TDLPACK ID (NOTE: name shadows the builtin id())
grid_data = np.random.rand(nx,ny)*75.0
# NOTE(review): this overwrites the random values generated on the line above, producing
# an all-NaN grid — confirm whether an all-missing grid is actually intended here, and
# note the record below declares missing_value=9999.0, not NaN.
grid_data.fill(np.nan)
# ----------------------------------------------------------------------------------------
# Grid Specs: CONUS Lambert-Conformal 2.5km 2345x1597
# ----------------------------------------------------------------------------------------
griddef = pytdlpack.create_grid_definition(proj=3,nx=nx,ny=ny,latll=19.2290,
lonll=233.7234,orientlon=265.,stdlat=25.,meshlength=2.539703)
# ----------------------------------------------------------------------------------------
# Create TDLPACK data record and pack
# ----------------------------------------------------------------------------------------
rec = pytdlpack.TdlpackRecord(date=date,id=id,lead=24,plain="GFS WIND SPEED",
data=grid_data,missing_value=9999.0,grid=griddef)
rec.pack(dec_scale=3)
# ----------------------------------------------------------------------------------------
# Open new sequential file and write the records
# ----------------------------------------------------------------------------------------
f = pytdlpack.open('new_grid.sq',mode='w',format='sequential')
f.write(rec)
f.close()
# ----------------------------------------------------------------------------------------
# Open new random-access file and write the records
# ----------------------------------------------------------------------------------------
fra = pytdlpack.open('new_grid.ra',mode='w',format='random-access',ra_template='large')
fra.write(rec)
fra.close()
# ----------------------------------------------------------------------------------------
# Continuation of a station-record example: `is1`, `plain`, `stationrec`, and the open
# file object `f` are defined earlier in the file (not visible here).
# ----------------------------------------------------------------------------------------
# IS2 is left zeroed since we are not packing a gridded record.
is2 = np.zeros((pytdlpack.ND7),dtype=np.int32)
# IS4: data-packing section.
is4 = np.zeros((pytdlpack.ND7),dtype=np.int32)
is4[0] = 0
is4[1] = 0
is4[2] = stationrec.number_of_stations  # number of values = number of stations
is4[3] = np.int32(9999)                 # primary missing value
is4[4] = np.int32(0)                    # no secondary missing value
# ----------------------------------------------------------------------------------------
# Create some fake temperature-like data and create an instance of TdlpackRecord where all
# is* arrays are passed in, along with plain and data.
#
# NOTE: is0 is not passed as this section is created by the system.
# ----------------------------------------------------------------------------------------
data = np.array([65.4,59.2,68.0,66.1,9999.0],dtype=np.float32)
temprec = pytdlpack.TdlpackRecord(is1=is1,is2=is2,is4=is4,plain=plain,data=data)
temprec.pack()  # The temperature data has now been packed.
f.write(temprec)  # The packed temperature record has been written to file.
# ----------------------------------------------------------------------------------------
# Close the TDLPACK file.
# ----------------------------------------------------------------------------------------
f.close()
# ----------------------------------------------------------------------------------------
# Re-open file and read all records back.
# ----------------------------------------------------------------------------------------
del f
f = pytdlpack.open("station.sq",mode="r")
recs = f.read(all=True)
# Fixed: `print recs` was a Python 2 print statement — a SyntaxError under Python 3,
# which the rest of this file (f-strings, annotations) requires.
print(recs)
def to_tdlpack(self, file, mode='w-', compute: bool = True, var_constants=None,
               min_unique=1000):
    '''
    Write this dataset's variables out as a TDLPACK sequential file.

    Parameters
    ----------
    file : str or path-like
        Destination file.  Records are first written into a hidden sibling
        directory and moved into place on success.
    mode : {"w", "w-"}, optional, default: "w-"
        Persistence mode: "w" means create (overwrite if exists);
        "w-" means create (fail if exists).
    compute : bool, optional
        Currently unused; retained for interface compatibility.
    var_constants : pandas.DataFrame, optional
        Per-variable constants indexed by cccfff, providing 'plain' text and
        'iscale' (decimal scale).  When None, the plain text defaults to
        'NO VAR MATCH' and dec_scale is derived from the data range.
    min_unique : int, optional
        Minimum number of distinct representable values to allow between the
        data min and max when deriving dec_scale.

    Raises
    ------
    ValueError
        If the data lacks 'x'/'y' or 'station' dims, if required TDLPACK
        metadata is missing, or on mode/path conflicts.
    '''
    have_chunks = any(v.chunks for v in self._obj.variables.values())
    # Ensure data has x/y or station dims and that any chunks do not span those dims.
    if 'station' in self._obj.dims:
        station = True
        if have_chunks:
            self._obj = self._obj.chunk({'station': -1})
    elif 'x' in self._obj.dims and 'y' in self._obj.dims:
        station = False
        if have_chunks:
            self._obj = self._obj.chunk({'x': -1, 'y': -1})
    else:
        raise ValueError(
            "data does not have 'x' and 'y' or 'station' dims for writing to tdlp grid or station formats"
        )

    # Determine which TDLPACK ID words differ between variables.
    possible_multi_var_keys = [
        'ccc', 'fff', 'b', 'dd', 'v', 'llll', 'uuuu', 't', 'o', 'i', 's', 'g'
    ]
    multi_var_keys = [
        k for k in possible_multi_var_keys if not self._iscoord(k)
    ]
    meta_dicts = list()
    for var in self._obj.data_vars:
        da = self._obj[var]
        meta_dicts.append(
            {key: da.encoding[f'tdlp_{key}'] for key in multi_var_keys})
    df = pd.DataFrame(meta_dicts).nunique()
    meta_varying_by_var = df.index[df > 1]

    # Collect required metadata either from encoding (constant across the
    # dataset) or from a coordinate tagged with tdlp_name.
    # NOTE(review): `da` here is the *last* data variable from the loop above —
    # the encoding lookup assumes shared encoding across variables; confirm.
    meta = RequiredTdlpMeta()
    keys = list(meta.__dataclass_fields__.keys())
    coord_meta = list()
    const_meta = list()
    tdlpid = TdlpId()
    for key in keys:
        if f'tdlp_{key}' in da.encoding:
            meta[key] = da.encoding[f'tdlp_{key}']
            tdlpid[key] = meta[key]
            const_meta.append(key)
            continue
        found = False
        for coord_name in self._obj.coords:
            coord = self._obj[coord_name]
            if 'tdlp_name' in coord.attrs:
                if coord.attrs['tdlp_name'] == key:
                    found = True
                    coord_meta.append(key)
                    meta[key] = coord
                    break
        if not found:
            raise ValueError(
                f'to_tdlpack requires metadata for {key} be in encoding or coordinate'
            )

    filepath = Path(file)
    if mode == 'w-':
        if filepath.exists():
            raise ValueError(
                f"{file} already exists and will not be overwritten; mode: 'w' can overwrite existing files"
            )
    elif mode == 'w':
        if filepath.is_dir():
            raise ValueError(f"cannot clobber directory {file}")
        # Truncate/create the target up front so clobber problems surface early.
        open(filepath, 'w').close()

    # Stage records in a hidden sibling directory; moved into place on success.
    store = filepath.parent / f'.{filepath.name}'
    if store.is_dir():
        logger.warning(f'removing existing hidden directory {store}')
        shutil.rmtree(store)
    store.mkdir(parents=True)

    # Cartesian product over all coordinate-driven metadata dimensions.
    prodicized = product(*[meta[k] for k in coord_meta])

    f = pytdlpack.open(store / filepath.name, mode='w', format='sequential')
    if station:
        template_rec = pytdlpack.TdlpackRecord(date=0,
                                               id=[0, 0, 0, 0],
                                               data=np.array([0]))
        stations = pytdlpack.TdlpackStationRecord(
            list(self._obj.station.data))
        stations.pack()
        f.write(stations)
    else:
        # The grid doesn't matter (can tweak/clean later).
        template_rec = pytdlpack.TdlpackRecord(
            date=0,
            id=[0, 0, 0, 0],
            grid=pytdlpack.grids['nbmak'],
            data=np.array([0]))
        template_rec.is2 = da.encoding[
            'tdlp_is2']  # this loads the grid metadata
        template_rec.primary_missing_value = 9999.0

    for t in prodicized:
        for var in self._obj.data_vars:
            # Select slice of array for tdlpack record.
            loc = {k: v for (k, v) in zip(coord_meta, t)}
            da = self._obj[var].loc[loc].squeeze()
            # Put extra metadata that varies by variable in loc for updating tdlpid.
            for m in meta_varying_by_var:
                loc[m] = da.encoding[f'tdlp_{m}']
            tdlpid.update(**loc)
            # Shape data array appropriately for station or grid formatted
            # tdlpack record (grids are written column-major).
            if station:
                data = da.data
            else:
                data = da.data.transpose()
            # Build out a tdlpack DataRecord with appropriate metadata.
            idlist = [
                tdlpid.word1, tdlpid.word2, tdlpid.word3, tdlpid.word4
            ]
            if var_constants is None:
                plain = 'NO VAR MATCH'
                # Let dec_scale allow for min_unique values in the space
                # between the max and min.
                datamax = np.nanmax(data)
                datamin = np.nanmin(data)
                if datamax == datamin or np.isnan(datamax):
                    dec_scale = 9  # data is a constant or all missing and will compress well
                else:
                    # Reuse datamax/datamin rather than recomputing the
                    # nan-aware reductions over the whole array.
                    log10range = np.log10(datamax - datamin)
                    range_place = np.floor(log10range)
                    dec_scale = int(
                        np.ceil(np.log10(min_unique)) - range_place)
            else:
                plain = var_constants.loc[tdlpid.cccfff]['plain']
                dec_scale = var_constants.loc[tdlpid.cccfff]['iscale']
            date = da.date.data.squeeze()[()]
            # Fixed: previously the literal 'PLAIN TEXT' was written into every
            # record, silently discarding the `plain` text computed above.
            rec = make_record(template_rec, idlist, data, plain, date)
            rec.pack(dec_scale=dec_scale)
            logger.debug(
                f'writing {date}, {idlist} with dec_scale: {dec_scale}')
            f.write(rec)
    f.close()
    # Atomically-ish promote the staged file and clean up.
    shutil.move(store / filepath.name, file)
    shutil.rmtree(store)
# ----------------------------------------------------------------------------------------
# Example: write a station-oriented TDLPACK sequential file
# (station list, one data record, trailer).
# ----------------------------------------------------------------------------------------
date = 2019052900  # presumably YYYYMMDDHH — verify against pytdlpack docs
id = [4210008, 10, 24, 0]  # 4-word TDLPACK ID (NOTE: name shadows the builtin id())
station_name = ('KACY', 'KBWI', 'KDCA', 'KIAD', 'KPHL')
station_data = [10.3, 12.4, 15.6, 8.6, 9999.0]  # 9999.0 matches missing_value below
# ----------------------------------------------------------------------------------------
# Create station record and pack
# ----------------------------------------------------------------------------------------
sta = pytdlpack.TdlpackStationRecord(station_name)
sta.pack()
# ----------------------------------------------------------------------------------------
# Create TDLPACK data record and pack
# ----------------------------------------------------------------------------------------
rec = pytdlpack.TdlpackRecord(date=date, id=id, lead=24, plain="GFS WIND SPEED",
                              data=station_data, missing_value=9999.0)
rec.pack(dec_scale=1)
# ----------------------------------------------------------------------------------------
# Open new sequential file and write the records: station list first, then data,
# then a trailer record to terminate the file.
# ----------------------------------------------------------------------------------------
f = pytdlpack.open('new_station.sq', mode='w', format='sequential')
f.write(sta)
f.write(rec)
f.write(pytdlpack.TdlpackTrailerRecord())
f.close()