def read(self, num=None, unpack=True):
    """Read records from the current position in the file.

    Parameters
    ----------
    num : int or None, optional
        Number of records to read.  ``0`` (or a negative value) returns an
        empty list.  When ``None`` (the default), all remaining records are
        read.  (Previously ``None`` crashed with a ``TypeError``/``NameError``
        because no branch ever bound ``reclist``.)
    unpack : bool, optional
        If True (default), call ``unpack()`` on each data/station record
        after reading.  Trailer records are never unpacked.

    Returns
    -------
    list
        ``TdlpackRecord``, ``TdlpackStationRecord``, and/or
        ``TdlpackTrailerRecord`` instances, in file order.
    """
    recs = []
    if num == 0:
        return recs
    if num is None:
        # Read everything from the next record through the end of the file.
        # NOTE(review): assumes len(self._index['size']) is the total record
        # count -- confirm against the code that builds self._index.
        reclist = list(range(self.recordnumber + 1,
                             len(self._index['size']) + 1))
    elif num == 1:
        reclist = [self.recordnumber + 1]
    else:
        # num > 1 reads that many records; num < 0 yields an empty range.
        reclist = list(range(self.recordnumber + 1,
                             self.recordnumber + 1 + num))
    for n in reclist:
        nn = n - 1  # 0-based position into the self._index arrays
        kwargs = {}
        self.seek(n)
        kwargs['ioctet'] = self._index['size'][nn]
        # Raw packed record, read big-endian as 4-byte ints.
        kwargs['ipack'] = np.frombuffer(
            self._filehandle.read(self._index['size'][nn]), dtype='>i4')
        rectype = self._index['type'][nn]
        if rectype == 'data':
            kwargs['reference_date'] = self._index['date'][nn]
            rec = pytdlpack.TdlpackRecord(**kwargs)
            if unpack:
                rec.unpack()
            recs.append(rec)
        elif rectype == 'station':
            # Station call-letter records apparently need the opposite byte
            # order from the '>i4' read above -- hence the byteswap.
            kwargs['ipack'] = kwargs['ipack'].byteswap()
            kwargs['number_of_stations'] = np.int32(
                kwargs['ioctet'] / pytdlpack.NCHAR)
            rec = pytdlpack.TdlpackStationRecord(**kwargs)
            if unpack:
                rec.unpack()
            recs.append(rec)
        elif rectype == 'trailer':
            recs.append(pytdlpack.TdlpackTrailerRecord(**kwargs))
        self.recordnumber = n
    return recs
import pytdlpack
import numpy as np

# ----------------------------------------------------------------------------------------
# Open new TDLPACK file. Here we use pytdlpack.open() which is a function that returns
# an instance (i.e. an object) of class pytdlpack.TdlpackFile.
# ----------------------------------------------------------------------------------------
f = pytdlpack.open("station.sq", mode="w")
# NOTE: converted from Python 2 print statements to Python 3 print() calls,
# consistent with the rest of the file (which uses f-strings).
print("TDLPACK FILE INFO")
print(f)

# ----------------------------------------------------------------------------------------
# Define a station list and create an instance of TdlpackStationRecord
# ----------------------------------------------------------------------------------------
ccall = ('KBWI', 'KPHL', 'KIAD', 'KLNS', 'KACY')  # Parentheses indicates a tuple (can also be a list).
stationrec = pytdlpack.TdlpackStationRecord(ccall=ccall)  # All that is needed is a station call letter list/tuple.
stationrec.pack()  # pack() is a method (i.e. a function that acts on a class instance).
f.write(stationrec)  # write() is a method of class TdlpackFile.

# ----------------------------------------------------------------------------------------
# Define TDLPACK Identification Sections
#
# NOTE: The pytdlpack module contains constants to use! -- Like ND7
# ----------------------------------------------------------------------------------------
is1 = np.zeros((pytdlpack.ND7), dtype=np.int32)
is1[0] = 0
is1[1] = 0
is1[2] = 2018
is1[3] = 1
is1[4] = 2
is1[5] = 12
def to_tdlpack(self, file, mode='w-', compute: bool = True,
               var_constants=None, min_unique=1000):
    '''
    Write the wrapped xarray Dataset to a TDLPACK sequential file.

    The dataset must have either a 'station' dim (station format) or both
    'x' and 'y' dims (grid format).  Records are staged into a hidden
    sibling directory and moved into place on success.

    Parameters
    ----------
    file : str or path-like
        Destination path for the TDLPACK file.
    mode : {"w", "w-"}, optional, default: "w-"
        Persistence mode: "w" means create (overwrite if exists);
        "w-" means create (fail if exists);
    compute : bool, optional
        NOTE(review): accepted but never used in this implementation.
    var_constants : optional
        Table indexed by cccfff with 'plain' and 'iscale' columns; when
        None, dec_scale is derived from each record's data range.
    min_unique : int, optional
        When deriving dec_scale, target roughly this many representable
        values between the data min and max.

    Raises
    ------
    ValueError
        If required dims or per-key TDLPACK metadata are missing, or the
        destination conflicts with the persistence mode.
    '''
    have_chunks = any(v.chunks for v in self._obj.variables.values())
    # ensuring has x/y or station dims and that any chunks do not span those dims
    if 'station' in self._obj.dims:
        station = True
        if have_chunks:
            self._obj = self._obj.chunk({'station': -1})
    elif 'x' in self._obj.dims and 'y' in self._obj.dims:
        station = False
        if have_chunks:
            self._obj = self._obj.chunk({'x': -1, 'y': -1})
    else:
        raise ValueError(
            "data does not have 'x' and 'y' or 'station' dims for writing to tdlp grid or station formats"
        )

    # TDLPACK id pieces that may differ between data variables; keys that
    # are coordinates vary along a dim instead and are excluded here.
    possible_multi_var_keys = [
        'ccc', 'fff', 'b', 'dd', 'v', 'llll', 'uuuu', 't', 'o', 'i', 's', 'g'
    ]
    multi_var_keys = [
        k for k in possible_multi_var_keys if not self._iscoord(k)
    ]
    meta_dicts = list()
    for var in self._obj.data_vars:
        da = self._obj[var]
        meta_dicts.append(
            {key: da.encoding[f'tdlp_{key}'] for key in multi_var_keys})
    # Keys whose value is not constant across variables must be refreshed
    # per record inside the write loop below.
    df = pd.DataFrame(meta_dicts).nunique()
    meta_varying_by_var = df.index[df > 1]

    meta = RequiredTdlpMeta()
    keys = list(meta.__dataclass_fields__.keys())
    coord_meta = list()   # keys supplied by coordinates (vary per record)
    const_meta = list()   # keys constant for the whole file
    tdlpid = TdlpId()
    # NOTE(review): `da` here is the loop variable leaked from the
    # data_vars loop above, i.e. the *last* variable's encoding is taken
    # as representative -- confirm this is intended.
    for key in keys:
        if f'tdlp_{key}' in da.encoding:
            meta[key] = da.encoding[f'tdlp_{key}']
            tdlpid[key] = meta[key]
            const_meta.append(key)
            continue
        found = False
        for coord_name in self._obj.coords:
            coord = self._obj[coord_name]
            if 'tdlp_name' in coord.attrs:
                if coord.attrs['tdlp_name'] == key:
                    found = True
                    coord_meta.append(key)
                    meta[key] = coord
                    break
        if not found:
            raise ValueError(
                f'to_tdlpack requres metadata for {key} be in encoding or coordinate'
            )
    filepath = Path(file)
    if mode == 'w-':
        if filepath.exists():
            raise ValueError(
                f"{file} already exists and will not be overwritten; mode: 'w' can overwrite existing files"
            )
    elif mode == 'w':
        if filepath.is_dir():
            raise ValueError(f"cannot clobber directory {file}")
        open(filepath, 'w').close()
    # Stage output in a hidden sibling directory; moved into place at the end.
    store = filepath.parent / f'.{filepath.name}'
    if store.is_dir():
        logger.warning(f'removing existing hidden directory {store}')
        shutil.rmtree(store)
    store.mkdir(parents=True)
    # Cartesian product over coordinate-supplied id values: one record per
    # combination per data variable.
    prodicized = product(*[meta[k] for k in coord_meta])
    f = pytdlpack.open(store / filepath.name, mode='w', format='sequential')
    if station:
        template_rec = pytdlpack.TdlpackRecord(date=0,
                                               id=[0, 0, 0, 0],
                                               data=np.array([0]))
        stations = pytdlpack.TdlpackStationRecord(
            list(self._obj.station.data))
        stations.pack()
        f.write(stations)
    else:
        # the grid doesn't matter ( can tweak/clean later)
        template_rec = pytdlpack.TdlpackRecord(
            date=0,
            id=[0, 0, 0, 0],
            grid=pytdlpack.grids['nbmak'],
            data=np.array([0]))
        template_rec.is2 = da.encoding[
            'tdlp_is2']  # this loads the grid metadata
    template_rec.primary_missing_value = 9999.0
    for t in prodicized:
        for var in self._obj.data_vars:
            # select slice of array for tdlpack record
            loc = {k: v for (k, v) in zip(coord_meta, t)}
            da = self._obj[var].loc[loc].squeeze()
            # put extra metadata that varies by variable in loc for updating tdlpid
            for m in meta_varying_by_var:
                loc[m] = da.encoding[f'tdlp_{m}']
            tdlpid.update(**loc)
            # shape data array appropriately for station or grid formatted tdlpack record
            if station:
                data = da.data
            else:
                data = da.data.transpose()
            # build out a tdlpack DataRecord with appropriate metadata
            idlist = [
                tdlpid.word1, tdlpid.word2, tdlpid.word3, tdlpid.word4
            ]
            if var_constants is None:
                # NOTE(review): `plain` is computed in both branches but
                # make_record below is passed the literal 'PLAIN TEXT' --
                # verify whether `plain` should be used instead.
                plain = 'NO VAR MATCH'
                # let dec_scale allow for min_unique values in the space between the max and min
                datamax = np.nanmax(data)
                datamin = np.nanmin(data)
                if datamax == datamin or np.isnan(datamax):
                    dec_scale = 9  # data is a constant or all missing and will compress well
                else:
                    log10range = np.log10(
                        np.nanmax(data) - np.nanmin(data))
                    range_place = np.floor(log10range)
                    dec_scale = int(
                        np.ceil(np.log10(min_unique)) - range_place)
            else:
                plain = var_constants.loc[tdlpid.cccfff]['plain']
                dec_scale = var_constants.loc[tdlpid.cccfff]['iscale']
            date = da.date.data.squeeze()[()]
            rec = make_record(template_rec, idlist, data, 'PLAIN TEXT', date)
            rec.pack(dec_scale=dec_scale)
            logger.debug(
                f'writing {date}, {idlist} with dec_scale: {dec_scale}')
            f.write(rec)
    f.close()
    # Atomically-ish publish the finished file, then remove the staging dir.
    shutil.move(store / filepath.name, file)
    shutil.rmtree(store)
sys.version_info.major) + '.' + str(sys.version_info.minor) sys.path.insert(0, build_path) import pytdlpack # ---------------------------------------------------------------------------------------- # Create some data # ---------------------------------------------------------------------------------------- date = 2019052900 id = [4210008, 10, 24, 0] station_name = ('KACY', 'KBWI', 'KDCA', 'KIAD', 'KPHL') station_data = [10.3, 12.4, 15.6, 8.6, 9999.0] # ---------------------------------------------------------------------------------------- # Create station record and pack # ---------------------------------------------------------------------------------------- sta = pytdlpack.TdlpackStationRecord(station_name) sta.pack() # ---------------------------------------------------------------------------------------- # Create TDLPACK data record and pack # ---------------------------------------------------------------------------------------- rec = pytdlpack.TdlpackRecord(date=date, id=id, lead=24, plain="GFS WIND SPEED", data=station_data, missing_value=9999.0) rec.pack(dec_scale=1) # ---------------------------------------------------------------------------------------- # Open new sequential file and write the records