Example #1
0
 def read(self,num=None,unpack=True):
     """
     Read records from the current position.

     Parameters
     ----------
     num : int, optional
         Number of records to read.  ``None`` (the default) reads all
         remaining records from the current position to the end of the
         file.  A value <= 0 returns an empty list.  (Previously ``None``
         raised TypeError and a negative value raised UnboundLocalError.)
     unpack : bool, optional
         If True (default), unpack each data/station record after reading.

     Returns
     -------
     list
         TdlpackRecord, TdlpackStationRecord, and/or TdlpackTrailerRecord
         instances, in file order.
     """
     recs = []
     if num is None:
         # Default: read everything remaining in the file.
         num = len(self._index['size']) - self.recordnumber
     if num <= 0:
         return recs
     reclist = list(range(self.recordnumber+1,self.recordnumber+1+num))
     for n in reclist:
         nn = n-1 # Zero-based position for the self._index referencing
         kwargs = {}
         self.seek(n)
         kwargs['ioctet'] = self._index['size'][nn]
         kwargs['ipack'] = np.frombuffer(self._filehandle.read(self._index['size'][nn]),dtype='>i4')
         if self._index['type'][nn] == 'data':
             kwargs['reference_date'] = self._index['date'][nn]
             rec = pytdlpack.TdlpackRecord(**kwargs)
             if unpack: rec.unpack()
             recs.append(rec)
         elif self._index['type'][nn] == 'station':
             # Station records are stored byte-swapped relative to data records.
             kwargs['ipack'] = kwargs['ipack'].byteswap()
             kwargs['number_of_stations'] = np.int32(kwargs['ioctet']/pytdlpack.NCHAR)
             rec = pytdlpack.TdlpackStationRecord(**kwargs)
             if unpack: rec.unpack()
             recs.append(rec)
         elif self._index['type'][nn] == 'trailer':
             recs.append(pytdlpack.TdlpackTrailerRecord(**kwargs))
         self.recordnumber = n
     return recs
Example #2
0
# ----------------------------------------------------------------------------------------
# Grid Specs: CONUS Lambert-Conformal 2.5km 2345x1597
# ----------------------------------------------------------------------------------------
# nx was previously undefined (NameError); the grid spec above gives 2345x1597.
nx = 2345
ny = 1597
date = 2019052900
rec_id = [4210008,10,24,0]  # renamed from `id` to avoid shadowing the builtin

# Every grid point is missing: build the array as NaN directly.  (The original
# generated random values and immediately overwrote them with fill(np.nan).)
grid_data = np.full((nx,ny),np.nan)

griddef = pytdlpack.create_grid_definition(proj=3,nx=nx,ny=ny,latll=19.2290,
          lonll=233.7234,orientlon=265.,stdlat=25.,meshlength=2.539703)

# ----------------------------------------------------------------------------------------
# Create TDLPACK data record and pack
# ----------------------------------------------------------------------------------------
rec = pytdlpack.TdlpackRecord(date=date,id=rec_id,lead=24,plain="GFS WIND SPEED",
                              data=grid_data,missing_value=9999.0,grid=griddef)
rec.pack(dec_scale=3)

# ----------------------------------------------------------------------------------------
# Open new sequential file and write the records
# ----------------------------------------------------------------------------------------
f = pytdlpack.open('new_grid.sq',mode='w',format='sequential')
f.write(rec)
f.close()

# ----------------------------------------------------------------------------------------
# Open new random-access file and write the records
# ----------------------------------------------------------------------------------------
fra = pytdlpack.open('new_grid.ra',mode='w',format='random-access',ra_template='large')
fra.write(rec)
fra.close()
Example #3
0
# NOTE(review): this chunk relies on `is1`, `plain`, `stationrec`, and `f`
# being defined earlier in the full example (not visible here).
is2 = np.zeros((pytdlpack.ND7),dtype=np.int32) # Leave is2 empty since we are not packing a gridded record.
is4 = np.zeros((pytdlpack.ND7),dtype=np.int32)
is4[0] = 0
is4[1] = 0
is4[2] = stationrec.number_of_stations  # one data value per station
is4[3] = np.int32(9999)  # primary missing value
is4[4] = np.int32(0)     # no secondary missing value

# ---------------------------------------------------------------------------------------- 
# Create some fake temperature-like data and create an instance of TdlpackRecord where all
# is* arrays are passed in, along with plain and data.
#
# NOTE: is0 is not passed as this section is created by the system.
# ---------------------------------------------------------------------------------------- 
data = np.array([65.4,59.2,68.0,66.1,9999.0],dtype=np.float32)
temprec = pytdlpack.TdlpackRecord(is1=is1,is2=is2,is4=is4,plain=plain,data=data)
temprec.pack() # The temperature data has now been packed.
f.write(temprec) # The packed temperature record has been written to file.

# ----------------------------------------------------------------------------------------
# Close the TDLPACK file.
# ----------------------------------------------------------------------------------------
f.close()

# ----------------------------------------------------------------------------------------
# Re-open file.
# ----------------------------------------------------------------------------------------
del f
f = pytdlpack.open("station.sq",mode="r")
recs = f.read(all=True)
# Fixed: `print recs` was Python 2 statement syntax (SyntaxError under Python 3).
print(recs)
Example #4
0
    def to_tdlpack(self,
                   file,
                   mode='w-',
                   compute: bool = True,
                   var_constants=None,
                   min_unique=1000):
        '''
        Write this object's data variables to a TDLPACK sequential file.

        Parameters
        ----------
        file : str or path-like
            Destination file path.
        mode : {"w", "w-"}, optional, default: "w-"
            Persistence mode: "w" means create (overwrite if exists);
            "w-" means create (fail if exists).
        compute : bool, optional
            Accepted for interface compatibility; not used in this body.
        var_constants : pandas.DataFrame, optional
            Table indexed by the combined ccc/fff id providing 'plain'
            (descriptive text) and 'iscale' (decimal scale) per variable.
            When None, a decimal scale is derived from each slice's data
            range so that at least ``min_unique`` distinct values fit
            between the min and max.
        min_unique : int, optional
            Minimum number of distinct representable values used when
            deriving the decimal scale (only when ``var_constants`` is None).

        Raises
        ------
        ValueError
            If the data lacks 'x'/'y' or 'station' dims, if required
            TDLPACK metadata is missing, or if the destination exists
            (mode 'w-') / is a directory (mode 'w').
        '''

        have_chunks = any(v.chunks for v in self._obj.variables.values())
        # ensure data has x/y or station dims and that any chunks do not span those dims
        if 'station' in self._obj.dims:
            station = True
            if have_chunks:
                self._obj = self._obj.chunk({'station': -1})
        elif 'x' in self._obj.dims and 'y' in self._obj.dims:
            station = False
            if have_chunks:
                self._obj = self._obj.chunk({'x': -1, 'y': -1})
        else:
            raise ValueError(
                "data does not have 'x' and 'y' or 'station' dims for writing to tdlp grid or station formats"
            )

        # Determine which TDLPACK id components vary between variables;
        # components present as coordinates are handled via coord_meta below.
        possible_multi_var_keys = [
            'ccc', 'fff', 'b', 'dd', 'v', 'llll', 'uuuu', 't', 'o', 'i', 's',
            'g'
        ]
        multi_var_keys = [
            k for k in possible_multi_var_keys if not self._iscoord(k)
        ]
        meta_dicts = list()
        for var in self._obj.data_vars:
            da = self._obj[var]
            meta_dicts.append(
                {key: da.encoding[f'tdlp_{key}']
                 for key in multi_var_keys})
        df = pd.DataFrame(meta_dicts).nunique()
        meta_varying_by_var = df.index[df > 1]

        # Collect required TDLPACK metadata either from variable encoding
        # (constant across variables) or from a coordinate (varies per record).
        # NOTE(review): `da` here is the *last* data variable from the loop
        # above -- this assumes required encoding metadata is present and
        # identical on every variable; confirm against callers.
        meta = RequiredTdlpMeta()
        keys = list(meta.__dataclass_fields__.keys())
        coord_meta = list()
        const_meta = list()
        tdlpid = TdlpId()
        for key in keys:
            if f'tdlp_{key}' in da.encoding:
                meta[key] = da.encoding[f'tdlp_{key}']
                tdlpid[key] = meta[key]
                const_meta.append(key)
                continue
            found = False
            for coord_name in self._obj.coords:
                coord = self._obj[coord_name]
                if 'tdlp_name' in coord.attrs:
                    if coord.attrs['tdlp_name'] == key:
                        found = True
                        coord_meta.append(key)
                        meta[key] = coord
                        break
            if not found:
                raise ValueError(
                    f'to_tdlpack requires metadata for {key} to be in encoding or coordinate'
                )

        filepath = Path(file)
        if mode == 'w-':
            if filepath.exists():
                raise ValueError(
                    f"{file} already exists and will not be overwritten; mode: 'w' can overwrite existing files"
                )
        elif mode == 'w':
            if filepath.is_dir():
                raise ValueError(f"cannot clobber directory {file}")

        # Create/truncate the destination, then stage output in a hidden
        # sibling directory so a partial write never replaces the target.
        open(filepath, 'w').close()
        store = filepath.parent / f'.{filepath.name}'
        if store.is_dir():
            logger.warning(f'removing existing hidden directory {store}')
            shutil.rmtree(store)
        store.mkdir(parents=True)

        # Cartesian product over all coordinate-driven metadata values; each
        # combination yields one record per data variable.
        prodicized = product(*[meta[k] for k in coord_meta])
        f = pytdlpack.open(store / filepath.name,
                           mode='w',
                           format='sequential')
        if station:
            template_rec = pytdlpack.TdlpackRecord(date=0,
                                                   id=[0, 0, 0, 0],
                                                   data=np.array([0]))
            stations = pytdlpack.TdlpackStationRecord(
                list(self._obj.station.data))
            stations.pack()
            f.write(stations)
        else:
            # the grid doesn't matter ( can tweak/clean later)
            template_rec = pytdlpack.TdlpackRecord(
                date=0,
                id=[0, 0, 0, 0],
                grid=pytdlpack.grids['nbmak'],
                data=np.array([0]))
            template_rec.is2 = da.encoding[
                'tdlp_is2']  # this loads the grid metadata
        template_rec.primary_missing_value = 9999.0

        for t in prodicized:
            for var in self._obj.data_vars:
                # select slice of array for tdlpack record
                loc = {k: v for (k, v) in zip(coord_meta, t)}
                da = self._obj[var].loc[loc].squeeze()

                # put extra metadata that varies by variable in loc for updating tdlpid
                for m in meta_varying_by_var:
                    loc[m] = da.encoding[f'tdlp_{m}']
                tdlpid.update(**loc)

                # shape data array appropriately for station or grid formatted tdlpack record
                if station:
                    data = da.data
                else:
                    data = da.data.transpose()

                # build out a tdlpack DataRecord with appropriate metadata
                idlist = [
                    tdlpid.word1, tdlpid.word2, tdlpid.word3, tdlpid.word4
                ]
                if var_constants is None:
                    plain = 'NO VAR MATCH'
                    # let dec_scale allow for min_unique values in the space between the max and min
                    datamax = np.nanmax(data)
                    datamin = np.nanmin(data)
                    if datamax == datamin or np.isnan(datamax):
                        dec_scale = 9  # data is a constant or all missing and will compress well
                    else:
                        # reuse the extrema computed above (same values as
                        # recomputing nanmax/nanmin)
                        log10range = np.log10(datamax - datamin)
                        range_place = np.floor(log10range)
                        dec_scale = int(
                            np.ceil(np.log10(min_unique)) - range_place)
                else:
                    plain = var_constants.loc[tdlpid.cccfff]['plain']
                    dec_scale = var_constants.loc[tdlpid.cccfff]['iscale']
                date = da.date.data.squeeze()[()]
                # BUGFIX: pass the computed `plain` text; previously the
                # hard-coded placeholder 'PLAIN TEXT' was written and
                # `plain` was computed but never used.
                rec = make_record(template_rec, idlist, data, plain, date)
                rec.pack(dec_scale=dec_scale)
                logger.debug(
                    f'writing {date}, {idlist} with dec_scale: {dec_scale}')
                f.write(rec)

        f.close()
        # Move the finished file into place and remove the staging directory.
        shutil.move(store / filepath.name, file)
        shutil.rmtree(store)
Example #5
0
date = 2019052900
rec_id = [4210008, 10, 24, 0]  # renamed from `id` to avoid shadowing the builtin
station_name = ('KACY', 'KBWI', 'KDCA', 'KIAD', 'KPHL')
station_data = [10.3, 12.4, 15.6, 8.6, 9999.0]  # 9999.0 marks a missing value

# ----------------------------------------------------------------------------------------
# Create station record and pack
# ----------------------------------------------------------------------------------------
sta = pytdlpack.TdlpackStationRecord(station_name)
sta.pack()

# ----------------------------------------------------------------------------------------
# Create TDLPACK data record and pack
# ----------------------------------------------------------------------------------------
rec = pytdlpack.TdlpackRecord(date=date,
                              id=rec_id,
                              lead=24,
                              plain="GFS WIND SPEED",
                              data=station_data,
                              missing_value=9999.0)
rec.pack(dec_scale=1)

# ----------------------------------------------------------------------------------------
# Open new sequential file and write the records (station record first, then
# data, then a trailer to terminate the sequence)
# ----------------------------------------------------------------------------------------
f = pytdlpack.open('new_station.sq', mode='w', format='sequential')
f.write(sta)
f.write(rec)
f.write(pytdlpack.TdlpackTrailerRecord())
f.close()