Example no. 1
0
 def read(self,num=None,unpack=True):
     """
     Read records starting at the current position.

     Parameters
     ----------
     num : int or None, optional
         Number of records to read.  ``None`` (the default) reads all
         remaining records in the file; ``0`` returns an empty list.
         A negative value raises ``ValueError``.
     unpack : bool, optional
         If True (default), unpack each data/station record after reading.

     Returns
     -------
     list
         TdlpackRecord, TdlpackStationRecord, and TdlpackTrailerRecord
         instances, in file order.
     """
     recs = []
     # Build the list of 1-based record numbers to read.  The original
     # code never defined ``reclist`` for num=None (the default!) or for
     # negative num, raising UnboundLocalError; both are handled now.
     if num == 0:
         return recs
     elif num is None:
         # Read everything from the next record through the end of the
         # index (one index entry per record in the file).
         reclist = list(range(self.recordnumber+1, len(self._index['size'])+1))
     elif num == 1:
         reclist = [self.recordnumber+1]
     elif num > 1:
         reclist = list(range(self.recordnumber+1, self.recordnumber+1+num))
     else:
         raise ValueError('num must be a non-negative integer or None')
     for n in reclist:
         nn = n-1 # 0-based position for self._index referencing
         kwargs = {}
         self.seek(n)
         kwargs['ioctet'] = self._index['size'][nn]
         # Raw packed record: big-endian 32-bit words read straight from
         # the file.
         kwargs['ipack'] = np.frombuffer(self._filehandle.read(self._index['size'][nn]),dtype='>i4')
         if self._index['type'][nn] == 'data':
             kwargs['reference_date'] = self._index['date'][nn]
             rec = pytdlpack.TdlpackRecord(**kwargs)
             if unpack: rec.unpack()
             recs.append(rec)
         elif self._index['type'][nn] == 'station':
             # Station call-letter records are stored byteswapped
             # relative to data records; NCHAR bytes per station.
             kwargs['ipack'] = kwargs['ipack'].byteswap()
             kwargs['number_of_stations'] = np.int32(kwargs['ioctet']/pytdlpack.NCHAR)
             rec = pytdlpack.TdlpackStationRecord(**kwargs)
             if unpack: rec.unpack()
             recs.append(rec)
         elif self._index['type'][nn] == 'trailer':
             recs.append(pytdlpack.TdlpackTrailerRecord(**kwargs))
         # Track the last record consumed so subsequent reads continue
         # from here.
         self.recordnumber = n
     return recs
Example no. 2
0
import pytdlpack
import numpy as np

# ----------------------------------------------------------------------------------------
# Open new TDLPACK file. Here we use pytdlpack.open() which is a function that returns
# an instance (i.e. an object) of class pytdlpack.TdlpackFile.
# ----------------------------------------------------------------------------------------
f = pytdlpack.open("station.sq",mode="w")
# NOTE: converted from Python 2 print statements to the Python 3 print()
# function; the rest of the example already works under Python 3.
print("TDLPACK FILE INFO")
print(f)

# ----------------------------------------------------------------------------------------
# Define a station list and create an instance of TdlpackStationRecord
# ----------------------------------------------------------------------------------------
ccall = ('KBWI','KPHL','KIAD','KLNS','KACY') # Parentheses indicates a tuple (can also be a list).
stationrec = pytdlpack.TdlpackStationRecord(ccall=ccall) # All that is needed is a station call letter list/tuple.
stationrec.pack() # pack() is a method (i.e. a function that acts on a class instance).
f.write(stationrec) # write() is a method of class TdlpackFile.

# ----------------------------------------------------------------------------------------
# Define TDLPACK Identification Sections
#
# NOTE: The pytdlpack module contains constants to use!  -- Like ND7
# ----------------------------------------------------------------------------------------
is1 = np.zeros((pytdlpack.ND7),dtype=np.int32)
is1[0] = 0
is1[1] = 0
is1[2] = 2018  # year
is1[3] = 1     # month
is1[4] = 2     # day
is1[5] = 12    # hour
Example no. 3
0
    def to_tdlpack(self,
                   file,
                   mode='w-',
                   compute: bool = True,
                   var_constants=None,
                   min_unique=1000):
        '''
        Write this dataset to a sequential TDLPACK file.

        Parameters
        ----------
        file : str or path-like
            Destination path for the TDLPACK file.
        mode : {"w", "w-"}, optional, default: "w-"
            Persistence mode: "w" means create (overwrite if exists);
            "w-" means create (fail if exists).
        compute : bool, optional
            NOTE(review): not referenced anywhere in this method;
            presumably reserved for dask-style deferred writes -- confirm.
        var_constants : optional
            Table indexed by the combined ccc/fff id with 'plain' and
            'iscale' columns.  When None, the PLAIN text falls back to
            'NO VAR MATCH' and the decimal scale is derived from the
            data range (see below).
        min_unique : int, optional
            Minimum number of distinct values the derived decimal scale
            should resolve across the data range.
        '''

        have_chunks = any(v.chunks for v in self._obj.variables.values())
        # ensuring has x/y or station dims and that any chunks do not span those dims
        if 'station' in self._obj.dims:
            station = True
            if have_chunks:
                self._obj = self._obj.chunk({'station': -1})
        elif 'x' in self._obj.dims and 'y' in self._obj.dims:
            station = False
            if have_chunks:
                self._obj = self._obj.chunk({'x': -1, 'y': -1})
        else:
            raise ValueError(
                "data does not have 'x' and 'y' or 'station' dims for writing to tdlp grid or station formats"
            )

        # TDLP id components that may differ between data variables; any
        # that are coordinates are instead handled via the coordinate
        # scan further down.
        possible_multi_var_keys = [
            'ccc', 'fff', 'b', 'dd', 'v', 'llll', 'uuuu', 't', 'o', 'i', 's',
            'g'
        ]
        multi_var_keys = [
            k for k in possible_multi_var_keys if not self._iscoord(k)
        ]
        meta_dicts = list()
        for var in self._obj.data_vars:
            da = self._obj[var]
            meta_dicts.append(
                {key: da.encoding[f'tdlp_{key}']
                 for key in multi_var_keys})
        # Keys with more than one unique value across variables must be
        # refreshed per-record inside the write loop below.
        df = pd.DataFrame(meta_dicts).nunique()
        meta_varying_by_var = df.index[df > 1]

        # Gather required TDLPACK metadata: constant values come from
        # encoding, varying values come from coordinates.
        # NOTE(review): ``da`` here is whatever data variable the loop
        # above ended on -- the encoding lookups assume every variable
        # shares these constant keys.
        meta = RequiredTdlpMeta()
        keys = list(meta.__dataclass_fields__.keys())
        coord_meta = list()
        const_meta = list()
        tdlpid = TdlpId()
        for key in keys:
            if f'tdlp_{key}' in da.encoding:
                meta[key] = da.encoding[f'tdlp_{key}']
                tdlpid[key] = meta[key]
                const_meta.append(key)
                continue
            found = False
            for coord_name in self._obj.coords:
                coord = self._obj[coord_name]
                if 'tdlp_name' in coord.attrs:
                    if coord.attrs['tdlp_name'] == key:
                        found = True
                        coord_meta.append(key)
                        meta[key] = coord
                        break
            if not found:
                raise ValueError(
                    f'to_tdlpack requires metadata for {key} be in encoding or coordinate'
                )

        filepath = Path(file)
        if mode == 'w-':
            if filepath.exists():
                raise ValueError(
                    f"{file} already exists and will not be overwritten; mode: 'w' can overwrite existing files"
                )
        elif mode == 'w':
            if filepath.is_dir():
                raise ValueError(f"cannot clobber directory {file}")

        # Touch the destination, then stage the actual write in a hidden
        # sibling directory so a partial write never leaves a corrupt
        # file at the final path.
        open(filepath, 'w').close()
        store = filepath.parent / f'.{filepath.name}'
        if store.is_dir():
            logger.warning(f'removing existing hidden directory {store}')
            shutil.rmtree(store)
        store.mkdir(parents=True)

        # One record per combination of coordinate-varying metadata.
        prodicized = product(*[meta[k] for k in coord_meta])
        f = pytdlpack.open(store / filepath.name,
                           mode='w',
                           format='sequential')
        if station:
            template_rec = pytdlpack.TdlpackRecord(date=0,
                                                   id=[0, 0, 0, 0],
                                                   data=np.array([0]))
            stations = pytdlpack.TdlpackStationRecord(
                list(self._obj.station.data))
            stations.pack()
            f.write(stations)
        else:
            # the grid doesn't matter ( can tweak/clean later)
            template_rec = pytdlpack.TdlpackRecord(
                date=0,
                id=[0, 0, 0, 0],
                grid=pytdlpack.grids['nbmak'],
                data=np.array([0]))
            template_rec.is2 = da.encoding[
                'tdlp_is2']  # this loads the grid metadata
        template_rec.primary_missing_value = 9999.0

        for t in prodicized:
            for var in self._obj.data_vars:
                # select slice of array for tdlpack record
                loc = {k: v for (k, v) in zip(coord_meta, t)}
                da = self._obj[var].loc[loc].squeeze()

                # put extra metadata that varies by variable in loc for updating tdlpid
                for m in meta_varying_by_var:
                    loc[m] = da.encoding[f'tdlp_{m}']
                tdlpid.update(**loc)

                # shape data array appropriately for station or grid formatted tdlpack record
                if station:
                    data = da.data
                else:
                    data = da.data.transpose()

                # build out a tdlpack DataRecord with appropriate metadata
                idlist = [
                    tdlpid.word1, tdlpid.word2, tdlpid.word3, tdlpid.word4
                ]
                if var_constants is None:
                    plain = 'NO VAR MATCH'
                    # let dec_scale allow for min_unique values in the space between the max and min
                    datamax = np.nanmax(data)
                    datamin = np.nanmin(data)
                    if datamax == datamin or np.isnan(datamax):
                        dec_scale = 9  # data is a constant or all missing and will compress well
                    else:
                        log10range = np.log10(
                            np.nanmax(data) - np.nanmin(data))
                        range_place = np.floor(log10range)
                        dec_scale = int(
                            np.ceil(np.log10(min_unique)) - range_place)
                else:
                    plain = var_constants.loc[tdlpid.cccfff]['plain']
                    dec_scale = var_constants.loc[tdlpid.cccfff]['iscale']
                date = da.date.data.squeeze()[()]
                # BUG FIX: pass the computed ``plain`` text instead of the
                # hard-coded placeholder 'PLAIN TEXT'; previously ``plain``
                # was computed in both branches above but never used.
                rec = make_record(template_rec, idlist, data, plain,
                                  date)
                rec.pack(dec_scale=dec_scale)
                logger.debug(
                    f'writing {date}, {idlist} with dec_scale: {dec_scale}')
                f.write(rec)

        f.close()
        # Publish: move the finished file into place and remove the
        # staging directory.
        shutil.move(store / filepath.name, file)
        shutil.rmtree(store)
Example no. 4
0
    sys.version_info.major) + '.' + str(sys.version_info.minor)
sys.path.insert(0, build_path)
import pytdlpack

# ----------------------------------------------------------------------------------------
# Create some data
# ----------------------------------------------------------------------------------------
# Reference date/time as a single integer, YYYYMMDDHH.
date = 2019052900
# 4-word TDLPACK record identifier.
# NOTE(review): the name "id" shadows the builtin id(); left unchanged
# because later (unseen) lines of this script may reference it.
id = [4210008, 10, 24, 0]
# One value per station; 9999.0 matches missing_value below.
station_name = ('KACY', 'KBWI', 'KDCA', 'KIAD', 'KPHL')
station_data = [10.3, 12.4, 15.6, 8.6, 9999.0]

# ----------------------------------------------------------------------------------------
# Create station record and pack
# ----------------------------------------------------------------------------------------
sta = pytdlpack.TdlpackStationRecord(station_name)
sta.pack()

# ----------------------------------------------------------------------------------------
# Create TDLPACK data record and pack
# ----------------------------------------------------------------------------------------
rec = pytdlpack.TdlpackRecord(date=date,
                              id=id,
                              lead=24,
                              plain="GFS WIND SPEED",
                              data=station_data,
                              missing_value=9999.0)
# dec_scale=1 keeps one decimal digit of precision when packing.
rec.pack(dec_scale=1)

# ----------------------------------------------------------------------------------------
# Open new sequential file and write the records