def eager_read(self, slice_=None):
    """Read this column from its netCDF file and return it as a flat array.

    Parameters
    ----------
    slice_ : optional
        Index expression applied to the netCDF variable. When None,
        ``self.tuple_slices`` is used; the parameter exists so that
        eager_head can re-use this method with its own slice.

    Returns
    -------
    np.ndarray
        The data reshaped to 1 dimension. When the variable's attributes
        contain 'calendar', the values are converted to date strings
        (``str`` of a ``datetime.date``).

    """
    ds = LazyResult.retrieve_file(self.file_id)

    # implemented like this to allow re-use of this method from eager_head
    if slice_ is None:
        slice_ = self.tuple_slices

    variable = ds.variables[self.column_name]

    # want just np.array, no MaskedArray; let netCDF4 do the work of replacing missing values
    variable.set_auto_mask(False)

    # the actual read from file call
    data = variable[slice_]

    # TODO: transpose might be required when data variables have dimensions in a different order than the
    # dimensions declarations
    # want dimension = 1
    data = data.reshape(-1)

    attributes = variable.__dict__

    # xarray creates a pandas DatetimeIndex with Timestamps (as it should); to save time however,
    # a shortcut is taken to convert netCDF4 python date -> pandas timestamp -> py datetime
    # TODO: weld pandas DatetimeIndex & Timestamp
    # NOTE(review): recent cftime releases may return cftime.datetime objects from num2date by
    # default -- confirm pd.Timestamp still accepts them with the installed versions
    if 'calendar' in attributes:
        dates = netCDF4.num2date(data, attributes['units'], calendar=attributes['calendar'])
        # builtin str instead of the np.str alias, which was removed in NumPy 1.24
        data = np.array([str(pd.Timestamp(k).date()) for k in dates], dtype=str)

    # at this point, netcdf is expected to read a subset; however, it reads slightly more at the end, so slice;
    # self._slice is empty when using eager head
    if self._slice is not None and self.column_name not in self.dimensions:
        len_slice = self._slice.stop - self._slice.start
        return data[:len_slice]

    return data
def eager_read(self):
    """Read this column's values from the cached file of its table.

    The rows are selected with
    ``slice(self.table.slice_start, self.table.nrows, 1)``.

    Returns
    -------
    np.ndarray
        The ``.values`` of the selected column; converted to str when
        ``self.dtype`` is an object dtype.
        (presumably a NumPy array backing a pandas Series -- confirm
        against what LazyResult.retrieve_file returns)

    """
    # make use of cache by retrieving
    df = LazyResult.retrieve_file(self.table.file_id)

    slice_ = slice(self.table.slice_start, self.table.nrows, 1)
    data = df[self.name][slice_].values

    # treat any object dtype as str; builtin str is used because the
    # np.str alias was removed in NumPy 1.24
    if self.dtype.char == 'O':
        data = data.astype(str)

    return data