def read(self, timestamp=None):
    # TODO: Replace ERA5 missing data (!=0)
    return_img = {}
    return_metadata = {}

    try:
        dataset = Dataset(self.filename)
    except IOError as e:
        print(e)
        print(" ".join([self.filename, "can not be opened"]))
        raise e

    res_lat, res_lon = get_grid_resolution(
        dataset.variables['latitude'][:],
        dataset.variables['longitude'][:])
    self.grid = (ERA_RegularImgGrid(res_lat, res_lon)
                 if not self.subgrid else self.subgrid)

    for parameter, variable in dataset.variables.items():
        if parameter in self.parameter:
            param_metadata = {}
            for attrname in variable.ncattrs():
                param_metadata.update(
                    {str(attrname): getattr(variable, attrname)})

            param_data = variable[:].flatten()

            return_img.update(
                {str(parameter): param_data[self.grid.activegpis]})
            return_metadata.update({str(parameter): param_metadata})

            try:
                return_img[parameter]
            except KeyError:
                path, thefile = os.path.split(self.filename)
                print('%s in %s is corrupt - filling '
                      'image with NaN values' % (parameter, thefile))
                # np.empty(...).fill() returns None, use np.full instead
                return_img[parameter] = np.full(self.grid.n_gpi, np.nan)
                return_metadata.setdefault('corrupt_parameters',
                                           []).append(parameter)

    dataset.close()

    if self.array_1D:
        return Image(self.grid.activearrlon, self.grid.activearrlat,
                     return_img, return_metadata, timestamp)
    else:
        nlat = np.unique(self.grid.activearrlat).size
        nlon = np.unique(self.grid.activearrlon).size

        for key in return_img:
            return_img[key] = return_img[key].reshape((nlat, nlon))

        return Image(self.grid.activearrlon.reshape(nlat, nlon),
                     self.grid.activearrlat.reshape(nlat, nlon),
                     return_img, return_metadata, timestamp)
def read(self, timestamp=None):
    '''
    Read a single SMOS image, if it exists, else read an empty image.

    Parameters
    ----------
    timestamp : datetime, optional (default: None)
        Time stamp for the image to read.
    '''
    if self.grid is None:
        self.grid = EASE25CellGrid()

    try:
        return_img, return_metadata = self.read_img()
    except IOError:
        warnings.warn(
            'Error loading image for {}, generating empty image instead'.
            format(timestamp.date()))
        return_img, return_metadata = self.read_empty()

    if self.flatten:
        return Image(self.grid.activearrlon, self.grid.activearrlat,
                     return_img, return_metadata, timestamp)
    else:
        yres, xres = self.grid.shape
        for key in return_img:
            return_img[key] = np.flipud(return_img[key].reshape(
                xres, yres))

        return Image(self.grid.activearrlon.reshape(xres, yres),
                     np.flipud(self.grid.activearrlat.reshape(xres, yres)),
                     return_img, return_metadata, timestamp)
def read(self, timestamp=None):
    # Returns the selected parameters for an ESA CCI SM image and
    # according metadata
    return_img = {}
    return_metadata = {}

    try:
        dataset = Dataset(self.filename)
    except IOError as e:
        print(e)
        print(" ".join([self.filename, "can not be opened"]))
        raise e

    param_names = []
    for parameter in self.parameters:
        param_names.append(parameter)

    for parameter, variable in dataset.variables.items():
        if parameter in param_names:
            param_metadata = {}
            for attrname in variable.ncattrs():
                if attrname in ['long_name', 'units']:
                    param_metadata.update(
                        {str(attrname): getattr(variable, attrname)})

            param_data = dataset.variables[parameter][:]
            param_data = np.flipud(param_data[0, :, :]).flatten()
            np.ma.set_fill_value(param_data, 9999)

            return_img.update(
                {str(parameter): param_data[self.grid.activegpis]})
            return_metadata.update({str(parameter): param_metadata})

            # Check for corrupt files
            try:
                return_img[parameter]
            except KeyError:
                path, thefile = os.path.split(self.filename)
                print('%s in %s is corrupt - filling image with NaN values'
                      % (parameter, thefile))
                # np.empty(...).fill() returns None, use np.full instead
                return_img[parameter] = np.full(self.grid.n_gpi, np.nan)
                return_metadata.setdefault('corrupt_parameters',
                                           []).append(parameter)

    dataset.close()

    if self.array_1D:
        return Image(self.grid.activearrlon, self.grid.activearrlat,
                     return_img, return_metadata, timestamp)
    else:
        for key in return_img:
            return_img[key] = np.flipud(return_img[key].reshape(
                (720, 1440)))

        return Image(self.grid.activearrlon.reshape((720, 1440)),
                     np.flipud(self.grid.activearrlat.reshape((720, 1440))),
                     return_img, return_metadata, timestamp)
def read(self, timestamp=None): """ Reads a single C3S image. Parameters ------- timestamp: datetime, optional (default: None) Timestamp for file to read. Pass None if file contains only 1 timestamp Returns ------- image : Image Image object from netcdf content """ ds = Dataset(self.filename, mode='r') param_img = {} img_meta = {'global': {}} if self.parameters[0] is None: parameters = ds.variables.keys() else: parameters = self.parameters for param in parameters: if param in ['lat', 'lon', 'time']: continue param_metadata = {} variable = ds.variables[param] for attr in variable.ncattrs(): param_metadata.update({str(attr): getattr(variable, attr)}) param_data = np.flipud(variable[0][:].filled()).flatten() param_img[str(param)] = param_data[self.grid.activegpis] img_meta[param] = param_metadata # add global attributes for attr in ds.ncattrs(): img_meta['global'][attr] = ds.getncattr(attr) ds.close() if self.array_1D: return Image(self.grid.activearrlon, self.grid.activearrlat, param_img, img_meta, timestamp) else: yres, xres = self.grid.shape for key in param_img: param_img[key] = param_img[key].reshape(xres, yres) return Image(self.grid.activearrlon.reshape(xres, yres), self.grid.activearrlat.reshape(xres, yres), param_img, img_meta, timestamp)
def read(self, timestamp=None): """ Read a single C3S image, if it exists, otherwise fill an empty image. Parameters ---------- timestamp : datetime, optional (default: None) Time stamp of the image, if this is passed, it is compared to the time stamp from the loaded file and must match """ data, var_meta, glob_meta, img_timestamp = self._read_flat_img() if timestamp is not None: if img_timestamp is None: img_timestamp = timestamp assert img_timestamp == timestamp, "Time stamps do not match" # when flattened, this drops already all non-active gpis data = self._mask_and_reshape(data) if self.flatten: return Image(self.subgrid.activearrlon, self.subgrid.activearrlat, data, var_meta, timestamp) else: # also cut 2d case to active area min_lat, min_lon = self.subgrid.activearrlat.min(), \ self.subgrid.activearrlon.min() max_lat, max_lon = self.subgrid.activearrlat.max(), \ self.subgrid.activearrlon.max() corners = self.grid.gpi2rowcol([ self.grid.find_nearest_gpi(min_lon, min_lat)[0], # llc self.grid.find_nearest_gpi(max_lon, min_lat)[0], # lrc self.grid.find_nearest_gpi(max_lon, max_lat)[0], # urc ]) rows = slice(corners[0][0], corners[0][2] + 1) cols = slice(corners[1][0], corners[1][1] + 1) return Image(self.grid.arrlon.reshape(*self.shape)[rows, cols], np.flipud(self.grid.arrlat.reshape(*self.shape)[rows, cols]), {k: np.flipud(v[rows, cols]) for k, v in data.items()}, var_meta, timestamp)
def read(self, timestamp, **kwargs):
    """
    Read a single image at a given timestamp. Raises `KeyError` if
    timestamp is not available in the dataset.

    Parameters
    ----------
    timestamp : datetime.datetime
        Timestamp of image of interest

    Returns
    -------
    img_dict : dict
        Dictionary containing the image data as numpy array, using the
        parameter name as key.

    Raises
    ------
    KeyError
    """
    try:
        data = {
            self.parameter: self.dataset[self.parameter]
            .sel(time=timestamp)
            .values[self.grid.activegpis]
        }
        return Image(self.lon, self.lat, data, self.metadata, timestamp)
    except KeyError:  # pragma: no cover
        raise KeyError(
            f"Timestamp {timestamp} is not available in the dataset!"
        )
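# A minimal, self-contained sketch (not part of the library) of the xarray
# time-selection pattern used above: pick one timestamp from a time-indexed
# variable and subset the flat values with the active grid points. All names
# here (times, sm, active_gpis) are illustrative assumptions.
import numpy as np
import pandas as pd
import xarray as xr

times = pd.date_range("2020-01-01", periods=3, freq="D")
sm = xr.DataArray(np.random.rand(3, 8), dims=("time", "gpi"),
                  coords={"time": times})
active_gpis = np.array([0, 2, 5])

# .sel(time=...) raises KeyError for timestamps not in the index, which the
# reader above turns into its own, more descriptive KeyError.
data = sm.sel(time=times[1]).values[active_gpis]
print(data.shape)  # (3,)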
def read_eps_l2(filename, timestamp):
    """
    Use of correct lvl2 reader and data preparation.
    """
    data = {}
    eps_file = read_eps(filename)
    ptype = eps_file.mphr['PRODUCT_TYPE']
    fmv = int(eps_file.mphr['FORMAT_MAJOR_VERSION'])

    if (ptype == 'SMR') or (ptype == 'SMO'):
        if fmv == 12:
            raw_data, metadata = read_smx_fmv_12(eps_file)
        else:
            raise RuntimeError("SMX format version not supported.")

        beams = ['f_', 'm_', 'a_']

        for field in raw_data:
            if len(raw_data[field].shape) == 1:
                data[field] = raw_data[field]
            elif len(raw_data[field].shape) == 2:
                for i, beam in enumerate(beams):
                    data[beam + field] = raw_data[field][:, i]
            else:
                raise RuntimeError("Unexpected variable shape.")

        longitude = data.pop('LONGITUDE')
        latitude = data.pop('LATITUDE')

    else:
        raise ValueError("Format not supported. Product type {:1}"
                         " Format major version: {:2}".format(ptype, fmv))

    return Image(longitude, latitude, data, metadata, timestamp,
                 timekey='jd')
def read(self, timestamp=None): """ Read specific image for given datetime timestamp. Parameters ---------- timestamp : datetime.datetime exact observation timestamp of the image that should be read Returns ------- data : dict dictionary of numpy arrays that hold the image data for each variable of the dataset metadata : dict dictionary of numpy arrays that hold the metadata timestamp : datetime.datetime exact timestamp of the image lon : numpy.array or None array of longitudes, if None self.grid will be assumed lat : numpy.array or None array of latitudes, if None self.grid will be assumed time_var : string or None variable name of observation times in the data dict, if None all observations have the same timestamp """ if self.pygrib1: param_names = { '40': 'SM_layer1_0-7cm', '41': 'SM_layer2_7-28cm', '42': 'SM_layer3_28-100cm', '43': 'SM_layer4_100-289cm' } else: param_names = { 'SWI1 Soil wetness index in layer 1': 'SM_layer1_0-7cm', 'SWI2 Soil wetness index in layer 2': 'SM_layer2_7-28cm', 'SWI3 Soil wetness index in layer 3': 'SM_layer3_28-100cm', 'SWI4 Soil wetness index in layer 4': 'SM_layer4_100-289cm' } data = {} metadata = {} with pygrib.open(self.filename) as grb: for i, message in enumerate(grb): message.expand_grid(self.expand_grid) if i == 1: lats, lons = message.latlons() data[param_names[message['parameterName']]] = message.values # read and store metadata md = {} for k in self.metadata_fields: if message.valid_key(k): md[k] = message[k] metadata[param_names[message['parameterName']]] = md return Image(lons, lats, data, metadata, timestamp)
def eps_dict2generic(native_img): """ Convert dict of eps Images into a dict with generic Images Parameters ---------- native_img : pygeobase.object_base.Image Native image. Returns ------- img : list of pygeobase.object_base.Image Generic images. """ img = { 'img1': {}, 'img2': {}, 'img3': {}, 'img4': {}, 'img5': {}, 'img6': {} } for szf_img in native_img: n_records = native_img[szf_img].lat.shape[0] generic_data = get_template_ASCATL1B_SZF(n_records) fields = [('jd', 'jd'), ('sat_id', None), ('beam_number', 'BEAM_NUMBER'), ('abs_orbit_nr', None), ('as_des_pass', 'AS_DES_PASS'), ('azi', 'AZI_ANGLE_FULL'), ('inc', 'INC_ANGLE_FULL'), ('sig', 'SIGMA0_FULL'), ('land_frac', 'LAND_FRAC'), ('flagfield_rf1', 'FLAGFIELD_RF1'), ('flagfield_rf2', 'FLAGFIELD_RF2'), ('flagfield_pl', 'FLAGFIELD_PL'), ('flagfield_gen1', 'FLAGFIELD_GEN1'), ('flagfield_gen2', 'FLAGFIELD_GEN2'), ('land_flag', 'F_LAND'), ('usable_flag', 'F_USABLE')] for field in fields: if field[1] is None: continue generic_data[field[0]] = native_img[szf_img].data[field[1]] fields = [('sat_id', 'SPACECRAFT_ID'), ('abs_orbit_nr', 'ORBIT_START')] for field in fields: generic_data[field[0]] = native_img[szf_img].metadata[ field[1]].repeat(n_records) # convert sat_id (spacecraft id) to the intern definition sat_id_lut = np.array([0, 4, 3, 5]) generic_data['sat_id'] = sat_id_lut[generic_data['sat_id']] img[szf_img] = Image(native_img[szf_img].lon, native_img[szf_img].lat, generic_data, native_img[szf_img].metadata, native_img[szf_img].timestamp, timekey='jd') return img
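# Minimal sketch of the look-up-table remapping used above to translate the
# spacecraft id from the product convention to the internal one: numpy fancy
# indexing maps every raw id to sat_id_lut[id]. The raw ids are illustrative.
import numpy as np

sat_id_lut = np.array([0, 4, 3, 5])     # index = spacecraft id in the file
raw_sat_id = np.array([1, 1, 2, 3])     # ids as read from the metadata
print(sat_id_lut[raw_sat_id])           # -> [4 4 3 5]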
def read(self, timestamp=None):
    # TODO: Replace ERA5 missing data (!=0)
    grbs = pygrib.open(self.filename)

    img = {}
    metadata = {}

    for message in grbs:
        param_name = message.short_name

        if param_name not in self.parameter:
            continue

        metadata[param_name] = {}
        message.expand_grid(self.expand_grid)
        img[param_name] = message.values
        lats, lons = message.latlons()

        metadata[param_name]['units'] = message['units']
        metadata[param_name]['long_name'] = message['parameterName']

        if 'levels' in message.keys():
            metadata[param_name]['depth'] = '{:} cm'.format(
                message['levels'])
        if 'level' in message.keys():
            metadata[param_name]['depth'] = '{:} cm'.format(
                message['level'])

    grbs.close()

    lons_gt_180 = np.where(lons > 180.0)
    lons[lons_gt_180] = lons[lons_gt_180] - 360

    return Image(lons, lats, img, metadata, timestamp)
def read(self, timestamp=None):
    return_img = {}
    metadata_img = {}

    try:
        ds = h5py.File(self.filename, mode='r')
    except IOError as e:
        print(e)
        print(" ".join([self.filename, "can not be opened"]))
        raise e

    overpass_str = '_' + self.overpass.upper() if self.overpass else ''
    sm_field = 'Soil_Moisture_Retrieval_Data%s' % overpass_str

    if sm_field not in ds.keys():
        raise NameError(sm_field,
                        'Field does not exist. Try deactivating overpass')

    if self.overpass:
        overpass_str = ('_' + self.overpass.lower()
                        if self.overpass.upper() == 'PM' else '')
    else:
        overpass_str = ''

    latitude = ds[sm_field]['latitude%s' % overpass_str][:]
    longitude = ds[sm_field]['longitude%s' % overpass_str][:]

    for parameter in self.parameters:
        metadata = {}
        param = ds[sm_field][parameter + overpass_str]
        data = param[:]

        # fill metadata dictionary with metadata from image
        for key in param.attrs:
            metadata[key] = param.attrs[key]

        # mask according to valid_min, valid_max and _FillValue;
        # skip masking if any of these attributes is missing
        try:
            fill_value = param.attrs['_FillValue']
            valid_min = param.attrs['valid_min']
            valid_max = param.attrs['valid_max']
        except KeyError:
            pass
        else:
            data = np.where(
                np.logical_or(data < valid_min, data > valid_max),
                fill_value, data)

        return_img[parameter + overpass_str] = data
        metadata_img[parameter + overpass_str] = metadata

    if self.flatten:
        longitude = longitude.flatten()
        latitude = latitude.flatten()
        for param in self.parameters:
            return_img[param + overpass_str] = \
                return_img[param + overpass_str].flatten()

    return Image(longitude, latitude, return_img, metadata_img, timestamp)
def read(self, timestamp=None, additional_kw=None):
    if timestamp == datetime(2016, 1, 1):
        raise IOError("no data for day")
    # 2x2 pixels around zero lat, lon
    return Image(np.array([0.5, 0.5, -0.5, -0.5]),
                 np.array([1, -1, 1, -1]),
                 {'var1': np.ones(4) * timestamp.day},
                 {'kw': additional_kw}, timestamp)
def test_getitem():
    lon = np.array([1, 2, 3], dtype=np.float32)
    lat = np.array([1, 2, 3], dtype=np.float32)
    data = {
        'variable': np.array([1, 2, 3], dtype=np.int16),
        'jd': np.array([5, 6, 7], dtype=np.float32)
    }
    metadata = {'attribute': 'test'}
    timestamp = datetime(2000, 1, 1, 12)
    img = Image(lon, lat, data, metadata, timestamp, timekey='jd')

    nptest.assert_allclose(img['jd'], data['jd'])
    nptest.assert_allclose(img['variable'], data['variable'])
def read(self, timestamp): """ Read a single SMOS image, if it exists, otherwise fill an empty image Parameters -------- timestamp : datetime Time stamp for the image to read. """ if timestamp is None: raise ValueError("No time stamp passed") try: return_img, return_metadata = self._read_img() except IOError: warnings.warn('Error loading image for {}, ' 'generating empty image instead'.format(timestamp.date())) return_img, return_metadata = self._read_empty() if self.flatten: self.img = Image(self.grid.activearrlon, self.grid.activearrlat, return_img, return_metadata, timestamp) else: try: shape = self.grid.subset_shape except AttributeError: shape = self.grid.shape rows, cols = shape for key in return_img: return_img[key] = np.flipud(return_img[key].reshape(rows, cols)) self.img = Image(self.grid.activearrlon.reshape(rows, cols), np.flipud(self.grid.activearrlat.reshape(rows, cols)), return_img, return_metadata, timestamp) return self.img
def read(self, timestamp=None): """ reads from the netCDF file given by the filename Returns ------- data : pygeobase.object_base.Image """ if self.ds is None: self.ds = netCDF4.Dataset(self.filename) if self.nc_variables is None: var_to_read = self.ds.variables.keys() else: var_to_read = self.nc_variables # make sure that essential variables are read always: if 'latitude' not in var_to_read: var_to_read.append('latitude') if 'longitude' not in var_to_read: var_to_read.append('longitude') # store data in dictionary dd = {} num_cells = self.ds.dimensions['numCells'].size for name in var_to_read: variable = self.ds.variables[name] dd[name] = variable[:].flatten() if len(variable.shape) == 1: # If the data is 1D then we repeat it for each cell dd[name] = np.repeat(dd[name], num_cells) if name == 'utc_line_nodes': utc_dates = netCDF4.num2date(dd[name], variable.units) dd['jd'] = netCDF4.netcdftime.JulianDayFromDate(utc_dates) if 'soil_moisture' in dd: # mask all the arrays based on fill_value of latitude valid_data = ~dd['soil_moisture'].mask for name in dd: dd[name] = dd[name][valid_data] longitude = dd.pop('longitude') latitude = dd.pop('latitude') return Image(longitude, latitude, dd, {}, timestamp, timekey='utc_line_nodes')
def test_dtype_property():
    lon = np.array([1, 2, 3], dtype=np.float32)
    lat = np.array([1, 2, 3], dtype=np.float32)
    data = {
        'variable': np.array([1, 2, 3], dtype=np.int16),
        'jd': np.array([5, 6, 7], dtype=np.float32)
    }
    metadata = {'attribute': 'test'}
    timestamp = datetime(2000, 1, 1, 12)
    img = Image(lon, lat, data, metadata, timestamp, timekey='jd')

    assert np.dtype([('jd', np.float32),
                     ('variable', np.int16)]) == img.dtype
    assert sorted(list(img.dtype.fields)) == ['jd', 'variable']
    assert img.dtype.names == ('jd', 'variable')
def read(self, interval, **kwargs): """ Return an image for a specific interval. Parameters ---------- interval : tuple (start, end) Returns ------- image : object pygeobase.object_base.Image object """ start, end = interval timestamps = super(IntervalReadingMixin, self).tstamps_for_daterange(start, end) if len(timestamps) == 0: return None dataset = {} metadataset = {} lons = [] lats = [] for timestamp in timestamps: img = super(IntervalReadingMixin, self).read(timestamp) for key in img.data: if key not in dataset: dataset[key] = [] dataset[key].append(img.data[key]) metadataset[timestamp] = img.metadata lons.append(img.lon) lats.append(img.lat) for key in dataset: dataset[key] = np.concatenate(dataset[key]) lons = np.concatenate(lons) lats = np.concatenate(lats) return Image(lons, lats, dataset, metadataset, interval[0], timekey=img.timekey)
def test_tuple_unpacking_no_timekey():
    lon = np.array([1, 2, 3])
    lat = np.array([1, 2, 3])
    data = {'variable': np.array([1, 2, 3]), 'jd': np.array([5, 6, 7])}
    metadata = {'attribute': 'test'}
    timestamp = datetime(2000, 1, 1, 12)
    img = Image(lon, lat, data, metadata, timestamp)

    (return_data, return_metadata, return_timestamp, return_lon,
     return_lat, times) = img

    for key in data:
        nptest.assert_allclose(return_data[key], data[key])

    assert return_metadata == metadata
    assert return_timestamp == timestamp
    nptest.assert_allclose(return_lon, lon)
    nptest.assert_allclose(return_lat, lat)
    assert times is None
def _assemble_img(self, timestamp, **kwargs):
    return_img = {}
    metadata_img = {}

    for parameter in self.parameters:
        custom_templ = self.filename_template % (
            parameter, timestamp.strftime('%Y%m%d_%H%M'))

        grb_file = self._build_filename(timestamp,
                                        custom_templ=custom_templ)
        self._open(grb_file)

        (data, metadata, timestamp, lon, lat,
         time) = self.fid.read(timestamp=timestamp, **kwargs)

        return_img[parameter] = data
        metadata_img[parameter] = metadata

    return Image(lon, lat, return_img, metadata_img, timestamp)
def read(self, timestamp=None): return_img = {} metadata_img = {} try: ds = h5py.File(self.filename) except IOError as e: print(e) print(" ".join([self.filename, "can not be opened"])) raise e latitude = ds['Soil_Moisture_Retrieval_Data']['latitude'][:] longitude = ds['Soil_Moisture_Retrieval_Data']['longitude'][:] for parameter in self.parameters: metadata = {} param = ds['Soil_Moisture_Retrieval_Data'][parameter] data = param[:] # mask according to valid_min, valid_max and _FillValue try: fill_value = param.attrs['_FillValue'] valid_min = param.attrs['valid_min'] valid_max = param.attrs['valid_max'] except KeyError: pass # fill metadata dictionary with metadata from image for key in param.attrs: metadata[key] = param.attrs[key] data = np.where(np.logical_or(data < valid_min, data > valid_max), fill_value, data) return_img[parameter] = data metadata_img[parameter] = metadata if self.flatten: longitude = longitude.flatten() latitude = latitude.flatten() for param in self.parameters: return_img[param] = return_img[param].flatten() return Image(longitude, latitude, return_img, metadata_img, timestamp)
def read(self, timestamp=None):
    grbs = pygrib.open(self.filename)
    if grbs.messages != 1:
        grbs.close()
        raise IOError("Wrong number of messages in file")

    metadata = {}
    for message in grbs:
        message.expand_grid(self.expand_grid)
        image = message.values
        lats, lons = message.latlons()

        metadata['units'] = message['units']
        metadata['long_name'] = message['parameterName']
        metadata['depth'] = '{:} cm'.format(message['levels'])

    grbs.close()

    lons_gt_180 = np.where(lons > 180.0)
    lons[lons_gt_180] = lons[lons_gt_180] - 360

    return Image(lons, lats, image, metadata, timestamp)
def read(self, timestamp=None, lat_lon_bbox=None):
    """
    Read specific image for given datetime timestamp.

    Parameters
    ----------
    timestamp : datetime.datetime
        exact observation timestamp of the image that should be read
    lat_lon_bbox : list, optional
        list of lat, lon coordinates of bounding box
        [lat_min, lat_max, lon_min, lon_max]

    Returns
    -------
    data : dict or None
        dictionary of numpy arrays that hold the image data for each
        variable of the dataset, if no data was found None is returned
    metadata : dict
        dictionary of numpy arrays that hold the metadata
    timestamp : datetime.datetime
        exact timestamp of the image
    lon : numpy.array or None
        array of longitudes, if None self.grid will be assumed
    lat : numpy.array or None
        array of latitudes, if None self.grid will be assumed
    time_var : string or None
        variable name of observation times in the data dict, if None all
        observations have the same timestamp
    """
    zipped = False
    if os.path.splitext(self.filename)[1] == '.gz':
        zipped = True

    # for zipped files use an unzipped temporary copy
    if zipped:
        with NamedTemporaryFile(delete=False) as tmp_fid:
            with GzipFile(self.filename) as gz_fid:
                tmp_fid.write(gz_fid.read())
            filename = tmp_fid.name
    else:
        filename = self.filename

    with bufr_reader.BUFRReader(filename) as bufr:
        lons = []
        ssm = []
        ssm_noise = []
        ssm_corr_flag = []
        ssm_proc_flag = []
        data_in_bbox = True
        for i, message in enumerate(bufr.messages()):
            if i == 0:
                # first message is just lat, lon extent
                # check if any data is in the bbox
                if lat_lon_bbox is not None:
                    lon_min, lon_max = message[0, 2], message[0, 3]
                    lat_min, lat_max = message[0, 4], message[0, 5]
                    if (lat_lon_bbox[0] > lat_max
                            or lat_lon_bbox[1] < lat_min
                            or lat_lon_bbox[2] > lon_max
                            or lat_lon_bbox[3] < lon_min):
                        data_in_bbox = False
                        break
            elif data_in_bbox:
                # first 5 elements are there only once, after that,
                # 4 elements are repeated till the end of the array.
                # These 4 are ssm, ssm_noise, ssm_corr_flag and
                # ssm_proc_flag. Each message contains the values for
                # 120 lons between lat_min and lat_max, the grid spacing
                # is 0.00416667 degrees.
                lons.append(message[:, 0])
                lat_min = message[0, 1]
                lat_max = message[0, 2]
                ssm.append(message[:, 4::4])
                ssm_noise.append(message[:, 5::4])
                ssm_corr_flag.append(message[:, 6::4])
                ssm_proc_flag.append(message[:, 7::4])

    if zipped:
        os.remove(filename)

    if data_in_bbox:
        ssm = np.rot90(np.vstack(ssm)).astype(np.float32)
        ssm_noise = np.rot90(np.vstack(ssm_noise)).astype(np.float32)
        ssm_corr_flag = np.rot90(np.vstack(ssm_corr_flag)).astype(
            np.float32)
        ssm_proc_flag = np.rot90(np.vstack(ssm_proc_flag)).astype(
            np.float32)

        lats_dim = np.linspace(lat_max, lat_min, ssm.shape[0])
        lons_dim = np.concatenate(lons)

        data = {
            'ssm': ssm,
            'ssm_noise': ssm_noise,
            'proc_flag': ssm_proc_flag,
            'corr_flag': ssm_corr_flag
        }

        # if there is a gap in the image it is not a 2D array in
        # lon, lat space but has a jump in latitude or longitude
        # detect a jump in lon or lat spacing
        lon_jump_ind = np.where(np.diff(lons_dim) > 0.00418)[0]

        if lon_jump_ind.size > 1:
            print("More than one jump in longitude")

        if lon_jump_ind.size == 1:
            lon_jump_ind = lon_jump_ind[0]
            diff_lon_jump = np.abs(lons_dim[lon_jump_ind] -
                                   lons_dim[lon_jump_ind + 1])
            missing_elements = int(np.round(diff_lon_jump / 0.00416666))
            missing_lons = np.linspace(lons_dim[lon_jump_ind],
                                       lons_dim[lon_jump_ind + 1],
                                       missing_elements,
                                       endpoint=False)

            # fill up longitude dimension to full grid
            lons_dim = np.concatenate([
                lons_dim[:lon_jump_ind], missing_lons,
                lons_dim[lon_jump_ind + 1:]
            ])

            # fill data with NaN values
            empty = np.empty((lats_dim.shape[0], missing_elements))
            empty.fill(1e38)
            for key in data:
                data[key] = np.concatenate([
                    data[key][:, :lon_jump_ind], empty,
                    data[key][:, lon_jump_ind + 1:]
                ], axis=1)

        lat_jump_ind = np.where(np.diff(lats_dim) > 0.00418)[0]

        if lat_jump_ind.size > 1:
            print("More than one jump in latitude")

        if lat_jump_ind.size == 1:
            lat_jump_ind = lat_jump_ind[0]
            diff_lat_jump = np.abs(lats_dim[lat_jump_ind] -
                                   lats_dim[lat_jump_ind + 1])
            missing_elements = int(np.round(diff_lat_jump / 0.00416666))
            missing_lats = np.linspace(lats_dim[lat_jump_ind],
                                       lats_dim[lat_jump_ind + 1],
                                       missing_elements,
                                       endpoint=False)

            # fill up latitude dimension to full grid
            lats_dim = np.concatenate([
                lats_dim[:lat_jump_ind], missing_lats,
                lats_dim[lat_jump_ind + 1:]
            ])

            # fill data with NaN values
            empty = np.empty((missing_elements, lons_dim.shape[0]))
            empty.fill(1e38)
            for key in data:
                data[key] = np.concatenate([
                    data[key][:lat_jump_ind, :], empty,
                    data[key][lat_jump_ind + 1:, :]
                ], axis=0)

        lons, lats = np.meshgrid(lons_dim, lats_dim)

        # only return data in bbox
        if lat_lon_bbox is not None:
            data_ind = np.where((lats >= lat_lon_bbox[0])
                                & (lats <= lat_lon_bbox[1])
                                & (lons >= lat_lon_bbox[2])
                                & (lons <= lat_lon_bbox[3]))
            # indexing returns 1d array
            # get shape of lats_dim and lons_dim to be able to reshape
            # the 1d arrays to the correct 2d shapes
            lats_dim_shape = np.where(
                (lats_dim >= lat_lon_bbox[0])
                & (lats_dim <= lat_lon_bbox[1]))[0].shape[0]
            lons_dim_shape = np.where(
                (lons_dim >= lat_lon_bbox[2])
                & (lons_dim <= lat_lon_bbox[3]))[0].shape[0]

            lons = lons[data_ind].reshape(lats_dim_shape, lons_dim_shape)
            lats = lats[data_ind].reshape(lats_dim_shape, lons_dim_shape)
            for key in data:
                data[key] = data[key][data_ind].reshape(
                    lats_dim_shape, lons_dim_shape)
    else:
        lons = None
        lats = None
        data = None

    return Image(lons, lats, data, {}, timestamp)
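# Small numeric sketch of the gap-filling step above: detect a jump in an
# otherwise regularly spaced coordinate axis, rebuild the missing values with
# np.linspace and concatenate them back in. Spacing and values are
# illustrative, not taken from a real product.
import numpy as np

spacing = 0.00416667
lons_dim = 10.0 + spacing * np.array([0, 1, 2, 3, 4, 9, 10, 11])

jump = np.where(np.diff(lons_dim) > 0.00418)[0][0]
diff = abs(lons_dim[jump] - lons_dim[jump + 1])
missing = int(np.round(diff / spacing))
filled = np.concatenate([lons_dim[:jump],
                         np.linspace(lons_dim[jump], lons_dim[jump + 1],
                                     missing, endpoint=False),
                         lons_dim[jump + 1:]])
print(filled.size)  # -> 12, i.e. a gap-free regular axis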
def read(self, timestamp=None, additional_kw=None):
    return Image(None, None, None, {'kw': additional_kw}, timestamp)
def read(self, timestamp=None): """ Read specific image for given datetime timestamp. Parameters ---------- timestamp : datetime.datetime exact observation timestamp of the image that should be read Returns ------- data : dict dictionary of numpy arrays that hold the image data for each variable of the dataset metadata : dict dictionary of numpy arrays that hold the metadata timestamp : datetime.datetime exact timestamp of the image lon : numpy.array or None array of longitudes, if None self.grid will be assumed lat : numpy.array or None array of latitudes, if None self.grid will be assumed time_var : string or None variable name of observation times in the data dict, if None all observations have the same timestamp """ # lookup table between names and message number in the BUFR file data = {} dates = [] # 13: Latitude (High Accuracy) latitude = [] # 14: Longitude (High Accuracy) longitude = [] with BUFRReader(self.filename) as bufr: for message in bufr.messages(): # read fixed fields latitude.append(message[:, 12]) longitude.append(message[:, 13]) years = message[:, 6].astype(int) months = message[:, 7].astype(int) days = message[:, 8].astype(int) hours = message[:, 9].astype(int) minutes = message[:, 10].astype(int) seconds = message[:, 11].astype(int) df = pd.to_datetime(pd.DataFrame({'month': months, 'year': years, 'day': days, 'hour': hours, 'minute': minutes, 'second': seconds})) dates.append(pd.DatetimeIndex(df).to_julian_date().values) # read optional data fields for mid in self.msg_name_lookup: name = self.msg_name_lookup[mid] if name not in data: data[name] = [] data[name].append(message[:, mid - 1]) dates = np.concatenate(dates) longitude = np.concatenate(longitude) latitude = np.concatenate(latitude) for mid in self.msg_name_lookup: name = self.msg_name_lookup[mid] data[name] = np.concatenate(data[name]) if mid == 74: # ssm mean is encoded differently data[name] = data[name] * 100 data['jd'] = dates if 65 in self.msg_name_lookup: # mask all the arrays based on fill_value of soil moisture valid_data = np.where(data[self.msg_name_lookup[65]] != 1.7e+38) latitude = latitude[valid_data] longitude = longitude[valid_data] for name in data: data[name] = data[name][valid_data] return Image(longitude, latitude, data, {}, timestamp, timekey='jd')
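# Minimal sketch of the date handling above: BUFR date/time components are
# combined with pandas.to_datetime and converted to Julian dates, which the
# Image then carries in the 'jd' field. The component values are illustrative.
import pandas as pd

df = pd.DataFrame({'year': [2012, 2012], 'month': [7, 7], 'day': [1, 1],
                   'hour': [10, 10], 'minute': [5, 6], 'second': [0, 30]})
jd = pd.DatetimeIndex(pd.to_datetime(df)).to_julian_date().values
print(jd)  # two Julian dates close to 2456109.92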
def read(self, timestamp=None): """ Read data from the loaded image file. Parameters --------- timestamp : datetime, optional (default: None) Specific date (time) to read the data for. """ grbs = pygrib.open(self.filename) grid = self.subgrid return_img = {} return_metadata = {} var_msg_lut = {p: None for p in self.parameter} sea_mask = None for N in range(grbs.messages): n = N + 1 message = grbs.message(n) param_name = str(message.cfVarNameECMF) if param_name == "lsm": if self.mask_seapoints and sea_mask is None: sea_mask = message.values.flatten() if param_name not in self.parameter: continue else: var_msg_lut[param_name] = n # available variables shape = None for param_name, n in var_msg_lut.items(): if n is None: continue return_metadata[param_name] = {} message = grbs.message(n) param_data = message.values.flatten() if not shape: shape = param_data.shape return_img[param_name] = param_data if grid is None: lats, lons = message.latlons() try: res_lat, res_lon = get_grid_resolution(lats, lons) grid = ERA_RegularImgGrid(res_lat, res_lon) except ValueError: # when grid not regular lons_gt_180 = np.where(lons > 180.0) lons[lons_gt_180] = lons[lons_gt_180] - 360 grid = ERA_IrregularImgGrid(lons, lats) return_metadata[param_name]["units"] = message["units"] return_metadata[param_name]["long_name"] = \ message["parameterName"] if "levels" in message.keys(): return_metadata[param_name]["depth"] = "{:} cm".format( message["levels"]) if self.mask_seapoints: if sea_mask is None: raise IOError( "No land sea mask parameter (lsm) in passed image" " for masking.") else: # mask the loaded data for name in return_img.keys(): param_data = return_img[name] param_data = np.ma.array( param_data, mask=np.logical_not(sea_mask), fill_value=np.nan, ) param_data = param_data.filled() return_img[name] = param_data grbs.close() # missing variables for param_name, n in var_msg_lut.items(): if n is not None: continue param_data = np.full(shape, np.nan) warnings.warn("Cannot load variable {var} from file {thefile}. " "Filling image with NaNs.".format( var=param_name, thefile=self.filename)) return_img[param_name] = param_data return_metadata[param_name] = {} return_metadata[param_name]["long_name"] = lookup( self.product, [param_name]).iloc[0]["long_name"] if self.array_1D: return Image( grid.activearrlon, grid.activearrlat, return_img, return_metadata, timestamp, ) else: nlat = np.unique(grid.activearrlat).size nlon = np.unique(grid.activearrlon).size for key in return_img: return_img[key] = return_img[key].reshape((nlat, nlon)) return Image( grid.activearrlon.reshape(nlat, nlon), grid.activearrlat.reshape(nlat, nlon), return_img, return_metadata, timestamp, )
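# Minimal sketch of the sea-point masking used above: the land/sea mask
# (lsm, 1 = land) is applied through a numpy masked array whose fill value is
# NaN, so sea grid points come out as NaN after .filled(). Values illustrative.
import numpy as np

sea_mask = np.array([1.0, 1.0, 0.0, 1.0])       # lsm: 1 = land, 0 = sea
param_data = np.array([0.20, 0.25, 0.30, 0.35])
masked = np.ma.array(param_data, mask=np.logical_not(sea_mask),
                     fill_value=np.nan)
print(masked.filled())  # [0.2  0.25  nan  0.35]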
def read(self, timestamp=None): """ Read data from the loaded image file. Parameters --------- timestamp : datetime, optional (default: None) Specific date (time) to read the data for. """ return_img = {} return_metadata = {} try: dataset = xr.open_dataset(self.filename, engine="netcdf4", mask_and_scale=True) except IOError as e: print(e) print(" ".join([self.filename, "can not be opened"])) raise e res_lat, res_lon = get_grid_resolution( dataset.variables["latitude"][:], dataset.variables["longitude"][:]) grid = (ERA_RegularImgGrid(res_lat, res_lon) if not self.subgrid else self.subgrid) if self.mask_seapoints: if "lsm" not in dataset.variables.keys(): raise IOError("No land sea mask parameter (lsm) in" " passed image for masking.") else: sea_mask = dataset.variables["lsm"].values for name in dataset.variables: if name in self.parameter: variable = dataset[name] param_data = variable.data if self.mask_seapoints: param_data = np.ma.array( param_data, mask=np.logical_not(sea_mask), fill_value=np.nan, ) param_data = param_data.filled() param_data = param_data.flatten() return_metadata[name] = variable.attrs return_img.update( dict([(str(name), param_data[grid.activegpis])])) try: return_img[name] except KeyError: path, thefile = os.path.split(self.filename) warnings.warn( "Cannot load variable {var} from file {thefile}. " "Filling image with NaNs.".format(var=name, thefile=thefile)) return_img[name] = np.empty(grid.activegpis.size).fill( np.nan) dataset.close() if self.array_1D: return Image( grid.activearrlon, grid.activearrlat, return_img, return_metadata, timestamp, ) else: nlat = np.unique(grid.activearrlat).size nlon = np.unique(grid.activearrlon).size for key in return_img: return_img[key] = return_img[key].reshape((nlat, nlon)) return Image( grid.activearrlon.reshape(nlat, nlon), grid.activearrlat.reshape(nlat, nlon), return_img, return_metadata, timestamp, )
def read(self, timestamp=None): """ reads from the netCDF file given by the filename Returns ------- data : pygeobase.object_base.Image """ if self.ds is None: self.ds = netCDF4.Dataset(self.filename) if self.nc_variables is None: var_to_read = self.ds.variables.keys() else: var_to_read = self.nc_variables # make sure that essential variables are read always: if 'latitude' not in var_to_read: var_to_read.append('latitude') if 'longitude' not in var_to_read: var_to_read.append('longitude') # store data in dictionary dd = {} metadata = {} beams = ['f_', 'm_', 'a_'] metadata['sat_id'] = self.ds.platform[-1] metadata['orbit_start'] = self.ds.start_orbit_number metadata['processor_major_version'] = self.ds.processor_major_version metadata['product_minor_version'] = self.ds.product_minor_version metadata['format_major_version'] = self.ds.format_major_version metadata['format_minor_version'] = self.ds.format_minor_version num_cells = self.ds.dimensions['numCells'].size for name in var_to_read: variable = self.ds.variables[name] if len(variable.shape) == 1: # If the data is 1D then we repeat it for each cell dd[name] = variable[:].flatten() dd[name] = np.repeat(dd[name], num_cells) elif len(variable.shape) == 2: dd[name] = variable[:].flatten() elif len(variable.shape) == 3: # length of 3 means it is triplet data, so we split it for i, beam in enumerate(beams): dd[beam + name] = variable[:, :, i].flatten() if name == 'azi_angle_trip': mask = dd[beam + name] < 0 dd[beam + name][mask] += 360 else: raise RuntimeError("Unexpected variable shape.") if name == 'utc_line_nodes': utc_dates = netCDF4.num2date( dd[name], variable.units).astype('datetime64[ns]') dd['jd'] = (utc_dates - ref_dt)/np.timedelta64(1, 'D') + ref_jd dd['as_des_pass'] = (dd['sat_track_azi'] < 270).astype(np.uint8) longitude = dd.pop('longitude') latitude = dd.pop('latitude') n_records = latitude.shape[0] n_lines = n_records // num_cells dd['node_num'] = np.tile((np.arange(num_cells) + 1), n_lines) dd['line_num'] = np.arange(n_lines).repeat(num_cells) return Image(longitude, latitude, dd, metadata, timestamp, timekey='utc_line_nodes')
def read_eps_l1b(filename, timestamp): """ Use of correct lvl1b reader and data preparation. """ data = {} eps_file = read_eps(filename) ptype = eps_file.mphr['PRODUCT_TYPE'] fmv = int(eps_file.mphr['FORMAT_MAJOR_VERSION']) if ptype == 'SZF': image_dict = { 'img1': {}, 'img2': {}, 'img3': {}, 'img4': {}, 'img5': {}, 'img6': {} } data_full = { 'd1': {}, 'd2': {}, 'd3': {}, 'd4': {}, 'd5': {}, 'd6': {} } if fmv == 12: raw_data, metadata, orbit_grid = read_szf_fmv_12(eps_file) else: raise RuntimeError("SZF format version not supported.") for field in raw_data: data[field] = raw_data[field] # separate data into single beam images for i in range(1, 7): dataset = 'd' + str(i) img = 'img' + str(i) mask = ((data['BEAM_NUMBER']) == i) for field in data: data_full[dataset][field] = data[field][mask] lon = data_full[dataset].pop('LONGITUDE_FULL') lat = data_full[dataset].pop('LATITUDE_FULL') image_dict[img] = Image(lon, lat, data_full[dataset], metadata, timestamp, timekey='jd') return image_dict elif (ptype == 'SZR') or (ptype == 'SZO'): if fmv == 11: raw_data, metadata = read_szx_fmv_11(eps_file) elif fmv == 12: raw_data, metadata = read_szx_fmv_12(eps_file) else: raise RuntimeError("SZX format version not supported.") beams = ['f_', 'm_', 'a_'] for field in raw_data: if len(raw_data[field].shape) == 1: data[field] = raw_data[field] # split data if it is triplet data elif len(raw_data[field].shape) == 2: for i, beam in enumerate(beams): data[beam + field] = raw_data[field][:, i] else: raise RuntimeError("Unexpected variable shape.") longitude = data.pop('LONGITUDE') latitude = data.pop('LATITUDE') return Image(longitude, latitude, data, metadata, timestamp, timekey='jd') else: raise ValueError("Format not supported. Product type {:1}" " Format major version: {:2}".format(ptype, fmv))
def read(self, timestamp=None, ssm_masked=False): """ reads from the netCDF file given by the filename Returns ------- data : pygeobase.object_base.Image """ if self.ds is None: self.ds = netCDF4.Dataset(self.filename) if self.nc_variables is None: var_to_read = self.ds.variables.keys() else: var_to_read = self.nc_variables # make sure that essential variables are read always: if 'latitude' not in var_to_read: var_to_read.append('latitude') if 'longitude' not in var_to_read: var_to_read.append('longitude') # store data in dictionary dd = {} metadata = {} metadata['sat_id'] = self.ds.platform_long_name[-1] metadata['orbit_start'] = self.ds.start_orbit_number metadata['processor_major_version'] = self.ds.processor_major_version metadata['product_minor_version'] = self.ds.product_minor_version metadata['format_major_version'] = self.ds.format_major_version metadata['format_minor_version'] = self.ds.format_minor_version num_cells = self.ds.dimensions['numCells'].size for name in var_to_read: variable = self.ds.variables[name] dd[name] = variable[:].flatten() if len(variable.shape) == 1: # If the data is 1D then we repeat it for each cell dd[name] = np.repeat(dd[name], num_cells) if name == 'utc_line_nodes': utc_dates = netCDF4.num2date( dd[name], variable.units).astype('datetime64[ns]') dd['jd'] = (utc_dates - ref_dt)/np.timedelta64(1, 'D') + ref_jd # if the ssm_masked is True we mask out data with missing ssm value if 'soil_moisture' in dd and ssm_masked is True: # mask all the arrays based on fill_value of latitude valid_data = ~dd['soil_moisture'].mask for name in dd: dd[name] = dd[name][valid_data] longitude = dd.pop('longitude') latitude = dd.pop('latitude') n_records = latitude.shape[0] n_lines = n_records // num_cells dd['node_num'] = np.tile((np.arange(num_cells) + 1), n_lines) dd['line_num'] = np.arange(n_lines).repeat(num_cells) dd['as_des_pass'] = (dd['sat_track_azi'] < 270).astype(np.uint8) return Image(longitude, latitude, dd, metadata, timestamp, timekey='utc_line_nodes')
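# Minimal sketch of the swath bookkeeping above: for n_lines scan lines of
# num_cells samples each, the across-track cell number is tiled and the
# along-track line number is repeated. The numbers are illustrative.
import numpy as np

num_cells, n_lines = 3, 2
node_num = np.tile(np.arange(num_cells) + 1, n_lines)   # [1 2 3 1 2 3]
line_num = np.arange(n_lines).repeat(num_cells)         # [0 0 0 1 1 1]
print(node_num, line_num)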
def bfr2generic(native_img): """ Convert the native bfr image into a generic one. Parameters ---------- native_img : pygeobase.object_base.Image Native image. Returns ------- img : pygeobase.object_base.Image Generic images. """ n_records = native_img.lat.shape[0] generic_data = get_template_ASCATL2_SMX(n_records) fields = [ ('jd', 'jd', None), ('sat_id', 'Satellite Identifier', None), ('abs_line_nr', None, None), ('abs_orbit_nr', 'Orbit Number', None), ('node_num', 'Cross-Track Cell Number', None), ('line_num', 'line_num', None), ('as_des_pass', 'as_des_pass', None), ('swath', 'swath_indicator', None), ('azif', 'f_Antenna Beam Azimuth', 1.7e+38), ('azim', 'm_Antenna Beam Azimuth', 1.7e+38), ('azia', 'a_Antenna Beam Azimuth', 1.7e+38), ('incf', 'f_Radar Incidence Angle', 1.7e+38), ('incm', 'm_Radar Incidence Angle', 1.7e+38), ('inca', 'a_Radar Incidence Angle', 1.7e+38), ('sigf', 'f_Backscatter', 1.7e+38), ('sigm', 'm_Backscatter', 1.7e+38), ('siga', 'a_Backscatter', 1.7e+38), ('sm', 'Surface Soil Moisture (Ms)', 1.7e+38), ('sm_noise', 'Estimated Error In Surface Soil Moisture', 1.7e+38), ('sm_sensitivity', 'Soil Moisture Sensitivity', 1.7e+38), ('sig40', 'Backscatter', 1.7e+38), ('sig40_noise', 'Estimated Error In Sigma0 At 40 Deg Incidence Angle', 1.7e+38), ('slope40', 'Slope At 40 Deg Incidence Angle', 1.7e+38), ('slope40_noise', 'Estimated Error In Slope At 40 Deg Incidence Angle', 1.7e+38), ('dry_backscatter', 'Dry Backscatter', 1.7e+38), ('wet_backscatter', 'Wet Backscatter', 1.7e+38), ('mean_surf_sm', 'Mean Surface Soil Moisture', 1.7e+40), ('correction_flag', 'Soil Moisture Correction Flag', 1.7e+38), ('processing_flag', 'Soil Moisture Processing Flag', 1.7e+38), ('aggregated_quality_flag', None), ('snow_cover_probability', 'Snow Cover', 1.7e+38), ('frozen_soil_probability', 'Frozen Land Surface Fraction', 1.7e+38), ('innudation_or_wetland', 'Inundation And Wetland Fraction', 1.7e+38), ('topographical_complexity', 'Topographic Complexity', 1.7e+38) ] for field in fields: if field[1] is None: continue if field[2] is not None: valid_mask = (native_img.data[field[1]] != field[2]) generic_data[field[0]][valid_mask] = native_img.data[ field[1]][valid_mask] else: generic_data[field[0]] = native_img.data[field[1]] # convert sat_id (spacecraft id) to the intern definition sat_id_lut = np.array([0, 0, 0, 4, 3, 5]) generic_data['sat_id'] = sat_id_lut[generic_data['sat_id']] img = Image(native_img.lon, native_img.lat, generic_data, native_img.metadata, native_img.timestamp, timekey='jd') return img