def get_array(self, key):
    """Get all data from file for the given BUFR key.

    Every BUFR message found in ``self.filename`` is unpacked and the values
    stored under ``key`` are read as floats.  A message that yields a single
    value has that value repeated 120 times before being stacked.

    Args:
        key: BUFR key handed to ``ec.codes_get_array``.

    Returns:
        A dask array with one row per message.  If the stacked array holds
        exactly one element, its first row is returned instead.
    """
    with open(self.filename, "rb") as fh:
        msg_count = 0
        while True:
            bufr = ec.codes_bufr_new_from_file(fh)
            if bufr is None:
                break
            try:
                ec.codes_set(bufr, 'unpack', 1)
                values = ec.codes_get_array(bufr, key, float)
                if len(values) == 1:
                    values = np.repeat(values, 120)
                row = da.from_array([values], chunks=CHUNK_SIZE)
            finally:
                # Release the message handle even when decoding fails.
                ec.codes_release(bufr)

            # The first message initialises the result, later ones are
            # appended as additional rows.
            if msg_count == 0:
                arr = row
            else:
                arr = da.concatenate((arr, row), axis=0)
            msg_count += 1

    # NOTE: a file without any message leaves ``arr`` unbound, so the lookup
    # below raises UnboundLocalError.
    if arr.size == 1:
        arr = arr[0]
    return arr
def get_start_end_date(self):
    """Get the first and last date from the bufr file.

    Each message handle is passed, together with the running extremes, to
    ``self.extract_msg_date_extremes``; whatever that returns becomes the new
    pair of extremes.

    Returns:
        Tuple ``(date_min, date_max)``; both are ``None`` when the file holds
        no message.
    """
    earliest, latest = None, None
    with open(self.filename, 'rb') as fh:
        # Priming read: fetch one handle up front, then one per iteration.
        handle = ec.codes_bufr_new_from_file(fh)
        while handle is not None:
            earliest, latest = self.extract_msg_date_extremes(handle, earliest, latest)
            handle = ec.codes_bufr_new_from_file(fh)
    return earliest, latest
def get_bufr_data(self, key):
    """Get BUFR data by key.

    The values stored under ``key`` are read as floats from every message in
    ``self.filename`` and joined into one flat array.  A message that yields
    a single value has it resized to the message's ``numberOfSubsets``.

    Args:
        key: BUFR key handed to ``ec.codes_get_array``.

    Returns:
        One-dimensional numpy array with the values of all messages in file
        order; empty when the file holds no message.
    """
    # Start from an empty float array so the result dtype matches what
    # repeated ``np.append`` onto ``np.array([])`` would produce.
    parts = [np.array([])]
    with open(self.filename, 'rb') as fh:
        while True:
            # get handle for message
            bufr = ec.codes_bufr_new_from_file(fh)
            if bufr is None:
                break
            try:
                ec.codes_set(bufr, 'unpack', 1)
                tmp = ec.codes_get_array(bufr, key, float)
                if len(tmp) == 1:
                    size = ec.codes_get(bufr, 'numberOfSubsets')
                    tmp = np.resize(tmp, size)
            finally:
                # Release the message handle even when decoding fails.
                ec.codes_release(bufr)
            parts.append(np.ravel(tmp))
    # Join once at the end instead of re-allocating on every message.
    return np.concatenate(parts)
def decode_file(filename, report_type, field_lists):
    """Decode the reports held in a BUFR file.

    Returns a list of tuples, where each tuple is
    (message_data, report_count).

    message_data is a dictionary of name: values where the name is the field
    and the values is a masked numpy array for all reports.  Note the length
    of these arrays is not necessarily the same as the number of reports.
    Accumulated fields will have names like rain@60min or maxtemp@6h.

    Args:
        filename: path of the file to decode.
        report_type: report type; 'metar' is delegated to
            ``decode_metar_file`` and 'temp' gets an extra
            ``decode_temp_report`` pass.
        field_lists: forwarded to ``process_message`` /
            ``decode_metar_file``.

    Returns:
        The list described above.  It is empty when the file does not exist
        and is cut short when a message cannot be unpacked.
    """
    if not os.path.exists(filename):
        LOGGER.error('decode_file could not find file: %s', filename)
        return []

    if report_type == 'metar':
        # metar is so specialised that it has its own decoding methods
        return decode_metar_file(filename, field_lists)

    result = []
    # BUFR is a binary format, so the file must be opened in binary mode.
    with open(filename, 'rb') as bufr_file:
        # loop through the messages in the file
        for each_message in range(eccodes.codes_count_in_file(bufr_file)):
            # get handle for message
            msgid = eccodes.codes_bufr_new_from_file(bufr_file)
            if msgid is None:
                break  # decoded all of the messages in the file
            try:
                # we need to instruct ecCodes to expand all the BUFR
                # descriptors
                try:
                    eccodes.codes_set(msgid, 'unpack', 1)
                except eccodes.CodesInternalError:
                    LOGGER.warning('failed to unpack message in %s', filename)
                    break

                message_data, message_report_count = process_message(
                    msgid, field_lists)
                if message_report_count > 0:
                    # special temp decoding, sorry...
                    if report_type == 'temp':
                        message_data = decode_temp_report(
                            message_data, message_report_count)
                    result.append((message_data, message_report_count))
                LOGGER.debug('found %i reports in message[%d] of %s',
                             message_report_count, each_message,
                             os.path.basename(filename))
            finally:
                # release the handle for this message
                eccodes.codes_release(msgid)
    return result
def get_attribute(self, key):
    """Get BUFR attributes.

    Args:
        key: BUFR key handed to ``ec.codes_get``.

    Returns:
        The value of ``key`` read from the last message of the file.
    """
    # This function is inefficient as it is looping through the entire
    # file to get 1 attribute. It causes a problem though if you break
    # from the file early - dont know why but investigating - fix later
    with open(self.filename, "rb") as fh:
        while True:
            # get handle for message
            bufr = ec.codes_bufr_new_from_file(fh)
            if bufr is None:
                break
            try:
                ec.codes_set(bufr, 'unpack', 1)
                attr = ec.codes_get(bufr, key)
            finally:
                # Release the message handle even when decoding fails.
                ec.codes_release(bufr)
    # NOTE: a file without any message leaves ``attr`` unbound, so this
    # return raises UnboundLocalError.
    return attr
def get_next_msg(self):
    # #[
    """Step to the next BUFR message in the open file.

    While ``self.msg_loaded`` is below ``self.num_msgs`` the counter is
    incremented, a new handle is read from ``self.fd`` into ``self.bufr_id``
    and the message is unpacked.

    Raises:
        StopIteration: when all messages have been loaded (``msg_loaded``
            and ``bufr_id`` are then reset to -1) or when no further handle
            could be obtained from the file.
    """
    print('getting next message')

    # Nothing left to read: reset the bookkeeping and stop.
    if self.msg_loaded >= self.num_msgs:
        self.msg_loaded = -1
        self.bufr_id = -1
        raise StopIteration

    self.msg_loaded += 1
    # get an instance of the eccodes bufr class
    self.bufr_id = eccodes.codes_bufr_new_from_file(self.fd)
    print('self.bufr_id = ', self.bufr_id)
    if self.bufr_id is None:
        raise StopIteration

    # unpack this bufr message
    eccodes.codes_set(self.bufr_id, 'unpack', 1)
def get_start_end_date(self):
    """Get the first and last date from the bufr file.

    The observation time of every message is built from its ``year``,
    ``month``, ``day``, ``hour``, ``minute`` and ``second`` keys.

    Returns:
        Tuple ``(start_time, end_time)`` holding the observation time of the
        first and of the last message in the file.
    """
    date_keys = ('year', 'month', 'day', 'hour', 'minute', 'second')
    is_first = True
    with open(self.filename, "rb") as fh:
        while True:
            # get handle for message
            bufr = ec.codes_bufr_new_from_file(fh)
            if bufr is None:
                break
            try:
                ec.codes_set(bufr, 'unpack', 1)
                obs_time = datetime(
                    **{name: ec.codes_get(bufr, name) for name in date_keys})
            finally:
                # Release the message handle even when decoding fails.
                ec.codes_release(bufr)
            if is_first:
                start_time = obs_time
                is_first = False
    # NOTE: a file without any message leaves ``obs_time`` unbound, so the
    # assignment below raises UnboundLocalError.
    end_time = obs_time
    return start_time, end_time
def __init__(self, bufrfile, variables, valid_dtg, valid_range, lonrange=None, latrange=None, label=""):
    """Build an observation set from the messages of a BUFR file.

    Every message is unpacked and, for each requested variable, one
    observation is created when the message lies inside the lon/lat ranges,
    carries a value for the variable and ``self.inside_window`` accepts its
    time.  Counters per variable are printed at the end.

    Args:
        bufrfile: path of the BUFR file to read.
        variables: names of the variables to extract.  Supported names are
            "relativeHumidityAt2M" (derived via ``self.td2rh`` from
            "airTemperatureAt2M" and "dewpointTemperatureAt2M"),
            "airTemperatureAt2M" and "totalSnowDepth".
        valid_dtg: forwarded to ``self.inside_window`` together with the
            observation time.
        valid_range: forwarded to ``self.inside_window``.
        lonrange: ``[min, max]`` longitude (inclusive); defaults to
            ``[-180, 180]``.
        latrange: ``[min, max]`` latitude (inclusive); defaults to
            ``[-90, 90]``.
        label: forwarded to ``surfex.obs.ObservationSet.__init__``.

    Raises:
        Exception: if the ``eccodes`` module is not available.
        NotImplementedError: if an unsupported variable name is requested
            (raised when the first decodable message is processed).
    """
    if lonrange is None:
        lonrange = [-180, 180]
    if latrange is None:
        latrange = [-90, 90]

    if eccodes is None:
        raise Exception("ECCODES not found. Needed for bufr reading")

    # Keys read from every message; the keys needed by the requested
    # variables are appended below.
    keys = [
        'latitude',
        'longitude',
        'year',
        'month',
        'day',
        'hour',
        'minute',
        'heightOfStationGroundAboveMeanSeaLevel',
        'heightOfStation',
        'stationNumber',
        'blockNumber'
    ]

    # Per-variable statistics reported at the end.
    nerror = {}
    ntime = {}
    nundef = {}
    ndomain = {}
    nobs = {}
    for var in variables:
        if var == "relativeHumidityAt2M":
            keys.append("airTemperatureAt2M")
            keys.append("dewpointTemperatureAt2M")
        else:
            keys.append(var)
        nerror[var] = 0
        ntime[var] = 0
        nundef[var] = 0
        ndomain[var] = 0
        nobs[var] = 0

    print("Reading " + bufrfile)
    print("Looking for keys: " + str(keys))

    cnt = 0
    observations = []
    not_decoded = 0

    # BUFR is a binary format; the context manager closes the file even when
    # decoding raises.
    with open(bufrfile, "rb") as f:
        # loop for the messages in the file
        while True:
            # get handle for message
            bufr = eccodes.codes_bufr_new_from_file(f)
            if bufr is None:
                break

            found = {}
            try:
                # we need to instruct ecCodes to expand all the descriptors
                # i.e. unpack the data values
                try:
                    eccodes.codes_set(bufr, 'unpack', 1)
                    decoded = True
                except eccodes.CodesInternalError as err:
                    not_decoded += 1
                    print('Error with key="unpack" : %s' % err.msg)
                    decoded = False

                if decoded:
                    for key in keys:
                        try:
                            val = eccodes.codes_get(bufr, key)
                        except eccodes.CodesInternalError:
                            # The message does not contain this key.
                            continue
                        if val == eccodes.CODES_MISSING_DOUBLE or val == eccodes.CODES_MISSING_LONG:
                            val = np.nan
                        found[key] = val
            finally:
                # delete handle, also when reading the message failed
                eccodes.codes_release(bufr)

            if decoded:
                lat = found.get("latitude", np.nan)
                lon = found.get("longitude", np.nan)
                year = found.get("year", -1)
                month = found.get("month", -1)
                day = found.get("day", -1)
                hour = found.get("hour", -1)
                minute = found.get("minute", -1)
                # heightOfStation is read after
                # heightOfStationGroundAboveMeanSeaLevel and takes
                # precedence whenever the message contains it.
                elev = found.get("heightOfStation",
                                 found.get("heightOfStationGroundAboveMeanSeaLevel", np.nan))
                station_number = found.get("stationNumber", -1)
                block_number = found.get("blockNumber", -1)
                t2m = found.get("airTemperatureAt2M", np.nan)
                td2m = found.get("dewpointTemperatureAt2M", np.nan)
                sd = found.get("totalSnowDepth", np.nan)
                stid = "NA"

                # Build the observation time only from usable fields: a
                # field that is absent (-1), flagged missing (NaN) or out of
                # range must not abort the whole read.
                obs_dtg = None
                date_parts = (year, month, day, hour, minute)
                if not any(part == -1 or np.isnan(part) for part in date_parts):
                    try:
                        obs_dtg = datetime(year=year, month=month, day=day, hour=hour, minute=minute)
                    except (TypeError, ValueError):
                        obs_dtg = None

                header_found = obs_dtg is not None and not (
                    np.isnan(lat) or np.isnan(lon) or np.isnan(elev))

                # Assign value to var
                for var in variables:
                    # Start from "undefined" for every variable so a value
                    # assigned for the previous variable cannot leak over.
                    value = np.nan
                    if var == "relativeHumidityAt2M":
                        if not np.isnan(t2m) and not np.isnan(td2m):
                            value = self.td2rh(td2m, t2m) * 0.01
                    elif var == "airTemperatureAt2M":
                        value = t2m
                    elif var == "totalSnowDepth":
                        value = sd
                    else:
                        raise NotImplementedError("Var " + var + " is not coded! Please do it!")

                    if not header_found or np.isnan(value):
                        nerror[var] += 1

                    if latrange[0] <= lat <= latrange[1] and lonrange[0] <= lon <= lonrange[1]:
                        if np.isnan(value) or obs_dtg is None:
                            nundef[var] += 1
                        elif self.inside_window(obs_dtg, valid_dtg, valid_range):
                            if station_number > 0 and block_number > 0:
                                stid = str((block_number * 1000) + station_number)
                            observations.append(surfex.obs.Observation(obs_dtg, lon, lat, value,
                                                                       elev=elev, stid=stid, varname=var))
                            nobs[var] += 1
                        else:
                            ntime[var] += 1
                    else:
                        ndomain[var] += 1

            cnt += 1
            # Progress marker every 1000 messages.
            if (cnt % 1000) == 0:
                print('.', end='')
                sys.stdout.flush()

    print("\nFound " + str(len(observations)) + "/" + str(cnt))
    print("Not decoded: " + str(not_decoded))
    for var in variables:
        print("\nObservations for var=" + var + ": " + str(nobs[var]))
        print("Observations removed because of domain check: " + str(ndomain[var]))
        print("Observations removed because of not being defined/found: " + str(nundef[var]))
        print("Observations removed because of time window: " + str(ntime[var]))
        print("Messages not containing information on all keys: " + str(nerror[var]))

    surfex.obs.ObservationSet.__init__(self, observations, label=label)
def readBufrFile(bufrFile, var, lonrange, latrange):
    """Read the observations of one variable from a BUFR file.

    Every message is unpacked and its ``latitude``, ``longitude``,
    ``heightOfStation`` and ``var`` keys are read.  A "relativeHumidity"
    value above 100 is capped at 100 and 273.15 is subtracted from an
    "airTemperatureAt2M" value.

    Args:
        bufrFile: path of the BUFR file to read.
        var: name of the BUFR key holding the observed value.
        lonrange: ``(min, max)`` longitude, both bounds exclusive.
        latrange: ``(min, max)`` latitude, both bounds exclusive.

    Returns:
        List of ``observation`` objects for all messages that lie inside the
        given ranges and carry a value for ``var``.
    """
    # Keys read from every message.
    keys = ['latitude', 'longitude', 'heightOfStation', var]

    print("Reading " + bufrFile)
    print("Looking for keys: " + str(keys))

    cnt = 0
    observations = []
    not_found = 0

    # BUFR is a binary format; the context manager closes the file even when
    # decoding raises.
    with open(bufrFile, 'rb') as f:
        # loop for the messages in the file
        while True:
            # get handle for message
            bufr = codes_bufr_new_from_file(f)
            if bufr is None:
                break

            lat = np.nan
            lon = np.nan
            value = np.nan
            elev = np.nan
            try:
                # we need to instruct ecCodes to expand all the descriptors
                # i.e. unpack the data values
                codes_set(bufr, 'unpack', 1)

                for key in keys:
                    try:
                        val = codes_get(bufr, key)
                    except CodesInternalError:
                        if key == var:
                            not_found += 1
                        continue

                    if val == CODES_MISSING_DOUBLE or val == CODES_MISSING_LONG:
                        val = np.nan
                    if key == "latitude":
                        lat = val
                    if key == "longitude":
                        lon = val
                    if key == "heightOfStation":
                        elev = val
                    if key == var:
                        value = val
                        if var == "relativeHumidity":
                            # Cap the humidity at 100.
                            if value > 100:
                                value = 100.
                        elif var == "airTemperatureAt2M":
                            value = value - 273.15
            finally:
                # delete handle, also when reading the message failed
                codes_release(bufr)

            if latrange[0] < lat < latrange[1] and lonrange[0] < lon < lonrange[1]:
                if not np.isnan(value):
                    observations.append(observation(lon, lat, value, elev))

            cnt += 1

    print("Found " + str(len(observations)) + "/" + str(cnt))
    print("Not encoded for " + str(var) + ": " + str(not_found))
    return observations
def bufr_decode(f, fn, archive, args, fakeTimes=True, fakeDisplacement=True, logFixup=True):
    """Decode the header and the samples of one BUFR message.

    Args:
        f: open file object from which the next BUFR message is read.
        fn: file name, only used when reporting an unreadable file.
        archive: archive name, only used when reporting an unreadable file.
        args: not used by this function.
        fakeTimes: when true, a sample whose ``timePeriod`` is missing gets a
            synthetic one (starting at 0 and growing by ``FAKE_TIME_STEPS``
            per faked sample); when false such samples are skipped.
        fakeDisplacement: when true, a missing ``latitudeDisplacement`` or
            ``longitudeDisplacement`` is replaced by 0 instead of
            invalidating the sample.
        logFixup: not used by this function.

    Returns:
        Tuple ``(header, samples)``: ``header`` maps header key names to the
        values present in the message, ``samples`` is a list with one dict
        per complete sample.

    Raises:
        BufrUnreadableError: if no message could be read from ``f``.
        MissingKeyError: if the number of samples cannot be determined.
    """
    ibufr = codes_bufr_new_from_file(f)
    if not ibufr:
        raise BufrUnreadableError("empty file", fn, archive)

    # From here on the handle is released on every exit path.
    try:
        codes_set(ibufr, "unpack", 1)

        missingHdrKeys = 0
        header = {}

        try:
            k = "extendedDelayedDescriptorReplicationFactor"
            num_samples = codes_get_array(ibufr, k)[0]
        except Exception as e:
            # BAIL HERE if no num_samples
            raise MissingKeyError(k, message=f"cant determine number of samples: {e}") from e

        ivals = [
            "typicalYear",
            "typicalMonth",
            "typicalDay",
            "typicalHour",
            "typicalMinute",
            "typicalSecond",
            "blockNumber",
            "stationNumber",
            "radiosondeType",
            "height",
            "year",
            "month",
            "day",
            "hour",
            "minute",
            "second",
            "correctionAlgorithmsForHumidityMeasurements",
            "pressureSensorType",
            "temperatureSensorType",
            "humiditySensorType",
            "geopotentialHeightCalculation",
            "trackingTechniqueOrStatusOfSystem",
            "measuringEquipmentType",
        ]
        fvals = [
            "radiosondeOperatingFrequency",
            "latitude",
            "longitude",
            "heightOfStationGroundAboveMeanSeaLevel",
            "heightOfBarometerAboveMeanSeaLevel",
        ]
        svals = [
            "radiosondeSerialNumber",
            "typicalDate",
            "typicalTime",
            "text",
            "softwareVersionNumber",
        ]

        # Header keys: integer and float keys are dropped when flagged
        # missing, string keys are stored as they come.
        for k in ivals + fvals + svals:
            try:
                value = codes_get(ibufr, k)
                if k in ivals:
                    if value != CODES_MISSING_LONG:
                        header[k] = value
                elif k in fvals:
                    if value != CODES_MISSING_DOUBLE:
                        header[k] = value
                elif k in svals:
                    header[k] = value
            except Exception as e:
                logging.debug(f"missing header key={k} e={e}")
                missingHdrKeys += 1

        # special-case warts we do not really care about
        warts = ["shipOrMobileLandStationIdentifier"]
        for k in warts:
            try:
                header[k] = codes_get(ibufr, k)
            except Exception:
                missingHdrKeys += 1

        fkeys = [
            "pressure",
            "nonCoordinateGeopotentialHeight",
            "latitudeDisplacement",
            "longitudeDisplacement",
            "airTemperature",
            "dewpointTemperature",
            "windDirection",
            "windSpeed",
        ]
        replaceable = ["latitudeDisplacement", "longitudeDisplacement"]

        samples = []
        invalidSamples = 0
        missingValues = 0
        fakeTimeperiod = 0
        fixups = []  # report once only

        for i in range(1, num_samples + 1):
            sample = {}

            k = "timePeriod"
            timePeriod = codes_get(ibufr, f"#{i}#{k}")
            if timePeriod == CODES_MISSING_LONG:
                invalidSamples += 1
                if not fakeTimes:
                    continue
                timePeriod = fakeTimeperiod
                fakeTimeperiod += FAKE_TIME_STEPS
                if k not in fixups:
                    logging.debug(
                        f"FIXUP timePeriod fakeTimes:{fakeTimes} fakeTimeperiod={fakeTimeperiod}"
                    )
                    fixups.append(k)
            sample[k] = timePeriod

            # A sample is kept only when every key in fkeys has a value
            # (or an allowed replacement).
            sampleOK = True
            for k in fkeys:
                name = f"#{i}#{k}"
                try:
                    value = codes_get(ibufr, name)
                    if value != CODES_MISSING_DOUBLE:
                        sample[k] = value
                    elif fakeDisplacement and k in replaceable:
                        if k not in fixups:
                            logging.debug(f"--FIXUP key {k}")
                            fixups.append(k)
                        sample[k] = 0
                    else:
                        sampleOK = False
                        missingValues += 1
                except Exception as e:
                    sampleOK = False
                    logging.debug(f"sample={i} key={k} e={e}, skipping")
                    missingValues += 1

            if sampleOK:
                samples.append(sample)

        logging.debug((f"samples used={len(samples)}, invalid samples="
                       f"{invalidSamples}, skipped header keys={missingHdrKeys},"
                       f" missing values={missingValues}"))
    finally:
        codes_release(ibufr)

    return header, samples