Esempio n. 1
0
    def get_array(self, key):
        """Get all data from file for the given BUFR key.

        Every message in ``self.filename`` is unpacked and the values stored
        under ``key`` are read as floats. Each message contributes one row to
        the result.

        Args:
            key: BUFR key name handed to ``ec.codes_get_array``.

        Returns:
            A dask array with one row per message. When the whole array holds
            a single element, its first row is returned instead.

        Raises:
            ValueError: If the file contains no BUFR message.
        """
        rows = []
        with open(self.filename, "rb") as fh:
            while True:
                bufr = ec.codes_bufr_new_from_file(fh)
                if bufr is None:
                    break

                # Release the message handle even when unpacking or reading
                # the key fails, so a bad message does not leak the handle.
                try:
                    ec.codes_set(bufr, 'unpack', 1)
                    values = ec.codes_get_array(bufr, key, float)
                finally:
                    ec.codes_release(bufr)

                # A single value is expanded to a full row.
                # NOTE(review): 120 is presumably the number of subsets per
                # message - confirm against the data format.
                if len(values) == 1:
                    values = np.repeat(values, 120)

                rows.append(da.from_array([values], chunks=CHUNK_SIZE))

        if not rows:
            raise ValueError(
                "No BUFR messages found in {!r}".format(self.filename))

        # Concatenate once instead of rebuilding the array for every message.
        arr = da.concatenate(rows, axis=0)

        if arr.size == 1:
            arr = arr[0]

        return arr
Esempio n. 2
0
 def get_start_end_date(self):
     """Return the earliest and latest date found in the BUFR file.

     Each message handle is passed to ``self.extract_msg_date_extremes``
     together with the extremes found so far; the pair it returns becomes
     the new running result. Both values stay ``None`` when the file holds
     no message.
     """
     earliest = None
     latest = None
     with open(self.filename, 'rb') as stream:
         # read the first message, then keep going until the file is exhausted
         msg = ec.codes_bufr_new_from_file(stream)
         while msg is not None:
             earliest, latest = self.extract_msg_date_extremes(msg, earliest, latest)
             msg = ec.codes_bufr_new_from_file(stream)
     return earliest, latest
Esempio n. 3
0
 def get_bufr_data(self, key):
     """Get BUFR data by key.

     Reads ``key`` as a float array from every message in ``self.filename``
     and joins the per-message arrays into one flat array.

     Args:
         key: BUFR key name handed to ``ec.codes_get_array``.

     Returns:
         A 1-D numpy array with the values of all messages in file order;
         empty when the file holds no message.
     """
     # Start from an empty float array so an empty file still yields an array.
     pieces = [np.array([])]
     with open(self.filename, 'rb') as fh:
         while True:
             # get handle for message
             bufr = ec.codes_bufr_new_from_file(fh)
             if bufr is None:
                 break
             # Release the handle even if unpacking or reading the key fails.
             try:
                 ec.codes_set(bufr, 'unpack', 1)
                 tmp = ec.codes_get_array(bufr, key, float)
                 if len(tmp) == 1:
                     # A single value is repeated once per subset.
                     size = ec.codes_get(bufr, 'numberOfSubsets')
                     tmp = np.resize(tmp, size)
             finally:
                 ec.codes_release(bufr)
             pieces.append(np.ravel(tmp))
     # One concatenation at the end avoids copying the growing array
     # for every message.
     return np.concatenate(pieces)
Esempio n. 4
0
def decode_file(filename, report_type, field_lists):
    '''Returns a list of tuples, where each tuple is (message_data, report_count).
    message_data is dictionary of name: values
    where the name is the field and the values is a masked numpy array for all reports.
    Note the length of these arrays is not necessarily the same as the number of reports.
    Accumulated fields will have names like rain@60min or maxtemp@6h

    An empty list is returned (and an error logged) when the file does not exist.
    Files of report_type 'metar' are handed to decode_metar_file instead.
    Decoding stops at the first message that fails to unpack; messages
    decoded before it are still returned.'''
    if not os.path.exists(filename):
        LOGGER.error('decode_file could not find file: %s', filename)
        return []
    if report_type == 'metar':
        # metar is so specialised that it has its own decoding methods
        return decode_metar_file(filename, field_lists)

    result = []
    # BUFR is a binary format, so the file has to be opened in binary mode
    with open(filename, 'rb') as bufr_file:
        # loop through the messages in the file
        for each_message in range(eccodes.codes_count_in_file(bufr_file)):
            # get handle for message
            msgid = eccodes.codes_bufr_new_from_file(bufr_file)
            if msgid is None:
                break  # decoded all of the messages in the file
            try:
                # we need to instruct ecCodes to expand all the BUFR descriptors
                try:
                    eccodes.codes_set(msgid, 'unpack', 1)
                except eccodes.CodesInternalError:
                    LOGGER.warning('failed to unpack message in %s', filename)
                    break

                message_data, message_report_count = process_message(
                    msgid, field_lists)
                if message_report_count > 0:
                    # special temp decoding, sorry...
                    if report_type == 'temp':
                        message_data = decode_temp_report(
                            message_data, message_report_count)
                    result.append((message_data, message_report_count))
                LOGGER.debug('found %i reports in message[%d] of %s',
                             message_report_count, each_message,
                             os.path.basename(filename))
            finally:
                # release the handle for this message, also on break or error
                eccodes.codes_release(msgid)

    return result
Esempio n. 5
0
    def get_attribute(self, key):
        """Get BUFR attributes.

        Reads ``key`` from every message in ``self.filename`` and returns the
        value found in the last message.

        Args:
            key: BUFR key name handed to ``ec.codes_get``.

        Returns:
            The value of ``key`` in the last message of the file.

        Raises:
            ValueError: If the file contains no BUFR message.
        """
        # This function is inefficient as it is looping through the entire
        # file to get 1 attribute. It causes a problem though if you break
        # from the file early - dont know why but investigating - fix later
        found = False
        attr = None
        # The context manager closes the file even when decoding fails.
        with open(self.filename, "rb") as fh:
            while True:
                # get handle for message
                bufr = ec.codes_bufr_new_from_file(fh)
                if bufr is None:
                    break
                # Release the handle even if unpacking or the lookup fails.
                try:
                    ec.codes_set(bufr, 'unpack', 1)
                    attr = ec.codes_get(bufr, key)
                    found = True
                finally:
                    ec.codes_release(bufr)

        if not found:
            raise ValueError(
                "No BUFR messages found in {!r}".format(self.filename))
        return attr
Esempio n. 6
0
    def get_next_msg(self):
        #  #[
        """
        Advance to the next BUFR message of the open file and unpack it.

        Raises StopIteration once self.msg_loaded has reached self.num_msgs
        (both self.msg_loaded and self.bufr_id are then reset to -1), or when
        no further message handle can be obtained from self.fd.
        """
        print('getting next message')

        # all announced messages were already handed out: reset and stop
        if not self.msg_loaded < self.num_msgs:
            self.msg_loaded = -1
            self.bufr_id = -1
            raise StopIteration

        self.msg_loaded += 1
        # ask eccodes for a handle to the next message
        self.bufr_id = eccodes.codes_bufr_new_from_file(self.fd)
        print('self.bufr_id = ', self.bufr_id)
        if self.bufr_id is None:
            raise StopIteration

        # expand the descriptors of this message
        eccodes.codes_set(self.bufr_id, 'unpack', 1)
Esempio n. 7
0
    def get_start_end_date(self):
        """Get the first and last date from the bufr file.

        The date of each message is built from its ``year``, ``month``,
        ``day``, ``hour``, ``minute`` and ``second`` keys.

        Returns:
            A tuple ``(start_time, end_time)`` holding the datetime of the
            first and of the last message in the file.

        Raises:
            ValueError: If the file contains no BUFR message.
        """
        start_time = None
        end_time = None
        # The context manager closes the file even when decoding fails.
        with open(self.filename, "rb") as fh:
            while True:
                # get handle for message
                bufr = ec.codes_bufr_new_from_file(fh)
                if bufr is None:
                    break
                # Release the handle even if a key is missing or the date
                # fields do not form a valid datetime.
                try:
                    ec.codes_set(bufr, 'unpack', 1)
                    obs_time = datetime(year=ec.codes_get(bufr, 'year'),
                                        month=ec.codes_get(bufr, 'month'),
                                        day=ec.codes_get(bufr, 'day'),
                                        hour=ec.codes_get(bufr, 'hour'),
                                        minute=ec.codes_get(bufr, 'minute'),
                                        second=ec.codes_get(bufr, 'second'))
                finally:
                    ec.codes_release(bufr)

                if start_time is None:
                    start_time = obs_time
                end_time = obs_time

        if start_time is None:
            raise ValueError(
                "No BUFR messages found in {!r}".format(self.filename))

        return start_time, end_time
Esempio n. 8
0
    def __init__(self, bufrfile, variables, valid_dtg, valid_range, lonrange=None, latrange=None, label=""):
        """Read observations for the requested variables from a BUFR file.

        Every message is unpacked, position, time, station and variable keys
        are read, and one observation per variable is kept when it lies in
        the lon/lat box, has a defined value and passes
        ``self.inside_window``. Counters for rejected observations are
        printed at the end.

        Args:
            bufrfile: Path of the BUFR file to read.
            variables: Sequence of variable names. "relativeHumidityAt2M",
                "airTemperatureAt2M" and "totalSnowDepth" are supported.
            valid_dtg: Passed to ``self.inside_window`` with each observation time.
            valid_range: Passed to ``self.inside_window`` with each observation time.
            lonrange: [min, max] longitude kept (inclusive). Defaults to [-180, 180].
            latrange: [min, max] latitude kept (inclusive). Defaults to [-90, 90].
            label: Forwarded to ``surfex.obs.ObservationSet.__init__``.

        Raises:
            Exception: If the eccodes module is not available.
            NotImplementedError: If ``variables`` holds an unsupported name.
        """
        if lonrange is None:
            lonrange = [-180, 180]
        if latrange is None:
            latrange = [-90, 90]

        if eccodes is None:
            raise Exception("ECCODES not found. Needed for bufr reading")

        # keys read from every message
        keys = [
            'latitude',
            'longitude',
            'year',
            'month',
            'day',
            'hour',
            'minute',
            'heightOfStationGroundAboveMeanSeaLevel',
            'heightOfStation',
            'stationNumber',
            'blockNumber'
        ]
        # per-variable counters
        nerror = {}
        ntime = {}
        nundef = {}
        ndomain = {}
        nobs = {}
        for var in variables:
            if var == "relativeHumidityAt2M":
                # relative humidity is derived from temperature and dew point
                keys.append("airTemperatureAt2M")
                keys.append("dewpointTemperatureAt2M")
            else:
                keys.append(var)
            nerror[var] = 0
            ntime[var] = 0
            nundef[var] = 0
            ndomain[var] = 0
            nobs[var] = 0

        cnt = 0
        not_decoded = 0
        observations = []

        # BUFR is binary; the context manager closes the file on any exit path
        with open(bufrfile, "rb") as f:
            print("Reading " + bufrfile)
            print("Looking for keys: " + str(keys))

            # loop for the messages in the file
            while True:
                # get handle for message
                bufr = eccodes.codes_bufr_new_from_file(f)
                if bufr is None:
                    break

                # the handle is released in ``finally`` whatever happens below
                try:
                    # we need to instruct ecCodes to expand all the descriptors
                    # i.e. unpack the data values
                    try:
                        eccodes.codes_set(bufr, 'unpack', 1)
                    except eccodes.CodesInternalError as err:
                        not_decoded = not_decoded + 1
                        print('Error with key="unpack" : %s' % err.msg)
                        continue

                    # collect the keys present in this message
                    found = {}
                    for key in keys:
                        try:
                            val = eccodes.codes_get(bufr, key)
                        except eccodes.CodesInternalError:
                            # the report does not contain this key
                            continue
                        if val == eccodes.CODES_MISSING_DOUBLE or val == eccodes.CODES_MISSING_LONG:
                            val = np.nan
                        found[key] = val

                    lat = found.get("latitude", np.nan)
                    lon = found.get("longitude", np.nan)
                    year = found.get("year", -1)
                    month = found.get("month", -1)
                    day = found.get("day", -1)
                    hour = found.get("hour", -1)
                    minute = found.get("minute", -1)
                    station_number = found.get("stationNumber", -1)
                    block_number = found.get("blockNumber", -1)
                    t2m = found.get("airTemperatureAt2M", np.nan)
                    td2m = found.get("dewpointTemperatureAt2M", np.nan)
                    sd = found.get("totalSnowDepth", np.nan)
                    # heightOfStation is read after the ground height key, so
                    # it takes precedence whenever the message contains it
                    if "heightOfStation" in found:
                        elev = found["heightOfStation"]
                    else:
                        elev = found.get("heightOfStationGroundAboveMeanSeaLevel", np.nan)
                    stid = "NA"

                    # Assign value to var
                    for var in variables:
                        # start undefined for every variable so a value from
                        # the previous variable can never leak into this one
                        value = np.nan
                        if var == "relativeHumidityAt2M":
                            if not np.isnan(t2m) and not np.isnan(td2m):
                                value = self.td2rh(td2m, t2m)
                            value = value * 0.01
                        elif var == "airTemperatureAt2M":
                            value = t2m
                        elif var == "totalSnowDepth":
                            value = sd
                        else:
                            raise NotImplementedError("Var " + var + " is not coded! Please do it!")

                        all_found = not (
                            np.isnan(lat) or np.isnan(lon)
                            or year == -1 or month == -1 or day == -1
                            or hour == -1 or minute == -1
                            or np.isnan(elev) or np.isnan(value)
                        )
                        if not all_found:
                            nerror[var] += 1

                        in_domain = (latrange[0] <= lat <= latrange[1]
                                     and lonrange[0] <= lon <= lonrange[1])
                        if not in_domain:
                            ndomain[var] += 1
                            continue

                        if np.isnan(value):
                            nundef[var] += 1
                            continue

                        # missing or undefined date fields cannot form a
                        # datetime; count the observation as undefined
                        # instead of aborting the whole read
                        try:
                            obs_dtg = datetime(year=year, month=month, day=day, hour=hour, minute=minute)
                        except (TypeError, ValueError):
                            nundef[var] += 1
                            continue

                        if not self.inside_window(obs_dtg, valid_dtg, valid_range):
                            ntime[var] += 1
                            continue

                        if station_number > 0 and block_number > 0:
                            stid = str((block_number * 1000) + station_number)
                        observations.append(surfex.obs.Observation(obs_dtg, lon, lat, value,
                                                                   elev=elev, stid=stid, varname=var))
                        nobs[var] += 1

                    cnt += 1

                    # progress indicator
                    if (cnt % 1000) == 0:
                        print('.', end='')
                        sys.stdout.flush()
                finally:
                    # delete handle
                    eccodes.codes_release(bufr)

        print("\nFound " + str(len(observations)) + "/" + str(cnt))
        print("Not decoded: " + str(not_decoded))
        for var in variables:
            print("\nObservations for var=" + var + ": " + str(nobs[var]))
            print("Observations removed because of domain check: " + str(ndomain[var]))
            print("Observations removed because of not being defined/found: " + str(nundef[var]))
            print("Observations removed because of time window: " + str(ntime[var]))
            print("Messages not containing information on all keys: " + str(nerror[var]))

        surfex.obs.ObservationSet.__init__(self, observations, label=label)
Esempio n. 9
0
def readBufrFile(bufrFile, var, lonrange, latrange):
    """Read observations of one variable from a BUFR file.

    For every message the keys ``latitude``, ``longitude``,
    ``heightOfStation`` and ``var`` are read. A message yields an
    observation when its position lies strictly inside the lon/lat box and
    the value of ``var`` is defined.

    Args:
        bufrFile: Path of the BUFR file to read.
        var: BUFR key of the variable. "relativeHumidity" values are capped
            at 100 and 273.15 is subtracted from "airTemperatureAt2M" values.
        lonrange: (min, max) longitude, both bounds exclusive.
        latrange: (min, max) latitude, both bounds exclusive.

    Returns:
        List of ``observation(lon, lat, value, elev)`` objects.
    """
    # keys read from every message
    keys = [
        'latitude',
        'longitude',
        'heightOfStation',
        var,
    ]

    cnt = 0
    not_found = 0
    observations = []

    # BUFR is binary; the context manager closes the file on any exit path
    with open(bufrFile, "rb") as f:
        print("Reading " + bufrFile)
        print("Looking for keys: " + str(keys))

        # loop for the messages in the file
        while True:
            # get handle for message
            bufr = codes_bufr_new_from_file(f)
            if bufr is None:
                break

            lat = np.nan
            lon = np.nan
            value = np.nan
            elev = np.nan
            # the handle is released in ``finally`` even if decoding fails
            try:
                # we need to instruct ecCodes to expand all the descriptors
                # i.e. unpack the data values
                codes_set(bufr, 'unpack', 1)

                for key in keys:
                    try:
                        val = codes_get(bufr, key)
                    except CodesInternalError:
                        # count the messages that lack the requested variable
                        if key == var:
                            not_found = not_found + 1
                        continue

                    if val == CODES_MISSING_DOUBLE or val == CODES_MISSING_LONG:
                        val = np.nan
                    if key == "latitude":
                        lat = val
                    if key == "longitude":
                        lon = val
                    if key == "heightOfStation":
                        elev = val
                    if key == var:
                        value = val
                        if var == "relativeHumidity":
                            # cap at 100 (the original assigned to a
                            # misspelled name, so the cap never took effect)
                            if value > 100:
                                value = 100.
                        elif var == "airTemperatureAt2M":
                            value = value - 273.15
            finally:
                # delete handle
                codes_release(bufr)

            if latrange[0] < lat < latrange[1] and lonrange[0] < lon < lonrange[1]:
                if not np.isnan(value):
                    observations.append(observation(lon, lat, value, elev))

            cnt += 1

    print("Found " + str(len(observations)) + "/" + str(cnt))
    print("Not encoded for " + str(var) + ": " + str(not_found))
    return observations
def bufr_decode(f,
                fn,
                archive,
                args,
                fakeTimes=True,
                fakeDisplacement=True,
                logFixup=True):
    """Decode the next BUFR message of ``f`` into a header and a sample list.

    Args:
        f: Open file object the BUFR message is read from.
        fn: Passed to ``BufrUnreadableError`` when no message can be read.
        archive: Passed to ``BufrUnreadableError`` when no message can be read.
        args: Not used by this function.
        fakeTimes: When true, a sample whose ``timePeriod`` is missing gets a
            synthetic one that grows by ``FAKE_TIME_STEPS`` per faked sample;
            when false such samples are skipped.
        fakeDisplacement: When true, a missing ``latitudeDisplacement`` or
            ``longitudeDisplacement`` is replaced by 0 instead of
            invalidating the sample.
        logFixup: Not used by this function.

    Returns:
        Tuple ``(header, samples)``: ``header`` maps header key names to
        their non-missing values, ``samples`` is a list of dicts holding
        ``timePeriod`` plus one entry per sample key. Samples lacking any
        sample key are dropped.

    Raises:
        BufrUnreadableError: If no message can be read from ``f``.
        MissingKeyError: If the number of samples cannot be determined.
    """
    ibufr = codes_bufr_new_from_file(f)
    if not ibufr:
        raise BufrUnreadableError("empty file", fn, archive)

    # Everything that touches the handle runs inside try/finally so the
    # handle is released on success and on every error path.
    try:
        codes_set(ibufr, "unpack", 1)

        missingHdrKeys = 0
        header = {}

        k = "extendedDelayedDescriptorReplicationFactor"
        try:
            num_samples = codes_get_array(ibufr, k)[0]
        except Exception as e:
            raise MissingKeyError(
                k, message=f"cant determine number of samples: {e}") from e

        ivals = [
            "typicalYear",
            "typicalMonth",
            "typicalDay",
            "typicalHour",
            "typicalMinute",
            "typicalSecond",
            "blockNumber",
            "stationNumber",
            "radiosondeType",
            "height",
            "year",
            "month",
            "day",
            "hour",
            "minute",
            "second",
            "correctionAlgorithmsForHumidityMeasurements",
            "pressureSensorType",
            "temperatureSensorType",
            "humiditySensorType",
            "geopotentialHeightCalculation",
            "trackingTechniqueOrStatusOfSystem",
            "measuringEquipmentType",
        ]
        fvals = [
            "radiosondeOperatingFrequency",
            "latitude",
            "longitude",
            "heightOfStationGroundAboveMeanSeaLevel",
            "heightOfBarometerAboveMeanSeaLevel",
        ]
        svals = [
            "radiosondeSerialNumber",
            "typicalDate",
            "typicalTime",
            "text",
            "softwareVersionNumber",
        ]

        # Pair each header key with the marker that flags it as missing;
        # string keys have no such marker and are always stored.
        header_keys = (
            [(name, CODES_MISSING_LONG) for name in ivals]
            + [(name, CODES_MISSING_DOUBLE) for name in fvals]
            + [(name, None) for name in svals]
        )
        for k, missing in header_keys:
            try:
                value = codes_get(ibufr, k)
            except Exception as e:
                logging.debug(f"missing header key={k} e={e}")
                missingHdrKeys += 1
                continue
            if missing is None or value != missing:
                header[k] = value

        # special-case warts we do not really care about
        warts = ["shipOrMobileLandStationIdentifier"]

        for k in warts:
            try:
                header[k] = codes_get(ibufr, k)
            except Exception:
                missingHdrKeys += 1

        fkeys = [  # 'extendedVerticalSoundingSignificance',
            "pressure",
            "nonCoordinateGeopotentialHeight",
            "latitudeDisplacement",
            "longitudeDisplacement",
            "airTemperature",
            "dewpointTemperature",
            "windDirection",
            "windSpeed",
        ]
        # keys whose missing value may be replaced by 0
        replaceable = ("latitudeDisplacement", "longitudeDisplacement")

        samples = []
        invalidSamples = 0
        missingValues = 0
        fakeTimeperiod = 0
        fixups = []  # report once only

        for i in range(1, num_samples + 1):
            sample = {}

            k = "timePeriod"
            timePeriod = codes_get(ibufr, f"#{i}#{k}")
            if timePeriod == CODES_MISSING_LONG:
                invalidSamples += 1
                if not fakeTimes:
                    continue
                timePeriod = fakeTimeperiod
                fakeTimeperiod += FAKE_TIME_STEPS
                if k not in fixups:
                    logging.debug(
                        f"FIXUP timePeriod fakeTimes:{fakeTimes} fakeTimeperiod={fakeTimeperiod}"
                    )
                    fixups.append(k)

            sample[k] = timePeriod
            sampleOK = True
            for k in fkeys:
                name = f"#{i}#{k}"
                try:
                    value = codes_get(ibufr, name)
                except Exception as e:
                    sampleOK = False
                    logging.debug(f"sample={i} key={k} e={e}, skipping")
                    missingValues += 1
                    continue

                if value != CODES_MISSING_DOUBLE:
                    sample[k] = value
                elif fakeDisplacement and k in replaceable:
                    if k not in fixups:
                        logging.debug(f"--FIXUP  key {k}")
                        fixups.append(k)
                    sample[k] = 0
                else:
                    sampleOK = False
                    missingValues += 1

            if sampleOK:
                samples.append(sample)

        logging.debug((f"samples used={len(samples)}, invalid samples="
                       f"{invalidSamples}, skipped header keys={missingHdrKeys},"
                       f" missing values={missingValues}"))

        return header, samples
    finally:
        codes_release(ibufr)