Example #1
    def test_bufr_read(self):
        """Test reading data and data quality on Metop-A MHS BUFR file"""

        test_file = os.path.join(test_dir, "metop_mhs.bufr")
        bfr = bufr.BUFRFile(test_file)
        data = bfr.next()
        self.assertEqual(data[0].name.strip(' '),
                         "TOVS/ATOVS PRODUCT QUALIFIER")
        self.assertEqual(data[0].data[0], 3)
        self.assertEqual(data[0].unit.strip(' '), "CODE TABLE 8070")
        self.assertEqual(data[0].index, 0)
Example #2
    def test_bufr_iterate(self):
        """Test reading all data, last entry"""

        test_file = os.path.join(test_dir, "metop_mhs.bufr")
        bfr = bufr.BUFRFile(test_file)
        for i, data in enumerate(bfr):
            pass
        self.assertEqual(i, 3)
        self.assertEqual(data[0].name.strip(' '),
                         "TOVS/ATOVS PRODUCT QUALIFIER")
        self.assertEqual(data[0].data[0], 3)
        self.assertEqual(data[0].unit.strip(' '), "CODE TABLE 8070")
        self.assertEqual(data[0].index, 0)
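
The two tests above read only the first entry of each BUFR record. As a minimal sketch built on the same iteration API (the file name reuses the test fixture above, and the stacking assumes every record carries the qualifier entry with the same number of cells), the per-record values of one named entry can be collected into a 2-D NumPy array:

import numpy
import bufr

bfr = bufr.BUFRFile("metop_mhs.bufr")
rows = []
for record in bfr:
    for entry in record:
        # entry.data is the 1-D array of values for this record
        if entry.name.strip(' ') == "TOVS/ATOVS PRODUCT QUALIFIER":
            rows.append(entry.data)
            break

# stack the records into a (num_records, num_cells) array
values = numpy.vstack(rows)
print(values.shape)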
Example #3
File: bufrfile.py Project: whigg/PySOL
    def open(
        self,
        view=None,
        datamodel_geolocation_dims=None,
        datamodel=None,
    ):
        """Open the file (or any other type of storage)

        Args:
            view (dict, optional): a dictionary where keys are dimension names
                and values are slices. A view can be set on a file, meaning
                that only the subset defined by this view will be accessible.
                Any later use of slices in :func:`read_values` will be relative
                to the view defined here.

                This view is expressed as any subset (see :func:`read_values`).
                For example:

                .. code-block:: python

                    view = {'time':slice(0,0), 'row':slice(200,300),
                        'cell':slice(200,300)}

        """
        self._handler = bufr.BUFRFile(self._url)
        super(BUFRFile, self).open(view=view, datamodel='Swath')
        if datamodel_geolocation_dims:
            self.datamodel_geolocation_dims = datamodel_geolocation_dims
        if datamodel is not None:
            self._feature_type = datamodel
        # force data buffering to get full file structure and content
        fieldnames = []
        units = []
        self._fields = {}
        data = []
        geolocdata = {}
        # read file content
        first = True
        nbrows = 0
        yearidx = None
        monthidx = None
        dayidx = None
        houridx = None
        minuteidx = None
        secondidx = None
        latidx = None
        lonidx = None
        indexes = {}
        duplicated_field_indice = {}
        for record in self._handler:
            nbrows += 1
            if first:
                nbcells = len(record[1].data)
                for entry in record:
                    # create valid name
                    entryname = (entry.name.lower().replace(
                        ' ', '_').strip('_').strip('*'))
                    logging.debug('entry name %s', entryname)
                    # identify geolocation information. We assume the first
                    # entries encountered with geolocation names are the
                    # correct ones; later entries with the same names are
                    # processed as additional variables
                    if entryname == 'year' and yearidx is None:
                        yearidx = entry.index
                        logging.debug('entry index year = %s', yearidx)
                    elif entryname == 'month' and monthidx is None:
                        monthidx = entry.index
                    elif entryname == 'day' and dayidx is None:
                        dayidx = entry.index
                    elif entryname == 'hour' and houridx is None:
                        houridx = entry.index
                    elif entryname == 'minute' and minuteidx is None:
                        minuteidx = entry.index
                    elif entryname == 'second' and secondidx is None:
                        secondidx = entry.index
                    elif ('latitude' in entryname and latidx is None):
                        latidx = entry.index
                        geolocdata['lat'] = [entry.data]
                    elif ('longitude' in entryname and lonidx is None):
                        geolocdata['lon'] = [entry.data]
                        lonidx = entry.index
                    # decides if entry should be a field or an attribute
                    elif (entryname not in self.ATTRIBUTE_ENTRIES
                          and entryname not in self.SKIPPED_ENTRIES):
                        if entryname in fieldnames:
                            # different fields may share the same name: we add
                            # an incremental suffix and rename the first
                            # occurrence accordingly
                            if entryname not in duplicated_field_indice:
                                duplicated_field_indice[entryname] = 0
                                prev_occurence = fieldnames.index(entryname)
                                newname = '_'.join([
                                    entryname,
                                    '%d' % duplicated_field_indice[entryname]
                                ])
                                fieldnames[prev_occurence] = newname
                                indexes[newname] = indexes.pop(entryname)
                            duplicated_field_indice[entryname] += 1
                            entryname = '_'.join([
                                entryname,
                                '%d' % duplicated_field_indice[entryname]
                            ])
                        fieldnames.append(entryname)
                        indexes[entryname] = entry.index
                        # get unit in standard form
                        unit = entry.unit.strip()
                        if unit in UNITS:
                            units.append(UNITS[unit])
                        else:
                            units.append(unit)
                        data.append([entry.data])
                    elif entryname in self.ATTRIBUTE_ENTRIES:
                        self.attributes[entryname] = entry.data[0]
                first = False
                for i in range(nbcells):
                    logging.debug('year %s', int(record[yearidx].data[i]))
                geolocdata['time'] = [
                    numpy.array([
                        (datetime.datetime(int(record[yearidx].data[i]),
                                           int(record[monthidx].data[i]),
                                           int(record[dayidx].data[i]),
                                           int(record[houridx].data[i]),
                                           int(record[minuteidx].data[i]),
                                           int(record[secondidx].data[i])) -
                         REFERENCE_TIME).total_seconds()
                        for i in range(nbcells)
                    ])
                ]
            else:
                # read arrays of data
                current_record_range_length = record[secondidx].data.shape[0]
                if nbcells == current_record_range_length:
                    for i, fieldname in enumerate(fieldnames):
                        data[i].append(record[indexes[fieldname]].data)
                    geolocdata['lon'].append(record[lonidx].data)
                    geolocdata['lat'].append(record[latidx].data)

                    geolocdata['time'].append(
                        numpy.array([(
                            datetime.datetime(int(record[yearidx].data[i]),
                                              int(record[monthidx].data[i]),
                                              int(record[dayidx].data[i]),
                                              int(record[houridx].data[i]),
                                              int(record[minuteidx].data[i]),
                                              int(record[secondidx].data[i])) -
                            REFERENCE_TIME).total_seconds()
                                     for i in range(nbcells)]))
        del self._handler
        self._handler = self
        # get dimensions (take into account the view)
        self._dimensions = collections.OrderedDict([('row', nbrows),
                                                    ('cell', nbcells)])
        newslices = cerbere.mapper.slices.get_absolute_slices(
            self.view,
            slices=None,
            dimnames=self._dimensions.keys(),
            dimsizes=self._dimensions.values())
        # get fields and cache data
        self._fields = {}
        for i, fieldname in enumerate(fieldnames):
            varobj = variable.Variable(fieldname, fieldname.replace('_', ' '))
            newfield = field.Field(
                variable=varobj,
                dimensions=self._dimensions,
                datatype=numpy.float32,
                values=numpy.ma.masked_equal(numpy.vstack(data[i]),
                                             self.FILLVALUE)[tuple(newslices)],
                fillvalue=self.FILLVALUE,
                units=units[i],
            )
            self._fields[fieldname] = newfield
        # geolocation
        self._geofields = {}
        varobj = variable.Variable(shortname='lat', description='latitude')
        newfield = field.Field(
            variable=varobj,
            dimensions=self._dimensions,
            datatype=numpy.float32,
            values=numpy.ma.masked_equal(numpy.vstack(geolocdata['lat']),
                                         self.FILLVALUE)[tuple(newslices)],
            fillvalue=self.FILLVALUE,
            units='degrees_north',
        )
        self._geofields['lat'] = newfield
        varobj = variable.Variable(shortname='lon', description='longitude')
        newfield = field.Field(
            variable=varobj,
            dimensions=self._dimensions,
            datatype=numpy.float32,
            values=numpy.ma.masked_equal(numpy.vstack(geolocdata['lon']),
                                         self.FILLVALUE)[tuple(newslices)],
            fillvalue=self.FILLVALUE,
            units='degrees_east',
        )
        self._geofields['lon'] = newfield
        var = variable.Variable(shortname='time', description='time')
        newfield = field.Field(
            variable=var,
            dimensions=self._dimensions,
            datatype=numpy.int32,
            values=numpy.vstack(geolocdata['time'])[tuple(newslices)],
            units=('seconds since %s' % datetime.datetime.strftime(
                REFERENCE_TIME, "%Y-%m-%d %H:%M:%S")))
        self._geofields['time'] = newfield
        return self._handler
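
The docstring of open() above shows how a view restricts later reads. A hypothetical call (sketch only: the import path and the url keyword follow the usual cerbere mapper convention and are assumptions, not taken from this project) might look like:

from cerbere.mapper.bufrfile import BUFRFile

# restrict every later read_values() call to rows 0..99 and cells 10..29
mapper = BUFRFile(url='metop_mhs.bufr')
mapper.open(view={'row': slice(0, 100), 'cell': slice(10, 30)})
lats = mapper.read_values('lat')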
Example #4
def bufr2netcdf(instr_name, bufr_fn, nc_fn, dburl=None):
    """ Does the actual work in transforming the file """

    # Create file object and connect to database
    bfr = bufr.BUFRFile(bufr_fn)
    try:
        os.remove(nc_fn)
    except OSError:
        pass

    rootgrp = Dataset(nc_fn, 'w', format='NETCDF4')

    conn = None
    if dburl is not None:
        conn = bufrmetadb.BUFRDescDBConn(dburl)
    else:
        conn = bufrmetadb.BUFRDescDBConn()

    instr = conn.get_instrument(instr_name)

    #Get bufr record sections from database
    bstart = instr.bufr_record_start
    bend = instr.bufr_record_end
    transpose = instr.transposed

    logger.debug("start index: %s, end index: %s" % (bstart, bend))
    logger.debug("transposed : %s" % transpose)

    # Read BUFR file keys and get corresponding NetCDF names from
    # the database. Fast forward to record start.
    for i in range(bstart + 1):
        records = bfr.read()

    # Set up accounting for each variable, to be used when writing variables
    # to netcdf.
    bfr_count = []
    for r in records:
        bfr_count.append(0)

    vname_map = conn.get_netcdf_parameters_dict(instr_name)

    # get replication indices; these indices handle multiple records of the
    # same variable within a bufr subsection.
    replication_indicies = conn.get_replication_indicies(instr_name)

    # Create attributes
    _create_global_attributes(rootgrp, instr)

    # Create dimensions
    _create_dimensions(rootgrp, vname_map)

    #
    # Get the list of variables which should be treated as constants and
    # global attributes. Note that these values should be constant for
    # every entry in the scanline, so below we just insert the value
    # from an arbitrary scanline
    #
    global_var_attrs = conn.get_netcdf_global_attrs(instr_name)
    for record in records:
        if record.name in global_var_attrs:
            setattr(rootgrp, global_var_attrs[record.name], "%s" % \
                    record.data)

    # create variables
    _create_variables(rootgrp, vname_map)

    #
    # This section inserts data into the NetCDF variables
    #

    logger.debug("Closing netcdf handle after setup")

    rootgrp.close()

    logger.debug("Opening netcdf handle after setup")

    rootgrp = Dataset(nc_fn, 'a', format='NETCDF4')

    ##bfr.reset()
    del bfr
    bfr = bufr.BUFRFile(bufr_fn)
    scalars_handled = False

    # Loop though all sections and dump to netcdf
    #
    for count, section in enumerate(bfr):

        # manage record boundaries. In some cases BUFR sections differ within
        # a file. This enables the user to convert only similar sections
        if count < bstart:
            continue
        if bend != -1 and count > bend - 1:
            break

        mysection = section
        if transpose:
            # allocate container for new transposed data
            transposed_section = []

            # SSMIS BUFR from SDR records are sorted by field-of-view and not
            # by scanline, hence the need to collect and transpose the data.

            # Collect the record indices we need
            indicies = []

            # Structure for keeping track of base entry and replicated entries
            index_groups = {}

            for rec1 in section:
                try:
                    nc_name = vname_map[rec1.index]\
                            ['netcdf_name']
                    nc_dim_length = vname_map[rec1.index]\
                            ['netcdf_dimension_length']

                    index_groups[rec1.index] = []

                    # we need to collect lines matched by bufr replication; we
                    # assume that only the original entry has a netcdf_name
                    # assigned to it.
                    for rec2 in section:
                        linked_index = replication_indicies[rec2.index]
                        if linked_index == rec1.index:
                            if rec2.index not in indicies:
                                indicies.append(rec2.index)
                                index_groups[rec1.index].append(rec2.index)
                            else:
                                # Make sure that we don't have replicated
                                # entries that link to different base entries
                                raise BUFR2NetCDFError(
                                    "Unable to transpose section, ambiguous variable naming"
                                )

                except KeyError:
                    pass

            # Collect similar variables into a single array consisting of
            # stacked data rows. If the columns are ordered by field-of-view
            # we need to transpose the entire array to get the data ordered by
            # scanlines. This is the case with SSMIS data from Eumetcast.

            for key, index_group in index_groups.iteritems():
                old_entry = section[key]
                new_data = np.vstack(\
                        [section[i].data for i in index_group]).transpose()
                # transpose all linked scanlines and link them back to the
                # replication base key
                for scanline in new_data:
                    transposed_section.append(
                        bufr.BUFRFileEntry(old_entry.index, old_entry.name,
                                           old_entry.unit,
                                           scanline[:nc_dim_length]))

            # reassign whole section to new transposed section
            mysection = transposed_section

        if transpose and (len(section) != len(transposed_section)):
            logger.debug("Different section lengths orig. %s transposed %s" %\
                    (len(section), len(transposed_section)))

        for record in mysection:

            if record is None:
                # This record is set to None by the transpose functionality,
                # see above. Just ignore the record and continue
                continue

            # linked index handles BUFR replication factors with multiple data
            # entries in one subsection.
            try:
                linked_index = replication_indicies[record.index]
            except KeyError:
                continue

            # only try to convert variables that define the netcdf_name
            # parameter
            try:
                nc_var = rootgrp.variables[vname_map[linked_index]\
                        ['netcdf_name']]
            except KeyError:
                continue

            _insert_record(vname_map, nc_var, record, scalars_handled,
                           bfr_count[linked_index], linked_index)

            # This variable determines the record number in the netcdf
            # variable at which the data should be stored
            bfr_count[linked_index] += 1

        # we have inserted the first bufr section and hence all variables that
        # can be packed into scalars or per scan vectors should be accounted for
        scalars_handled = True

        # We have inserted all data for this subsection. We need to level out
        # the differences in the unlimited dimension. The differences stem
        # from the bufr replication factor.

        # find the max index of the unlimited dimension
        max_record = max(bfr_count)

        for record in mysection:

            if record is None:
                # record set to None by transpose functionality above
                continue

            # only insert fill data for records with a netcdf_name attribute
            try:
                nc_var = rootgrp.variables[vname_map[record.index]\
                        ['netcdf_name']]
            except KeyError:
                continue

            fill_rows = max_record - bfr_count[record.index]
            for i in range(fill_rows):
                _insert_record(vname_map, nc_var, record, scalars_handled,
                               bfr_count[record.index], record.index)
                # This variable determines the record number in the netcdf
                # variable at which the data should be stored
                bfr_count[record.index] += 1
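
A call to this converter might look like the following sketch; the instrument name, file paths and database URL are placeholders, not values taken from the source (the instrument must already be described in the BUFR metadata database for the lookups above to succeed):

bufr2netcdf('ssmis', '/tmp/input.bufr', '/tmp/output.nc',
            dburl='sqlite:///bufr_desc.db')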
Example #5
import bufr
f = '/tmp/ascat_20150531_013300_metopa_44686_eps_o_coa_ovw.l2_bufr'
hh = bufr.BUFRFile(f)
var = []
unit = []
data = []
for record in hh:
    for entry in record:
        entryname = (entry.name.lower().replace(' ',
                                                '_').strip('_').strip('*'))
        if entryname not in var:
            var.append(entryname)
            unit.append(entry.unit)
            data.append(entry.data[0])
            print entryname, entry.unit, entry.data[0]
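
The script above keeps only the first value of each entry. A small variation (a sketch only; it reopens the same file and assumes entries sharing a normalized name have equally sized arrays so the stacking works) collects the full array of every record:

import numpy

hh = bufr.BUFRFile(f)
arrays = {}
for record in hh:
    for entry in record:
        name = entry.name.lower().replace(' ', '_').strip('_').strip('*')
        # one 1-D array per record and entry
        arrays.setdefault(name, []).append(entry.data)

# stack per-record arrays into 2-D (num_records, num_values) arrays
stacked = dict((name, numpy.vstack(chunks)) for name, chunks in arrays.items())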
Example #6
def bufr2netcdf(instr_name, bufr_fn, nc_fn, dburl=None):
    """ Does the actual work in transforming the file """
    
    # Create file object and connect to database
    ncf = NetCDF.NetCDFFile(nc_fn,'w')
    bfr = bufr.BUFRFile(bufr_fn)

    conn = None
    if dburl is not None:
        conn = bufr.metadb.BUFRDescDBConn(dburl)
    else:
        conn = bufr.metadb.BUFRDescDBConn()

    instr = conn.get_instrument(instr_name)

    #Get bufr record sections from database
    bstart = instr.bufr_record_start
    bend = instr.bufr_record_end
    
    bfr.reset()

    # Read BUFR file keys and get corresponding NetCDF names from
    # the database. Fast forward to record start.
    for i in range(bstart+1):
        records = bfr.read()
    bfr_keys = [r.index for r in records]
    vname_map = {}
    for k in bfr_keys:
        vname_map[k] = conn.get_netcdf_parameters(instr_name, k)
    
    # Create attributes 
    _create_global_attributes(ncf, instr)

    # Create dimensions
    _create_dimensions(ncf, vname_map)

    #
    # Get the list of variables which should be treated as constants and
    # global attributes. Note that these values should be constant for
    # every entry in the scanline, so below we just insert the value
    # from an arbitrary scanline
    #
    global_var_attrs = conn.get_netcdf_global_attrs(instr_name)
    for record in records:
        if record.name in global_var_attrs:
            print "attri %s" % record.name
            setattr(ncf, global_var_attrs[record.name], "%s" % \
                    record.data)

    # create variables
    _create_variables(ncf, vname_map)

    # close file and reopen in append mode
    ncf.close()
    ncf = NetCDF.NetCDFFile(nc_fn,'a')

    #
    # Insert data into variables
    #
  
    bfr.reset()
    bfr.next()
    count = -1
    scalars_handled = False
    for section in bfr:
        count = count + 1 
        ##ncf.sync()
        # manage record boundaries... 
        if count < bstart: 
            continue
        if bend != -1 and count > bend - 1:
            break
        for record in section:
            # only try to convert variables that define the netcdf_name
            # parameter
            try:
                nc_var = ncf.variables[vname_map[record.index]['netcdf_name']]
            except KeyError:
                continue
           
            _insert_record(vname_map, nc_var, record, scalars_handled, count)

        # we have inserted the first bufr section and hence all variables that
        # can be packed into scalars or per scan vectors should be accounted for
        scalars_handled = True 

    ncf.close()