Beispiel #1
0
    def test_from_bad_string(self):
        """Malformed URN strings must parse to an object whose .urn is None."""
        bad_strings = [
            'urn:ioos:sensor:whatami',
            'urn:ioos:nothinghere',
            'urn:totesbroken',
        ]
        for bad in bad_strings:
            parsed = IoosUrn.from_string(bad)
            assert parsed.urn is None
Beispiel #2
0
    def test_from_bad_string(self):
        """Strings missing required URN parts parse to a URN of None."""
        parsed = IoosUrn.from_string('urn:ioos:sensor:whatami')
        assert parsed.urn is None

        parsed = IoosUrn.from_string('urn:ioos:nothinghere')
        assert parsed.urn is None

        parsed = IoosUrn.from_string('urn:totesbroken')
        assert parsed.urn is None
Beispiel #3
0
    def add_instrument_variable(self, variable_name):
        """Create and link an instrument metadata variable for *variable_name*.

        Adds a scalar int variable named ``<variable_name>_instrument`` whose
        attributes describe the measuring instrument (derived from its URN),
        points the data variable's ``instrument`` attribute at it, and appends
        it to the data variable's ``ancillary_variables``. Logs an error and
        returns early when the data variable or the required global attributes
        (``id``, ``naming_authority``) are missing.
        """
        if variable_name not in self._nc.variables:
            # BUG FIX: the message contained a '{}' placeholder but never
            # called .format(), so the variable name was never logged.
            logger.error("Variable {} not found in file, cannot create instrument metadata variable".format(variable_name))
            return
        elif 'id' not in self._nc.ncattrs() or 'naming_authority' not in self._nc.ncattrs():
            logger.error("Global attributes 'id' and 'naming_authority' are required to create an instrument variable")
            return

        instr_var_name = "{}_instrument".format(variable_name)
        instrument = self._nc.createVariable(instr_var_name, "i4")

        # Build the instrument URN from the data variable's own attributes.
        datavar = self._nc.variables[variable_name]
        vats = { k: getattr(datavar, k) for k in datavar.ncattrs() }
        instrument_urn = urnify(self._nc.naming_authority, self._nc.id, vats)

        inst_urn = IoosUrn.from_string(instrument_urn)
        instrument.long_name = 'Instrument measuring {} from {}'.format(inst_urn.component, inst_urn.label)
        instrument.ioos_code = instrument_urn
        instrument.short_name = inst_urn.component
        instrument.definition = "http://mmisw.org/ont/ioos/definition/sensorID"

        datavar.instrument = instr_var_name

        # Append the instrument to the ancilary variables
        av = getattr(datavar, 'ancillary_variables', '')
        av += ' {}'.format(instr_var_name)
        datavar.ancillary_variables = av.strip()

        self._nc.sync()
def create_file(output, ncfile, varname, df):
    """Write *varname* from *ncfile* out to its own per-variable timeseries file.

    The output file lands in ``<output>/<feature_name>/`` and is named from
    the feature name, variable, discriminant, and the time range of *df*.
    """
    with EnhancedDataset(ncfile) as ncd:
        source_var = ncd[varname]

        lat = ncd.get_variables_by_attributes(
            standard_name='latitude')[0][:]
        lon = ncd.get_variables_by_attributes(
            standard_name='longitude')[0][:]

        # original_folder is the project name
        project = ncd.original_folder
        feature_name = '{}_{}'.format(project, ncd.MOORING).lower()

        station_urn = IoosUrn(authority=ncd.naming_authority,
                              label=feature_name,
                              asset_type='station').urn

        discriminant = ncd.id.replace('-', '_')
        time_fmt = "%Y%m%dT%H%M%SZ"
        output_filename = '{0}_{1}-{2}_{3}_TO_{4}.nc'.format(
            feature_name, source_var.name, discriminant,
            df['time'].min().strftime(time_fmt),
            df['time'].max().strftime(time_fmt))

        output_directory = os.path.join(output, feature_name)
        if not os.path.isdir(output_directory):
            os.makedirs(output_directory)

        file_global_attributes = {a: getattr(ncd, a) for a in ncd.ncattrs()}
        file_global_attributes.update(
            dict(title='{} - {}'.format(project, ncd.MOORING),
                 id=feature_name))

        variable_attributes = {a: getattr(source_var, a) for a in source_var.ncattrs()}
        # Record the specific sensor as a discriminant attribute
        variable_attributes.update(dict(discriminant=discriminant))

        # Respect the source variable's fill value when it declares one.
        fillvalue = getattr(source_var, '_FillValue', -9999.9)

        vertical_datum = None
        if 'crs' in ncd.variables and hasattr(ncd.variables['crs'],
                                              'vertical_datum'):
            vertical_datum = ncd.variables['crs'].vertical_datum

        ts = TimeSeries.from_dataframe(df,
                                       output_directory,
                                       output_filename,
                                       lat,
                                       lon,
                                       station_urn,
                                       file_global_attributes,
                                       source_var.standard_name,
                                       variable_attributes,
                                       sensor_vertical_datum=vertical_datum,
                                       fillvalue=fillvalue,
                                       vertical_axis_name='height',
                                       vertical_positive='down')
        ts.add_instrument_variable(variable_name=source_var.standard_name)
        del ts
Beispiel #5
0
 def test_cdiac_urn(self):
     """A real-world CDIAC sensor URN parses into its four parts."""
     urn_text = ('urn:ioos:sensor:gov.ornl.cdiac:'
                 'cheeca_80w_25n:sea_water_temperature')
     parsed = IoosUrn.from_string(urn_text)
     assert parsed.asset_type == 'sensor'
     assert parsed.authority == 'gov.ornl.cdiac'
     assert parsed.label == 'cheeca_80w_25n'
     assert parsed.component == 'sea_water_temperature'
Beispiel #6
0
 def test_messy_urn(self):
     """Everything after the label, including '#' metadata, is the component."""
     parsed = IoosUrn.from_string(
         'urn:ioos:sensor:myauthority:mylabel:standard_name#key=key1:value1,key2:value2;some_other_key=some_other_value')
     assert parsed.asset_type == 'sensor'
     assert parsed.authority == 'myauthority'
     assert parsed.label == 'mylabel'
     assert parsed.component == 'standard_name#key=key1:value1,key2:value2;some_other_key=some_other_value'
Beispiel #7
0
    def test_from_string(self):
        """URNs with 2, 3 and 4 trailing parts populate the right fields."""
        expectations = [
            ('urn:ioos:sensor:myauthority:mylabel',
             dict(asset_type='sensor', authority='myauthority',
                  label='mylabel')),
            ('urn:ioos:sensor:myauthority:mylabel:mycomponent',
             dict(asset_type='sensor', authority='myauthority',
                  label='mylabel', component='mycomponent')),
            ('urn:ioos:sensor:myauthority:mylabel:mycomponent:myversion',
             dict(asset_type='sensor', authority='myauthority',
                  label='mylabel', component='mycomponent',
                  version='myversion')),
        ]
        for urn_text, fields in expectations:
            parsed = IoosUrn.from_string(urn_text)
            for attr, expected in fields.items():
                assert getattr(parsed, attr) == expected
Beispiel #8
0
    def test_change_sensor_to_station(self):
        """Mutating asset_type and clearing component re-renders the URN."""
        sensor = IoosUrn.from_string('urn:ioos:sensor:myauthority:mylabel:mycomponent')
        assert sensor.asset_type == 'sensor'
        assert sensor.authority == 'myauthority'
        assert sensor.label == 'mylabel'
        assert sensor.component == 'mycomponent'

        # Stations may not carry a component, so drop it during conversion.
        sensor.asset_type = 'station'
        sensor.component = None
        assert sensor.urn == 'urn:ioos:station:myauthority:mylabel'
Beispiel #9
0
    def test_change_sensor_to_station(self):
        """A parsed sensor URN can be converted into a station URN."""
        converted = IoosUrn.from_string(
            'urn:ioos:sensor:myauthority:mylabel:mycomponent')
        for attr, expected in (('asset_type', 'sensor'),
                               ('authority', 'myauthority'),
                               ('label', 'mylabel'),
                               ('component', 'mycomponent')):
            assert getattr(converted, attr) == expected

        # Stations carry no component, so clear it before re-rendering.
        converted.asset_type = 'station'
        converted.component = None
        assert converted.urn == 'urn:ioos:station:myauthority:mylabel'
Beispiel #10
0
    def test_from_string(self):
        """Each extra colon-separated part fills the next URN field."""
        two_part = IoosUrn.from_string('urn:ioos:sensor:myauthority:mylabel')
        assert two_part.asset_type == 'sensor'
        assert two_part.authority == 'myauthority'
        assert two_part.label == 'mylabel'

        three_part = IoosUrn.from_string(
            'urn:ioos:sensor:myauthority:mylabel:mycomponent')
        assert three_part.asset_type == 'sensor'
        assert three_part.authority == 'myauthority'
        assert three_part.label == 'mylabel'
        assert three_part.component == 'mycomponent'

        four_part = IoosUrn.from_string(
            'urn:ioos:sensor:myauthority:mylabel:mycomponent:myversion')
        assert four_part.asset_type == 'sensor'
        assert four_part.authority == 'myauthority'
        assert four_part.label == 'mylabel'
        assert four_part.component == 'mycomponent'
        assert four_part.version == 'myversion'
Beispiel #11
0
    def test_setattr(self):
        """The urn property re-renders after each attribute assignment."""
        built = IoosUrn()
        built.asset_type = 'sensor'
        built.authority = 'me'
        built.label = 'mysupersensor'
        assert built.urn == 'urn:ioos:sensor:me:mysupersensor'

        # A version without a component goes directly after the label.
        built.version = 'abc'
        assert built.urn == 'urn:ioos:sensor:me:mysupersensor:abc'

        # Once a component exists, it slots in before the version.
        built.component = 'temp'
        assert built.urn == 'urn:ioos:sensor:me:mysupersensor:temp:abc'
Beispiel #12
0
    def test_setattr(self):
        """Attributes set one-by-one render the same URN as the constructor."""
        urn_obj = IoosUrn()
        for attr, value in (('asset_type', 'sensor'),
                            ('authority', 'me'),
                            ('label', 'mysupersensor')):
            setattr(urn_obj, attr, value)
        assert urn_obj.urn == 'urn:ioos:sensor:me:mysupersensor'

        urn_obj.version = 'abc'
        assert urn_obj.urn == 'urn:ioos:sensor:me:mysupersensor:abc'

        urn_obj.component = 'temp'
        assert urn_obj.urn == 'urn:ioos:sensor:me:mysupersensor:temp:abc'
Beispiel #13
0
 def test_from_long_string(self):
     """URNs with too many parts are truncated when re-rendered."""
     parsed = IoosUrn.from_string(
         'urn:ioos:sensor:whatami:wow:i:have:lots:of:things')
     assert parsed.urn == 'urn:ioos:sensor:whatami:wow:i:have'
Beispiel #14
0
    def __init__(self, output_directory, latitude, longitude, station_name, global_attributes, times=None, verticals=None, vertical_fill=None, output_filename=None, vertical_axis_name=None, vertical_positive=None):
        """Create a new CF timeseries NetCDF file on disk.

        Parameters
        ----------
        output_directory : str
            Directory the file is written into (created when missing).
        latitude, longitude : scalar
            Fixed sensor position, also recorded as geospatial attributes.
        station_name : str
            Feature-type instance identifier. When it is a valid IOOS URN
            the platform variable is populated from its parsed parts.
        global_attributes : dict
            Attributes copied onto the file, except those this class manages.
        times, verticals : optional
            Forwarded to ``setup_times_and_verticals``.
        vertical_fill : float, optional
            Fill value for the vertical axis. Defaults to -9999.9.
        output_filename : str, optional
            Randomly generated from *station_name* when not given.
        vertical_axis_name, vertical_positive : str, optional
            Default to 'z' and 'down' respectively.
        """
        if output_filename is None:
            output_filename = '{}_{}.nc'.format(station_name, int(random.random() * 100000))
            logger.info("No output filename specified, saving as {}".format(output_filename))

        self.vertical_positive  = vertical_positive or 'down'
        self.vertical_axis_name = vertical_axis_name or 'z'
        self.time_axis_name     = 'time'

        # Make directory
        if not os.path.exists(output_directory):
            os.makedirs(output_directory)

        self.time = None

        # Always start from a fresh file.
        self.out_file = os.path.abspath(os.path.join(output_directory, output_filename))
        if os.path.isfile(self.out_file):
            os.remove(self.out_file)

        with EnhancedDataset(self.out_file, 'w') as nc:
            # Global attributes
            # These are set by this script, we don't want someone to be able to set them manually
            # BUG FIX: a comma was missing after "geospatial_bounds", which
            # silently concatenated it with "geospatial_vertical_resolution"
            # so NEITHER attribute was actually skipped.
            global_skips = ["time_coverage_start", "time_coverage_end", "time_coverage_duration", "time_coverage_resolution",
                            "featureType", "geospatial_vertical_positive", "geospatial_vertical_min", "geospatial_vertical_max",
                            "geospatial_lat_min", "geospatial_lon_min", "geospatial_lat_max", "geospatial_lon_max", "geospatial_bounds",
                            "geospatial_vertical_resolution", "geospatial_lat_resolution", "geospatial_lon_resolution",
                            "Conventions", "date_created", "date_modified", "date_issued"]
            for k, v in global_attributes.items():
                if v is None:
                    v = "None"
                if k not in global_skips:
                    nc.setncattr(k, v)

            now_date = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:00Z")
            nc.setncattr("Conventions", "CF-1.6,ACDD-1.3")
            nc.setncattr("date_created", now_date)
            nc.setncattr("date_modified", now_date)
            nc.setncattr("date_issued", now_date)
            if not hasattr(nc, "date_metadata_modified"):
                nc.setncattr("date_metadata_modified", now_date)

            # Allow the customization of this attribute
            if 'cdm_data_type' not in global_attributes:
                nc.setncattr('cdm_data_type', 'Station')

            # Append to, rather than replace, any existing history.
            old_history = getattr(nc, 'history', '')
            new_history = '{} - {} - {}'.format(now_date, 'pyaxiom', 'File created using pyaxiom')
            if old_history:
                nc.setncattr('history', '{}\n{}'.format(old_history, new_history))
            else:
                nc.setncattr('history', new_history)

            # Station name, stored as a char array per CF timeseries_id.
            nc.createDimension("feature_type_instance", len(station_name))
            name = nc.createVariable("feature_type_instance", "S1", ("feature_type_instance",))
            name.cf_role = "timeseries_id"
            name.long_name = "Identifier for each feature type instance"
            name[:] = list(station_name)

            # Location (a single fixed point, so min == max).
            lat = nc.createVariable("latitude", get_type(latitude))
            lat.units           = "degrees_north"
            lat.standard_name   = "latitude"
            lat.long_name       = "sensor latitude"
            lat.axis            = "Y"
            lat.valid_min       = latitude
            lat.valid_max       = latitude
            lat[:] = latitude
            nc.setncattr("geospatial_lat_min", latitude)
            nc.setncattr("geospatial_lat_max", latitude)
            nc.setncattr("geospatial_lat_resolution", 0)
            nc.setncattr("geospatial_lat_units", "degrees_north")

            lon = nc.createVariable("longitude", get_type(longitude))
            lon.units           = "degrees_east"
            lon.standard_name   = "longitude"
            lon.long_name       = "sensor longitude"
            lon.axis            = "X"
            lon.valid_min       = longitude
            lon.valid_max       = longitude
            lon[:] = longitude
            nc.setncattr("geospatial_lon_min", longitude)
            nc.setncattr("geospatial_lon_max", longitude)
            nc.setncattr("geospatial_lon_resolution", 0)
            nc.setncattr("geospatial_lon_units", "degrees_east")

            nc.setncattr("geospatial_bounds", "POINT({} {})".format(longitude, latitude))
            if not hasattr(nc, "geospatial_bounds_crs"):
                nc.setncattr("geospatial_bounds_crs", "EPSG:4326")

            # Metadata variables
            self.crs = nc.createVariable("crs", "i4")
            self.crs.long_name           = "http://www.opengis.net/def/crs/EPSG/0/4326"
            self.crs.grid_mapping_name   = "latitude_longitude"
            self.crs.epsg_code           = "EPSG:4326"
            self.crs.semi_major_axis     = float(6378137.0)
            self.crs.inverse_flattening  = float(298.257223563)

            platform = nc.createVariable("platform", "i4")
            platform.definition = "http://mmisw.org/ont/ioos/definition/stationID"

            # Prefer URN-derived platform metadata when station_name parses.
            urn = IoosUrn.from_string(station_name)
            if urn.valid() is True:
                platform.short_name = global_attributes.get("title", urn.label)
                platform.long_name = global_attributes.get('summary', 'Station {}'.format(urn.label))
                platform.ioos_code = urn.urn
            else:
                platform.short_name = global_attributes.get("title", station_name)
                platform.long_name = global_attributes.get("summary", station_name)
                platform.ioos_code = station_name

            if vertical_fill is None:
                vertical_fill = -9999.9
            self.vertical_fill = vertical_fill

        # Reopen in append mode for the axis setup and later data writes.
        self._nc = EnhancedDataset(self.out_file, 'a')
        self.setup_times_and_verticals(times, verticals)
        logger.info("Created file at '{}'".format(self.out_file))
Beispiel #15
0
def dictify_urn(urn, combine_interval=True):
    """
        Convert a 'sensor' IOOS URN into a dict of NetCDF-style attributes.

        By default, this will put the `interval` as part of the `cell_methods`
        attribute (NetCDF CF style). To return `interval` as its own key, use
        the `combine_interval=False` parameter.

        Returns an empty dict for invalid or non-sensor URNs.
    """
    ioos_urn = IoosUrn.from_string(urn)

    if ioos_urn.valid() is False:
        return dict()

    if ioos_urn.asset_type != 'sensor':
        logger.error("This function only works on 'sensor' URNs.")
        return dict()

    # The component looks like "<standard_name>#<extras>"; extras optional.
    if '#' in ioos_urn.component:
        standard_name, extras = ioos_urn.component.split('#')
    else:
        standard_name = ioos_urn.component
        extras = ''

    d = dict(standard_name=standard_name)

    # Discriminant: encoded as "<standard_name>-<discriminant>".
    # BUG FIX: test the name portion only, not the full component, so a '-'
    # appearing only in the '#' extras no longer creates a bogus discriminant.
    if '-' in standard_name:
        d['discriminant'] = standard_name.split('-')[-1]
        d['standard_name'] = standard_name.split('-')[0]

    intervals = []
    cell_methods = []
    if extras:
        # extras look like "key1=v1,v2;key2=v3"
        for section in extras.split(';'):
            key, values = section.split('=')
            if key == 'interval':
                # special case, intervals should be appended to the cell_methods
                for v in values.split(','):
                    intervals.append(v)
            elif key == 'cell_methods':
                cell_methods = [ x.replace('_', ' ').replace(':', ': ') for x in values.split(',') ]
            else:
                # Plain attribute: undo the URN-safe encoding.
                d[key] = ' '.join([x.replace('_', ' ').replace(':', ': ') for x in values.split(',')])

    if combine_interval is True:
        if cell_methods and intervals:
            if len(cell_methods) == len(intervals):
                # Pair each cell_method with its matching interval.
                d['cell_methods'] = ' '.join([ '{} (interval: {})'.format(x[0], x[1].upper()) for x in zip(cell_methods, intervals) ])
            else:
                # Counts differ: append every interval after all methods.
                d['cell_methods'] = ' '.join(cell_methods)
                for i in intervals:
                    d['cell_methods'] += ' (interval: {})'.format(i.upper())
        elif cell_methods:
            d['cell_methods'] = ' '.join(cell_methods)
        elif intervals:
            raise ValueError("An interval without a cell_method is not allowed!  Not possible!")
    else:
        d['cell_methods'] = ' '.join(cell_methods)
        d['interval'] = ','.join(intervals).upper()

    if 'vertical_datum' in d:
        d['vertical_datum'] = d['vertical_datum'].upper()

    return d
Beispiel #16
0
def urnify_from_dict(naming_authority, station_identifier, data_dict):
    """Build an IOOS 'sensor' URN string from a variable-attribute dict.

    Uses ``standard_name`` (falling back to ``name``, then a random string)
    as the variable portion, appends a ``-discriminant`` suffix when present,
    and packs ``cell_methods``, ``bounds``, ``vertical_datum`` and
    ``interval`` metadata into the component after a '#'.
    """

    def clean_value(v):
        # Normalize a token: drop parens, trim, and make it URN-safe by
        # replacing internal spaces with underscores.
        return v.replace('(', '').replace(')', '').strip().replace(' ', '_')
    extras = []
    intervals = []  # Because it can be part of cell_methods and its own dict key

    if 'cell_methods' in data_dict and data_dict['cell_methods']:
        cm = data_dict['cell_methods']
        # Hand-rolled parse of the CF cell_methods string into key/value
        # pairs: a ':' terminates a key; when a key is already pending, the
        # text between the previous value and this ':' (scanned back to the
        # last space) is the next key and everything before it the value.
        keys = []
        values = []
        sofar = ''
        for i, c in enumerate(cm):
            if c == ":":
                if len(keys) == len(values):
                    keys.append(clean_value(sofar))
                else:
                    # Unmatched key pending: walk backwards to the space
                    # separating the finished value from the next key.
                    for j in reversed(range(0, i)):
                        if cm[j] == " ":
                            key = clean_value(cm[j+1:i])
                            values.append(clean_value(sofar.replace(key, '')))
                            keys.append(key)
                            break
                sofar = ''
            else:
                sofar += c
        # The last value needs appending
        values.append(clean_value(sofar))

        pairs = zip(keys, values)

        mems = []
        cell_intervals = []
        # groupby only groups adjacent items, so sort by key first.
        pairs = sorted(pairs)
        for group, members in itertools.groupby(pairs, lambda x: x[0]):
            if group == 'interval':
                cell_intervals = [m[1] for m in members]
            elif group in ['time', 'area']:  # Ignore 'comments'. May need to add more things here...
                member_strings = []
                for m in members:
                    member_strings.append('{}:{}'.format(group, m[1]))
                mems.append(','.join(member_strings))
        if mems:
            extras.append('cell_methods={}'.format(','.join(mems)))
        if cell_intervals:
            intervals += cell_intervals

    if 'bounds' in data_dict and data_dict['bounds']:
        extras.append('bounds={0}'.format(data_dict['bounds']))

    if 'vertical_datum' in data_dict and data_dict['vertical_datum']:
        extras.append('vertical_datum={0}'.format(data_dict['vertical_datum']))

    # A standalone 'interval' key merges with intervals found in cell_methods.
    if 'interval' in data_dict and data_dict['interval']:
        if isinstance(data_dict['interval'], (list, tuple,)):
            intervals += data_dict['interval']
        elif isinstance(data_dict['interval'], str):
            intervals += [data_dict['interval']]

    if 'standard_name' in data_dict and data_dict['standard_name']:
        variable_name = data_dict['standard_name']
    elif 'name' in data_dict and data_dict['name']:
        variable_name = data_dict['name']
    else:
        # Last resort: URNs need a variable part, so fabricate one.
        variable_name = ''.join(random.choice(string.ascii_uppercase) for _ in range(8)).lower()
        logger.warning("Had to randomly generate a variable name: {0}".format(variable_name))

    if 'discriminant' in data_dict and data_dict['discriminant']:
        variable_name = '{}-{}'.format(variable_name, data_dict['discriminant'])

    if intervals:
        intervals = list(set(intervals))  # Unique them
        extras.append('interval={}'.format(','.join(intervals)))

    if extras:
        variable_name = '{0}#{1}'.format(variable_name, ';'.join(extras))

    u = IoosUrn(asset_type='sensor',
                authority=naming_authority,
                label=station_identifier,
                component=variable_name,
                version=None)

    return u.urn
Beispiel #17
0
def main(output, station, datatype):
    """Convert raw CSV observations for *station* into NetCDF timeseries.

    *datatype* selects the column headers and variable mapping ('met',
    'waves' or 'currents'). CSV files are read from ``data/<datatype>``
    next to this script, concatenated, and one NetCDF file per mapped
    variable is written to *output*.
    """
    if datatype == 'met':
        headers = met_header
        mapping = met_mapping
    elif datatype == 'waves':
        headers = waves_header
        mapping = waves_mapping
    elif datatype == 'currents':
        headers = currents_header
        mapping = currents_mapping
    else:
        # Fail fast instead of a confusing NameError further down.
        raise ValueError("Unknown datatype: {!r}".format(datatype))

    df = None

    def dp(*args):
        # Combine the split date columns into one datetime; unparseable
        # rows become NaN and are dropped below.
        datestr = "".join([str(x) for x in args])
        try:
            return datetime.strptime(datestr, '%Y %m %d %H %M %S')
        except ValueError:
            return np.nan

    datapath = os.path.abspath(
        os.path.join(os.path.dirname(__file__), 'data', datatype))
    for csv_file in sorted(os.listdir(datapath)):
        f = os.path.join(datapath, csv_file)
        cf = pd.read_csv(
            f,
            header=None,
            names=headers,
            parse_dates={
                'time': ['year', 'month', 'day', 'hour', 'minute', 'second']
            },
            date_parser=dp)
        cf.dropna(subset=['time'], inplace=True)

        if df is None:
            df = cf
        else:
            # BUG FIX: DataFrame.append was removed in pandas 2.0;
            # pd.concat is the supported, behaviorally equivalent call.
            df = pd.concat([df, cf])

    fillvalue = -9999.9
    # Station metadata
    stat_meta = stations[station]
    station_urn = IoosUrn(asset_type='station',
                          authority=global_attributes['naming_authority'],
                          label=stat_meta['title'])

    for var in df.columns:

        try:
            var_meta = mapping[var]
        except KeyError:
            logger.error(
                "Variable {!s} was not found in variable map!".format(var))
            continue

        sensor_urn = urnify(station_urn.authority, station_urn.label, var_meta)

        # Per-file global attributes: shared globals plus station specifics.
        gas = copy(global_attributes)
        gas['keywords'] = var_meta['keywords']
        gas['title'] = stat_meta['title']
        gas['description'] = stat_meta['description']

        # These keys drive processing below; they are not NetCDF attributes.
        skip_variable_attributes = [
            'keywords', 'height_above_site', 'depth_below_surface',
            'add_offset'
        ]
        vas = {
            k: v
            for k, v in var_meta.items() if k not in skip_variable_attributes
        }

        if var_meta.get('height_above_site') and stat_meta.get('site_height'):
            # Convert to positive down
            df['depth'] = -1 * (stat_meta['site_height'] +
                                var_meta['height_above_site'])
        else:
            df['depth'] = var_meta.get('depth_below_surface', np.nan)

        if 'add_offset' in var_meta:
            df[var] = df[var] + var_meta['add_offset']

        output_filename = '{}_{}_{}.nc'.format(station_urn.label, datatype,
                                               var_meta['standard_name'])
        ts = TimeSeries.from_dataframe(
            df,
            output,
            output_filename,
            stat_meta['latitude'],
            stat_meta['longitude'],
            station_urn.urn,
            gas,
            var_meta["standard_name"],
            vas,
            sensor_vertical_datum=var_meta.get('vertical_datum'),
            fillvalue=fillvalue,
            data_column=var,
            vertical_axis_name='height',
            vertical_positive='down')
        ts.add_instrument_metadata(urn=sensor_urn)
Beispiel #18
0
 def test_args(self):
     """Constructor keyword arguments render straight into a URN."""
     built = IoosUrn(asset_type='sensor', authority='me', label='mysupersensor')
     assert built.urn == 'urn:ioos:sensor:me:mysupersensor'
Beispiel #19
0
def dictify_urn(urn, combine_interval=True):
    """
        Convert a 'sensor' IOOS URN into a dict of NetCDF-style attributes.

        By default, this will put the `interval` as part of the `cell_methods`
        attribute (NetCDF CF style). To return `interval` as its own key, use
        the `combine_interval=False` parameter.

        Returns an empty dict for invalid or non-sensor URNs.
    """
    ioos_urn = IoosUrn.from_string(urn)

    if ioos_urn.valid() is False:
        return dict()

    if ioos_urn.asset_type != 'sensor':
        logger.error("This function only works on 'sensor' URNs.")
        return dict()

    # The component looks like "<standard_name>#<extras>"; extras optional.
    if '#' in ioos_urn.component:
        standard_name, extras = ioos_urn.component.split('#')
    else:
        standard_name = ioos_urn.component
        extras = ''

    d = dict(standard_name=standard_name)

    # Discriminant: encoded as "<standard_name>-<discriminant>".
    # BUG FIX: split the name portion, not the full component — splitting the
    # whole component let the '#' extras leak into the discriminant value
    # (e.g. "a-b#c=d" produced discriminant "b#c=d").
    if '-' in standard_name:
        d['discriminant'] = standard_name.split('-')[-1]
        d['standard_name'] = standard_name.split('-')[0]

    intervals = []
    cell_methods = []
    if extras:
        # extras look like "key1=v1,v2;key2=v3"
        for section in extras.split(';'):
            key, values = section.split('=')
            if key == 'interval':
                # special case, intervals should be appended to the cell_methods
                for v in values.split(','):
                    intervals.append(v)
            elif key == 'cell_methods':
                cell_methods = [ x.replace('_', ' ').replace(':', ': ') for x in values.split(',') ]
            else:
                # Plain attribute: undo the URN-safe encoding.
                d[key] = ' '.join([x.replace('_', ' ').replace(':', ': ') for x in values.split(',')])

    if combine_interval is True:
        if cell_methods and intervals:
            if len(cell_methods) == len(intervals):
                # Pair each cell_method with its matching interval.
                d['cell_methods'] = ' '.join([ '{} (interval: {})'.format(x[0], x[1].upper()) for x in zip(cell_methods, intervals) ])
            else:
                # Counts differ: append every interval after all methods.
                d['cell_methods'] = ' '.join(cell_methods)
                for i in intervals:
                    d['cell_methods'] += ' (interval: {})'.format(i.upper())
        elif cell_methods:
            d['cell_methods'] = ' '.join(cell_methods)
        elif intervals:
            raise ValueError("An interval without a cell_method is not allowed!  Not possible!")
    else:
        d['cell_methods'] = ' '.join(cell_methods)
        d['interval'] = ','.join(intervals).upper()

    if 'vertical_datum' in d:
        d['vertical_datum'] = d['vertical_datum'].upper()

    return d
Beispiel #20
0
 def test_cdiac_urn(self):
     """Parses a real CDIAC temperature sensor URN into its parts."""
     cdiac = IoosUrn.from_string('urn:ioos:sensor:gov.ornl.cdiac:cheeca_80w_25n:sea_water_temperature')
     assert cdiac.asset_type == 'sensor'
     assert cdiac.authority == 'gov.ornl.cdiac'
     assert cdiac.label == 'cheeca_80w_25n'
     assert cdiac.component == 'sea_water_temperature'
Beispiel #21
0
 def test_constructor_no_data(self):
     """An empty IoosUrn cannot render a URN string."""
     empty = IoosUrn()
     assert empty.urn is None
Beispiel #22
0
 def test_constructor_with_bad_data(self):
     """Unknown constructor keywords do not produce a usable URN."""
     bogus = IoosUrn(notanattribute='foo')
     assert bogus.urn is None
Beispiel #23
0
 def test_station_cant_have_component(self):
     """A station URN carrying a component renders as None."""
     station = IoosUrn(asset_type='station', component='something')
     assert station.urn is None
Beispiel #24
0
 def test_from_long_string(self):
     """Extra trailing parts are dropped when the URN is re-rendered."""
     parsed = IoosUrn.from_string('urn:ioos:sensor:whatami:wow:i:have:lots:of:things')
     assert parsed.urn == 'urn:ioos:sensor:whatami:wow:i:have'
Beispiel #25
0
 def test_no_label(self):
     """A station URN without a label cannot be rendered."""
     unlabeled = IoosUrn(asset_type='station', authority='me')
     assert unlabeled.urn is None
Beispiel #26
0
 def test_messy_urn(self):
     """The component keeps its raw '#key=value' metadata untouched."""
     messy = ('urn:ioos:sensor:myauthority:mylabel:'
              'standard_name#key=key1:value1,key2:value2;some_other_key=some_other_value')
     parsed = IoosUrn.from_string(messy)
     assert parsed.asset_type == 'sensor'
     assert parsed.authority == 'myauthority'
     assert parsed.label == 'mylabel'
     assert parsed.component == 'standard_name#key=key1:value1,key2:value2;some_other_key=some_other_value'
Beispiel #27
0
def main(output_format, output, do_download, download_folder, filesubset=None):
    """Download (optionally) USGS Sandy storm-surge .txt files and convert
    them to NetCDF.

    Files are scraped from the USGS WAF index, parsed per detected format
    (type 1 or type 2), grouped by (lon, lat) position, unit-converted, and
    written either as one file per variable (``output_format == 'axiom'``)
    or one CF-1.6 file per station (``output_format == 'cf16'``).

    NOTE(review): relies on `parse_type_1`, `parse_type_2`, `get_globals`,
    `variable_map`, `fillvalue` and `to_floats` defined elsewhere in this
    module — confirm against the full file.
    """

    if do_download is True:

        # Best-effort mkdir; an existing folder is fine.
        try:
            os.makedirs(download_folder)
        except OSError:
            pass

        waf = 'http://ga.water.usgs.gov/flood/hurricane/sandy/datafiles/'

        r = requests.get(waf)
        soup = BeautifulSoup(r.text, "lxml")

        # Download every .txt file linked from the WAF index page.
        for link in soup.find_all('a'):

            # Skip non .txt files
            site_id, ext = os.path.splitext(link['href'])
            if ext != ".txt":
                continue

            if filesubset and site_id.lower() not in filesubset:
                # Skip this file!
                continue

            csv_link = waf + link['href']
            logger.info("Downloading '{}'".format(csv_link))
            d = requests.get(csv_link)
            try:
                d.raise_for_status()
            except requests.exceptions.HTTPError:
                logger.error(
                    "Could not download: {!s}, skipping. Status code: {!s}".
                    format(csv_link, d.status_code))
                continue

            with open(
                    os.path.join(download_folder, os.path.basename(csv_link)),
                    'wt') as f:
                f.write(d.text)

    # Yes, this uses lots of RAM, but we need to match up lon/lat positions later on.
    results = []
    for datafile in os.listdir(download_folder):

        site_id = os.path.splitext(os.path.basename(datafile))[0]

        if filesubset and site_id.lower() not in filesubset:
            # Skip this file!
            continue

        with open(os.path.join(download_folder, datafile)) as d:
            contents = d.read()
            r = None
            # The header line tells us which of the two file formats this is.
            for line in contents.split("\n"):
                if "agency_cd" in line:
                    r = parse_type_1(output_format, site_id, contents, output)
                    break
                elif "date_time_GMT" in line:
                    r = parse_type_2(output_format, site_id, contents, output)
                    break
                else:
                    continue

            if r is None:
                logger.error('Could not process file: {}'.format(datafile))
            else:
                logger.info("Processed {}".format(datafile))
                results.append(r)

    # groupby only groups adjacent items, so sort by the grouping key first.
    results = sorted(results, key=attrgetter('lon', 'lat'))
    gresults = groupby(results, attrgetter('lon', 'lat'))

    for (glon, glat), group in gresults:

        groups = [x for x in list(group) if x]

        # Strip off the variable type if need be
        gsite = groups[0].site
        if gsite[-2:] in ['WV', 'BP', 'WL']:
            gsite = gsite[:-2]

        for result in groups:

            gas = get_globals(glat, glon, result.z, result.name, gsite)
            station_urn = IoosUrn(asset_type='station',
                                  authority=gas['naming_authority'],
                                  label=gsite)

            if output_format == 'cf16':
                # If CF, a file for each result dataframe
                times = [
                    calendar.timegm(x.timetuple()) for x in result.df['time']
                ]
                verticals = result.df['depth'].values
                output_filename = '{}.nc'.format(result.site)
                ts = TimeSeries(output,
                                latitude=glat,
                                longitude=glon,
                                station_name=gsite,
                                global_attributes=gas,
                                output_filename=output_filename,
                                times=times,
                                verticals=verticals)

            for var in result.df.columns:
                # Coordinate/bookkeeping columns are not data variables.
                if var in [
                        'date_time_GMT', 'datetime', 'time', 'depth', 'tz_cd',
                        'site_no', 'agency_cd'
                ]:
                    continue

                try:
                    var_meta = copy(variable_map[var])
                except KeyError:
                    logger.error(
                        "Variable {!s} was not found in variable map!".format(
                            var))
                    continue

                # Convert to floats
                result.df[var] = result.df[var].map(to_floats)
                # Normalize units: feet -> meters, psi/mmHg -> mbar.
                if var_meta["units"].lower() in ["feet", "ft"]:
                    result.df[var] = result.df[var].apply(
                        lambda x: None if pd.isnull(x) else x * 0.3048)
                    var_meta["units"] = "meters"
                elif var_meta["units"].lower() in ["psi"]:
                    result.df[var] = result.df[var].apply(
                        lambda x: None if pd.isnull(x) else x * 68.9476)
                    var_meta["units"] = "mbar"
                elif var_meta["units"].lower() in ['millimeters of mercury']:
                    result.df[var] = result.df[var].apply(
                        lambda x: None if pd.isnull(x) else x * 1.33322)
                    var_meta["units"] = "mbar"

                # Now put the fillvalue we want to be interpreted
                result.df.fillna(fillvalue, inplace=True)

                if output_format == 'axiom':
                    # If Axiom, a file for each variable
                    output_directory = os.path.join(output, gsite)
                    output_filename = '{}_{}.nc'.format(
                        result.site, var_meta['standard_name'])
                    ts = TimeSeries.from_dataframe(
                        result.df,
                        output_directory,
                        output_filename,
                        glat,
                        glon,
                        station_urn.urn,
                        gas,
                        var_meta["standard_name"],
                        var_meta,
                        sensor_vertical_datum='NAVD88',
                        fillvalue=fillvalue,
                        data_column=var,
                        vertical_axis_name='height')
                    sensor_urn = urnify(station_urn.authority,
                                        station_urn.label, var_meta)
                    ts.add_instrument_metadata(urn=sensor_urn)
                elif output_format == 'cf16':
                    # If CF, add variable to existing TimeSeries
                    # Variable names cannot start with a digit; prefix them.
                    try:
                        int(var[0])
                        variable_name = 'v_{}'.format(var)
                    except BaseException:
                        variable_name = var
                    ts.add_variable(variable_name,
                                    values=result.df[var].values,
                                    attributes=var_meta,
                                    fillvalue=fillvalue,
                                    sensor_vertical_datum='NAVD88')