Example #1
    def test_multiple_attr_filter(self):
        nc = EnhancedDataset(self.input_file)
        grid_spacing_vars = nc.get_variables_by_attributes(grid_spacing='4.0 km', standard_name='projection_y_coordinate')

        y = nc.variables.get('y')

        self.assertEqual(len(grid_spacing_vars), 1)
        assert y in grid_spacing_vars
Example #2
    def test_single_attr_filter(self):
        nc = EnhancedDataset(self.input_file)
        grid_spacing_vars = nc.get_variables_by_attributes(grid_spacing='4.0 km')

        x = nc.variables.get('x')
        y = nc.variables.get('y')

        self.assertEqual(len(grid_spacing_vars), 2)
        assert x in grid_spacing_vars
        assert y in grid_spacing_vars
Example #3
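# Presumably driven by a @pytest.mark.parametrize decorator (elided here) that supplies 'data' and 'expected_dtype'.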
def test_int64_dtypes(data, expected_dtype):
    assert get_dtype(data) == expected_dtype
    with EnhancedDataset('foo.nc', 'w') as ncd:
        ncd.createDimension('three', 3)
        v = ncd.createVariable('foo', expected_dtype, ('three', ))
        v[:] = data
    os.remove('foo.nc')
Example #4
def create_file(output, ncfile, varname, df):
    with EnhancedDataset(ncfile) as ncd:
        var = ncd[varname]

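        # Look up the coordinate variables by their CF standard_name attribute and read their values.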
        latitude = ncd.get_variables_by_attributes(
            standard_name='latitude')[0][:]
        longitude = ncd.get_variables_by_attributes(
            standard_name='longitude')[0][:]
        project = ncd.original_folder
        feature_name = '{}_{}'.format(project, ncd.MOORING).lower()

        station_urn = IoosUrn(authority=ncd.naming_authority,
                              label=feature_name,
                              asset_type='station').urn

        discriminant = ncd.id.replace('-', '_')
        output_filename = '{0}_{1}-{2}_{3}_TO_{4}.nc'.format(
            feature_name, var.name, discriminant,
            df['time'].min().strftime("%Y%m%dT%H%M%SZ"),
            df['time'].max().strftime("%Y%m%dT%H%M%SZ"))
        output_directory = os.path.join(output, feature_name)

        if not os.path.isdir(output_directory):
            os.makedirs(output_directory)

        file_global_attributes = {k: getattr(ncd, k) for k in ncd.ncattrs()}
        # original_folder is the project name
        file_global_attributes.update(
            dict(title='{} - {}'.format(project, ncd.MOORING),
                 id=feature_name))

        variable_attributes = {k: getattr(var, k) for k in var.ncattrs()}
        # Add the specific sensor as a discriminant
        variable_attributes.update(dict(discriminant=discriminant))

        fillvalue = -9999.9
        if hasattr(var, "_FillValue"):
            fillvalue = var._FillValue

        vertical_datum = None
        if 'crs' in ncd.variables and hasattr(ncd.variables['crs'],
                                              'vertical_datum'):
            vertical_datum = ncd.variables['crs'].vertical_datum

        ts = TimeSeries.from_dataframe(df,
                                       output_directory,
                                       output_filename,
                                       latitude,
                                       longitude,
                                       station_urn,
                                       file_global_attributes,
                                       var.standard_name,
                                       variable_attributes,
                                       sensor_vertical_datum=vertical_datum,
                                       fillvalue=fillvalue,
                                       vertical_axis_name='height',
                                       vertical_positive='down')
        ts.add_instrument_variable(variable_name=var.standard_name)
        del ts
Example #5
def normalize_vectors(netcdf_file):
    with EnhancedDataset(netcdf_file, 'a') as nc:
        east = None
        north = None
        for v in nc.variables:
            nc_var = nc.variables.get(v)
            if hasattr(
                    nc_var, 'standard_name'
            ) and nc_var.standard_name == 'eastward_sea_water_velocity':
                east = nc_var
                continue
            if hasattr(
                    nc_var, 'standard_name'
            ) and nc_var.standard_name == 'northward_sea_water_velocity':
                north = nc_var
                continue

        std_names = []
        for varname in nc.variables:
            var = nc.variables.get(varname)
            if hasattr(var, 'standard_name'):
                std_names.append(var.standard_name)

        # Only add the variables if they don't already exist
        if east is not None and north is not None and 'sea_water_speed' not in std_names and 'direction_of_sea_water_velocity' not in std_names:
            # We have vectors... create the speed and direction variables
            speed = np.sqrt(np.square(east[:]) + np.square(north[:]))
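            # Note: np.degrees(np.arctan2(north, east)) is the mathematical angle, counter-clockwise from due east, in (-180, 180].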
            direction = np.degrees(np.arctan2(north[:], east[:]))

            east_fill_value = east._FillValue if hasattr(
                east, '_FillValue') else np.nan
            spd = nc.createVariable('CS_300',
                                    east.dtype,
                                    east.dimensions,
                                    fill_value=east_fill_value)
            spd.standard_name = 'sea_water_speed'
            spd.long_name = "Current speed"
            spd.units = 'm/s'
            spd.epic_code = 300
            spd[:] = speed

            drc = nc.createVariable('CD_310',
                                    east.dtype,
                                    east.dimensions,
                                    fill_value=east_fill_value)
            drc.standard_name = 'direction_of_sea_water_velocity'
            drc.long_name = "Current direction"
            drc.units = 'degree'
            drc.epic_code = 310
            drc[:] = direction
Example #6
def normalize_units(netcdf_file):
    with EnhancedDataset(netcdf_file, 'a') as nc:
        for v in nc.variables:
            nc_var = nc.variables.get(v)
            if hasattr(nc_var, 'units') and nc_var.units == "K":
                # Convert kelvin to Celsius
                nc_var[:] = nc_var[:] - 273.15
                nc_var.units = "degree_Celsius"
            elif hasattr(
                    nc_var, 'standard_name'
            ) and nc_var.standard_name == 'sea_surface_wave_from_direction':
                # Convert "From" to "To" direction
                nc_var[:] = (nc_var[:] + 180) % 360
                nc_var.standard_name = 'sea_surface_wave_to_direction'
                nc_var.long_name = "Wave Direction (to TN)"
Example #7
    def test_generic_masked_bad_min_max_value(self):

        _, tpath = tempfile.mkstemp(suffix='.nc', prefix='pyaxiom-test')
        shutil.copy2(self.input_file, tpath)

        with EnhancedDataset(tpath, 'a') as ncd:
            v = ncd.variables['v_component_wind_true_direction_all_geometries']
            v.valid_min = 0.1
            v.valid_max = 0.1
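            # valid_min == valid_max collapses the valid range to a single value, so effectively every point should be masked.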
            r = generic_masked(v[:], attrs=ncd.vatts(v.name))
            rflat = r.flatten()
            assert rflat[~rflat.mask].size == 0

            # Create a byte variable with a float valid_min and valid_max
            # to make sure it doesn't error
            b = ncd.createVariable('imabyte', 'b')
            b.valid_min = 0
            b.valid_max = 600  # 600 overflows a byte and is thus invalid
            b[:] = 3

            r = generic_masked(b[:], attrs=ncd.vatts(b.name))
            assert np.all(r.mask == False)  # noqa

            b.valid_min = 0
            b.valid_max = 2
            r = generic_masked(b[:], attrs=ncd.vatts(b.name))
            assert np.all(r.mask == True)  # noqa

            c = ncd.createVariable('imanotherbyte', 'f4')
            c.setncattr('valid_min', '0b')
            c.setncattr('valid_max', '9b')
            c[:] = 3
            r = generic_masked(c[:], attrs=ncd.vatts(c.name))
            assert np.all(r.mask == False)  # noqa

            c = ncd.createVariable('imarange', 'f4')
            c.valid_range = [0.0, 2.0]
            c[:] = 3.0
            r = generic_masked(c[:], attrs=ncd.vatts(c.name))
            assert np.all(r.mask == True)  # noqa

            c.valid_range = [0.0, 2.0]
            c[:] = 1.0
            r = generic_masked(c[:], attrs=ncd.vatts(c.name))
            assert np.all(r.mask == False)  # noqa

        if os.path.exists(tpath):
            os.remove(tpath)
Example #8
    def add_time_bounds(self, delta=None, position=None):
        with EnhancedDataset(self.out_file, 'a') as nc:
            nc.createDimension("bounds", 2)
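            # Each time step gets a [start, stop] pair, hence a bounds dimension of length 2.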
            time_bounds = nc.createVariable('{}_bounds'.format(
                self.time_axis_name),
                                            "f8", (
                                                "time",
                                                "bounds",
                                            ),
                                            chunksizes=(
                                                1000,
                                                2,
                                            ))
            time_bounds.units = "seconds since 1970-01-01T00:00:00Z"
            time_bounds.calendar = "gregorian"

            time_objs = netCDF4.num2date(self.time[:],
                                         units=self.time.units,
                                         calendar=self.time.calendar)
            bounds_kwargs = dict(units=time_bounds.units,
                                 calendar=time_bounds.calendar)

            if position == "start":
                time_bounds[:] = np.asarray(
                    list(
                        zip(
                            self.time[:],
                            netCDF4.date2num(time_objs + delta,
                                             **bounds_kwargs))))
            elif position == "middle":
                time_bounds[:] = np.asarray(
                    list(
                        zip(
                            netCDF4.date2num(time_objs - delta / 2,
                                             **bounds_kwargs),
                            netCDF4.date2num(time_objs + delta / 2,
                                             **bounds_kwargs))))
            elif position == "end":
                time_bounds[:] = np.asarray(
                    list(
                        zip(
                            netCDF4.date2num(time_objs - delta,
                                             **bounds_kwargs), self.time[:])))
Example #9
def normalize_time(netcdf_file):
    epoch_units = 'seconds since 1970-01-01T00:00:00Z'
    millisecond_units = 'milliseconds since 1858-11-17T00:00:00Z'

    with EnhancedDataset(netcdf_file, 'a') as nc:
        # Signell said this works, any problems and we can all blame him!
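        # EPIC-style convention: 'time' appears to hold the Julian Day number and 'time2'
        # the milliseconds since midnight; subtracting 2400001 shifts the Julian Day to
        # days since the Modified Julian Day epoch (1858-11-17) before scaling to milliseconds.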
        time_data = netCDF4.num2date(
            (np.int64(nc.variables['time'][:]) - 2400001) * 3600 * 24 * 1000 +
            nc.variables['time2'][:].__array__(),
            units=millisecond_units)  # noqa
        nc.renameVariable("time", "old_time")
        nc.sync()

        time = nc.createVariable('time', 'f8', ('time',))
        time.units = epoch_units
        time.standard_name = "time"
        time.long_name = "time of measurement"
        time.calendar = "gregorian"
        time[:] = netCDF4.date2num(time_data, units=epoch_units).round()
        return time_data[0]
Example #10
    def add_variable_object(self,
                            varobject,
                            dimension_map=None,
                            reduce_dims=None):

        dimension_map = dimension_map or {}
        reduce_dims = reduce_dims or False

        with EnhancedDataset(self.out_file, 'a') as nc:

            fillvalue = -9999.99
            if hasattr(varobject, '_FillValue'):
                fillvalue = varobject._FillValue

            dims = []
            for n in varobject.dimensions:
                d = dimension_map.get(n, n)
                dim_size = varobject.shape[list(varobject.dimensions).index(n)]
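                # When reduce_dims is requested, drop length-0/1 dimensions so singleton axes are squeezed out of the copy.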
                if reduce_dims is True and dim_size in [0, 1]:
                    continue

                if d not in nc.dimensions:
                    nc.createDimension(d, dim_size)
                dims.append(d)

            var = nc.createVariable(varobject.name,
                                    varobject.dtype,
                                    dims,
                                    fill_value=fillvalue,
                                    zlib=True)

            for k in varobject.ncattrs():
                if k not in ['name', '_FillValue']:
                    var.setncattr(k, varobject.getncattr(k))

            if reduce_dims:
                var[:] = varobject[:].squeeze()
            else:
                var[:] = varobject[:]
Example #11
def download(folder, project_metadata, filesubset, since):

    # Use thredds_crawler to find DAP endpoints of the RAW data.
    total_datasets = []
    skips = Crawl.SKIPS + ['.*OTHER.*', '.*ancillary.*', '.*OLD_VERSIONS.*']

    try:
        for k, v in project_metadata.items():
            # http://regexr.com/3conn
            datasets = Crawl(v['catalog_xml'],
                             select=['(.*)'],
                             skip=skips,
                             after=since).datasets
            logger.info("Found {0} datasets in {1}!".format(len(datasets), k))
            total_datasets += datasets
        logger.info("Found {0} TOTAL datasets!".format(len(total_datasets)))
    except KeyboardInterrupt:
        logger.info("Breaking out of crawling loop.")
        total_datasets = []

    try:
        os.makedirs(folder)
    except OSError:
        pass

    # Save datasets to download directory
    saved_files = []
    for num, d in enumerate(total_datasets):

        if filesubset and d.name.lower() not in filesubset:
            continue

        try:
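            # Prefer the dataset's HTTPServer endpoint so the raw file can be downloaded directly.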
            http_url = next(s["url"] for s in d.services
                            if s["service"].lower() == "httpserver")
            project_name = http_url.split("/")[-2]
        except StopIteration:
            logger.error("No HTTPServer endpoint found, skipping")
            continue

        # Make download folder
        save_file = os.path.join(folder, project_name, d.name)
        if not os.path.isdir(os.path.dirname(save_file)):
            os.makedirs(os.path.dirname(save_file))
        logger.info("Downloading {0}".format(http_url))
        try:
            with open(save_file, "wb") as f:
                r = requests.get(http_url, stream=True)
                if not r.ok:
                    logger.error(
                        "Could not download '{!s}' from '{!s}', skipping".
                        format(d.name, http_url))
                    break
                for block in r.iter_content(1024):
                    if not block:
                        break
                    f.write(block)
        except KeyboardInterrupt:
            logger.info("Breaking out of download loop.")
            raise
        except BaseException:
            logger.error(
                "Could not download... error with HTTP endpoint.  Skipping.")
            continue

        # Try to open file, if it fails, writing failed.
        try:
            with EnhancedDataset(save_file, 'a') as nc:
                name, _ = os.path.splitext(d.name)
                nc.id = "{0}/{1}".format(project_name, name)
        except BaseException:
            os.remove(save_file)
            raise
        else:
            logger.info("{!s} saved ({!s}/{!s})".format(
                d.name, num + 1, len(total_datasets)))
            saved_files.append(save_file)

    return saved_files
Example #12
def main(output,
         download_folder,
         projects,
         do_download,
         filesubset=None,
         since=None):

    if do_download:
        try:
            downloaded_files = download(download_folder, projects, filesubset,
                                        since)
        except KeyboardInterrupt:
            logger.exception('Interrupted downloading datasets from THREDDS')
            downloaded_files = []
    else:
        downloaded_files = glob(os.path.join(download_folder, '**', '*'))
        if since is not None:

            def should_keep(d):
                modt = datetime.utcfromtimestamp(
                    os.path.getmtime(d)).replace(tzinfo=pytz.utc)
                return modt >= since

            downloaded_files = [
                dl for dl in downloaded_files if should_keep(dl)
            ]

    epic_skips = metadata_codes + voltage_codes + location_codes + time_codes

    downloaded_files = sorted(downloaded_files)

    # Take the downloaded_files and split them up into arrays of related files.
    # This is needed because some of the files need to be combined... they
    # represent the same station/mooring/variables, but at different depths
    i = 0
    combinations = []
    while i < len(downloaded_files):

        combo = []
        nc_file = os.path.abspath(downloaded_files[i])
        combo.append(nc_file)

        try:

            if filesubset is not None:
                if os.path.basename(nc_file).lower() not in filesubset:
                    # aka "9631ecp-a.nc"
                    # Skip this file!
                    continue

            with EnhancedDataset(nc_file) as tmpnc:

                if projects:
                    if hasattr(
                            tmpnc, 'original_folder'
                    ) and tmpnc.original_folder.upper() not in projects:
                        continue

                logger.info("Scanned {}".format(nc_file))
                # Now search for files that are of the same var, but with a different depth
                thisbase = os.path.basename(nc_file).lower().split("_d")[0]

                for j in range(i + 1, len(downloaded_files)):
                    nextout = os.path.abspath(downloaded_files[j])
                    nextbase = os.path.basename(nextout).lower().split("_d")[0]
                    if thisbase == nextbase:
                        # Found a match
                        logger.info("Scanned {}".format(nextout))
                        combo.append(nextout)
                        # Now skip file because we added it already
                        i += 1
                    else:
                        # Doesn't match, move on to the next outfile
                        break
                # Add to combinations so it is processed
                combinations.append(combo)
        except BaseException:
            logger.exception("Error. Skipping {0}.".format(nc_file))
            continue
        finally:
            i += 1

    # Now iterate over each set of files and combine as necessary
    for c in combinations:
        dataframes_to_create = dict()
        for f in c:
            try:
                with EnhancedDataset(f) as ncd:
                    for var in ncd.get_variables_by_attributes(
                            coordinates=lambda v: v is not None):
                        if not hasattr(var, 'standard_name'):
                            logger.warning(
                                "{}: Skipping variable {} because it has no standard_name"
                                .format(f, var.name))
                            continue
                        if hasattr(
                                var,
                                'epic_code') and var.epic_code in epic_skips:
                            logger.warning(
                                "{}: Skipping metadata variable {}".format(
                                    f, var.standard_name))
                            continue

                        df = get_dataframe_from_variable(ncd, var)

                        if var.name in dataframes_to_create:
                            logger.info("Combining variable {}".format(
                                var.name))
                            old_df = dataframes_to_create[var.name]['frame']
                            dataframes_to_create[
                                var.name]['frame'] = old_df.combine_first(df)
                        else:
                            logger.info("New variable {}".format(var.name))
                            df_dict = dict(frame=df,
                                           varname=var.name,
                                           ncfile=f)
                            dataframes_to_create[var.name] = df_dict
            except BaseException:
                logger.exception("Error. Skipping {0}.".format(f))
                continue

        # Create a file for each dataframe
        for varname, creation in dataframes_to_create.items():
            create_file(output, creation['ncfile'], creation['varname'],
                        creation['frame'])
Example #13
    def add_variable(self,
                     variable_name,
                     values,
                     times=None,
                     verticals=None,
                     sensor_vertical_datum=None,
                     attributes=None,
                     unlink_from_profile=None,
                     fillvalue=None,
                     raise_on_error=False):

        if isinstance(values, (
                list,
                tuple,
        )) and values:
            values = np.asarray(values)
        if isinstance(times, (
                list,
                tuple,
        )) and times:
            times = np.asarray(times)
        if isinstance(verticals, (
                list,
                tuple,
        )) and verticals:
            verticals = np.asarray(verticals)

        # Set vertical datum on the CRS variable
        if sensor_vertical_datum is not None:
            try:
                self.crs.geoid_name = sensor_vertical_datum
                self.crs.vertical_datum = sensor_vertical_datum
                self.crs.water_surface_reference_datum = sensor_vertical_datum
            except AttributeError:
                pass

        # Set default fillvalue for new variables
        if fillvalue is None:
            fillvalue = -9999.9

        used_values = None
        try:
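            # unlink_from_profile appears to flag variables recorded at a single sensor depth rather than along the profile's z axis.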
            if unlink_from_profile is True:
                used_values = np.ma.reshape(values, (self.time.size, ))
                used_values = used_values[self.time_indexes]
            # These next two cases should work for all but a few cases, which are caught below
            elif self.z.size == 1:
                used_values = np.ma.reshape(values, (self.time.size, ))
                used_values = used_values[self.time_indexes]
            else:
                used_values = np.ma.reshape(values, (
                    self.time.size,
                    self.z.size,
                ))
                used_values = used_values[self.time_indexes]
                try:
                    used_values = used_values[:, self.vertical_indexes]
                except IndexError:
                    # The vertical values most likely had duplicates.  Ignore the
                    # faulty index here and try to save the values as is.
                    pass
        except ValueError:
            if raise_on_error is True:
                raise
            else:
                logger.warning(
                    "Could not do a simple reshape of data, trying to match manually! Time:{!s}, Heights:{!s}, Values:{!s}"
                    .format(self.time.size, self.z.size, values.size))
            if self.z.size > 1:
                if times is not None and verticals is not None:
                    # Hmmm, we have two actual height values for this station.
                    # Not cool man, not cool.
                    # Reindex the entire values array.  This is slow.
                    indexed = ((bisect.bisect_left(self.time[:], times[i]),
                                bisect.bisect_left(self.z[:],
                                                   verticals[i]), values[i])
                               for i in range(values.size))
                    used_values = np.ndarray((
                        self.time.size,
                        self.z.size,
                    ),
                                             dtype=values.dtype)
                    used_values.fill(float(fillvalue))
                    for (tzi, zzi, vz) in indexed:
                        if zzi < self.z.size and tzi < self.time.size:
                            used_values[tzi, zzi] = vz
                else:
                    raise ValueError(
                        "You need to pass in both 'times' and 'verticals' parameters that matches the size of the 'values' parameter."
                    )
            else:
                if times is not None:
                    # Ugh, find the time indexes manually
                    indexed = ((bisect.bisect_left(self.time[:],
                                                   times[i]), values[i])
                               for i in range(values.size))
                    used_values = np.ndarray((self.time.size, ),
                                             dtype=values.dtype)
                    used_values.fill(float(fillvalue))
                    for (tzi, vz) in indexed:
                        if tzi < self.time.size:
                            used_values[tzi] = vz
                else:
                    raise ValueError(
                        "You need to pass in a 'times' parameter that matches the size of the 'values' parameter."
                    )

        with EnhancedDataset(self.out_file, 'a') as nc:
            logger.info("Setting values for {}...".format(variable_name))
            if len(used_values.shape) == 1:
                var = nc.createVariable(variable_name,
                                        used_values.dtype, ("time", ),
                                        fill_value=fillvalue,
                                        chunksizes=(1000, ),
                                        zlib=True)
                if self.z.size == 1:
                    var.coordinates = "{} {} latitude longitude".format(
                        self.time_axis_name, self.vertical_axis_name)
                else:
                    # This is probably a bottom sensor on an ADCP or something, don't add the height coordinate
                    var.coordinates = "{} latitude longitude".format(
                        self.time_axis_name)
                    if unlink_from_profile is True:
                        # Create metadata variable for the sensor_depth
                        if nc.variables.get('sensor_depth') is None:
                            logger.info(
                                "Setting the special case 'sensor_depth' metadata variable"
                            )
                            inst_depth = nc.createVariable(
                                'sensor_depth', 'f4')
                            inst_depth.units = 'm'
                            inst_depth.standard_name = 'surface_altitude'
                            inst_depth.positive = self.vertical_positive
                            if self.vertical_positive.lower() == 'down':
                                inst_depth.long_name = 'sensor depth below datum'
                            elif self.vertical_positive.lower() == 'up':
                                inst_depth.long_name = 'sensor height above datum'
                            inst_depth.datum = sensor_vertical_datum or 'Unknown'
                            if verticals and verticals.size > 0:
                                inst_depth[:] = verticals[0]
                            else:
                                inst_depth[:] = self.vertical_fill

            elif len(used_values.shape) == 2:
                var = nc.createVariable(variable_name,
                                        used_values.dtype, (
                                            "time",
                                            "z",
                                        ),
                                        fill_value=fillvalue,
                                        chunksizes=(
                                            1000,
                                            self.z.size,
                                        ),
                                        zlib=True)
                var.coordinates = "{} {} latitude longitude".format(
                    self.time_axis_name, self.vertical_axis_name)
            else:
                raise ValueError(
                    "Could not create variable.  Shape of data is {!s}.  Expected a dimension of 1 or 2, not {!s}."
                    .format(used_values.shape, len(used_values.shape)))
            # Set the variable attributes as passed in
            if attributes:
                for k, v in attributes.items():

                    if k == 'vertical_datum' and sensor_vertical_datum is None and v is not None:
                        # Use this as the vertical datum if it is specified and we didn't already have one
                        try:
                            self.crs.geoid_name = v
                            self.crs.vertical_datum = v
                            self.crs.water_surface_reference_datum = v
                        except AttributeError:
                            pass

                    if k not in ['name', 'coordinates', '_FillValue'
                                 ] and v is not None:
                        try:
                            var.setncattr(k, v)
                        except BaseException:
                            logger.info(
                                'Could not add attribute {}: {}, skipping.'.
                                format(k, v))

            var.grid_mapping = 'crs'
            var[:] = used_values

            return var
Example #14
def main(output,
         download_folder,
         do_download,
         projects,
         csv_metadata_file,
         filesubset=None,
         since=None):
    project_metadata = dict()
    with open(csv_metadata_file, 'r') as f:
        reader = csv.DictReader(f)
        for row in reader:
            project_name = row['project_name']
            if isinstance(project_name, str) and project_name[0] == '#':
                continue
            if projects and project_name.lower() not in projects:
                # Skip projects if a subset was defined
                continue
            project_metadata[project_name] = dict()
            for k, v in row.items():
                project_metadata[project_name][k] = v

    if do_download:
        try:
            downloaded_files = download(download_folder, project_metadata,
                                        filesubset, since)
        except KeyboardInterrupt:
            logger.exception('Error downloading datasets from THREDDS')
            downloaded_files = []
    else:
        downloaded_files = glob(os.path.join(download_folder, '**', '*'))
        if since is not None:

            def should_keep(d):
                modt = datetime.utcfromtimestamp(
                    os.path.getmtime(d)).replace(tzinfo=pytz.utc)
                return modt >= since

            downloaded_files = [
                dl for dl in downloaded_files if should_keep(dl)
            ]

    for down_file in sorted(downloaded_files):

        temp_fd, temp_file = tempfile.mkstemp(prefix='cmg_collector',
                                              suffix='nc')
        try:

            if filesubset is not None:
                if os.path.basename(down_file).lower() not in filesubset:
                    # aka "9631ecp-a.nc"
                    # Skip this file!
                    continue

            project_name = os.path.basename(os.path.dirname(down_file))
            if projects:
                if project_name.lower() not in projects:
                    # Skip this project!
                    continue
            shutil.copy(down_file, temp_file)

            # Cleanup to CF-1.6
            try:
                first_time = normalize_time(temp_file)
            except (TypeError, ValueError, IndexError):
                logger.exception(
                    "Could not normalize the time variable. Skipping {0}.".
                    format(down_file))
                continue
            except OverflowError:
                logger.error(
                    "Dates out of range. Skipping {0}.".format(down_file))
                continue

            normalize_epic_codes(temp_file, down_file)
            normalize_vectors(temp_file)
            normalize_units(temp_file)

            # Create list of variables that we want to save.
            mooring_id = None
            latitude = None
            longitude = None

            fname = os.path.basename(down_file)
            feature_name, file_ext = os.path.splitext(
                os.path.basename(down_file))
            try:
                mooring_id = int(9999)
            except ValueError:
                logger.exception(
                    "Could not create a suitable station_id. Skipping {0}.".
                    format(down_file))
                continue

            file_name = os.path.basename(down_file)
            output_directory = os.path.join(output, project_name)
            logger.info("Translating {0} into CF1.6 format: {1}".format(
                down_file,
                os.path.abspath(os.path.join(output_directory, file_name))))

            with EnhancedDataset(temp_file) as nc:

                try:
                    latitude = nc.variables.get("lat")[0]
                    longitude = nc.variables.get("lon")[0]
                except IndexError:
                    latitude = nc.variables.get("lat")[:]
                    longitude = nc.variables.get("lon")[:]
                except TypeError:
                    logger.error(
                        "Could not find lat/lon variables. Skipping {0}.".
                        format(down_file))

                file_global_attributes = {
                    k: getattr(nc, k)
                    for k in nc.ncattrs()
                }
                file_global_attributes.update(global_attributes)
                file_global_attributes['id'] = feature_name
                file_global_attributes['MOORING'] = mooring_id
                file_global_attributes['original_filename'] = fname
                file_global_attributes['original_folder'] = project_name

                no_override = [
                    'id', 'MOORING', 'original_filename', 'original_folder',
                    'catalog_xml', 'project_name'
                ]
                if project_name in project_metadata:
                    for k, v in project_metadata[project_name].items():
                        if v and k.lower() not in no_override:
                            file_global_attributes[k] = v

                if 'summary' in file_global_attributes:
                    # Save the original summary
                    file_global_attributes[
                        'WHOI_Buoy_Group_summary'] = file_global_attributes[
                            'summary']

                # Better title/summary for discovery via catalogs
                project_title = file_global_attributes.get(
                    'project_title', project_name).strip()
                project_summary = file_global_attributes.get(
                    'project_summary', '').strip()
                file_global_attributes[
                    'title'] = 'USGS-CMG time-series data: {0} - {1} - {2}'.format(
                        project_name, mooring_id, feature_name)
                file_global_attributes[
                    'summary'] = 'USGS-CMG time-series data from the {} project, mooring {} and package {}. {}'.format(
                        project_title, mooring_id, feature_name,
                        project_summary).strip()

                times = nc.variables.get('time')[:]

                # Get all depth values
                depth_variables = []
                for dv in nc.variables:
                    depth_variables += [
                        x for x in nc.variables.get(dv).dimensions
                        if 'depth' in x
                    ]
                depth_variables = sorted(list(set(depth_variables)))

                try:
                    assert depth_variables
                    depth_values = np.asarray([
                        nc.variables.get(x)[:] for x in depth_variables
                    ]).flatten()
                except (AssertionError, TypeError):
                    logger.warning(
                        "No depth variables found in {}, skipping.".format(
                            down_file))
                    continue

                # Convert everything to positive up, unless it is specifically specified as "up" already
                depth_conversion = -1.0
                if depth_variables:
                    pull_positive = nc.variables.get(depth_variables[0])
                    if hasattr(pull_positive, 'positive'
                               ) and pull_positive.positive.lower() == 'up':
                        depth_conversion = 1.0
                depth_values = depth_values * depth_conversion

                if not os.path.isdir(output_directory):
                    os.makedirs(output_directory)
                ts = TimeSeries(output_directory,
                                latitude,
                                longitude,
                                feature_name,
                                file_global_attributes,
                                times=times,
                                verticals=depth_values,
                                output_filename=file_name,
                                vertical_positive='up')

                # Set the platform type from the global attribute 'platform_type', defaulting to 'fixed'
                with EnhancedDataset(ts.out_file, 'a') as onc:
                    platform_type = getattr(onc, 'platform_type',
                                            'fixed').lower()
                    onc.variables['platform'].setncattr('type', platform_type)
                    onc.variables['platform'].setncattr(
                        'nodc_name', "FIXED PLATFORM, MOORINGS")
                    # Add ERDDAP variables
                    onc.cdm_data_type = "TimeSeries"
                    onc.cdm_timeseries_variables = "latitude,longitude,z,feature_type_instance"

                v = []
                depth_files = []
                for other in sorted(
                        nc.variables):  # Sorted for a reason... don't change!
                    try:
                        if other in coord_vars:
                            continue

                        ovsd = None  # old var sensor depth
                        old_var = nc.variables.get(other)
                        variable_attributes = {
                            k: getattr(old_var, k)
                            for k in old_var.ncattrs()
                        }
                        # Remove/rename some attributes
                        # https://github.com/USGS-CMG/usgs-cmg-portal/issues/67
                        if 'valid_range' in variable_attributes:
                            del variable_attributes['valid_range']
                        if 'minimum' in variable_attributes:
                            variable_attributes[
                                'actual_min'] = variable_attributes['minimum']
                            del variable_attributes['minimum']
                        if 'maximum' in variable_attributes:
                            variable_attributes[
                                'actual_max'] = variable_attributes['maximum']
                            del variable_attributes['maximum']
                        if 'sensor_depth' in variable_attributes:
                            # sensor_depth is ALWAYS positive "down", so don't convert!
                            # This is contrary to the "positive" attribute on the Z axis.
                            # variable_attributes['sensor_depth'] = variable_attributes['sensor_depth'] * -1
                            # Round the sensor_depth attribute
                            variable_attributes['sensor_depth'] = np.around(
                                variable_attributes['sensor_depth'],
                                decimals=4)
                            ovsd = np.around(old_var.sensor_depth *
                                             depth_conversion,
                                             decimals=4)

                        fillvalue = None
                        if hasattr(old_var, "_FillValue"):
                            fillvalue = old_var._FillValue

                        # Figure out if this is a variable that is repeated at different depths
                        # as different variable names.   Assumes sorted.
                        new_var_name = other.split('_')[0]
                        if new_var_name in ts.ncd.variables:
                            # Already in new file (processed when the first was encountered in the loop below)
                            continue

                        # Get the depth index
                        depth_variable = [
                            x for x in old_var.dimensions if 'depth' in x
                        ]
                        if depth_variable and len(
                                old_var.dimensions
                        ) > 1 and 'time' in old_var.dimensions:
                            depth_index = np.squeeze(
                                np.where(depth_values == (
                                    nc.variables.get(depth_variable[0])[:] *
                                    depth_conversion)))

                            # Find other variable names like this one
                            depth_indexes = [(other, depth_index)]
                            for search_var in sorted(nc.variables):
                                # If they have different depth dimension names we need to combine them into one variable
                                if search_var != other and search_var.split('_')[0] == new_var_name and \
                                   depth_variable[0] != [ x for x in nc.variables[search_var].dimensions if 'depth' in x ][0]:
                                    # Found a match at a different depth
                                    search_depth_variable = [
                                        x for x in nc.variables.get(
                                            search_var).dimensions
                                        if 'depth' in x
                                    ]
                                    depth_index = np.squeeze(
                                        np.where(depth_values == (
                                            nc.variables.get(
                                                search_depth_variable[0])[:] *
                                            depth_conversion)))
                                    depth_indexes.append(
                                        (search_var, depth_index))
                                    logger.info(
                                        "Combining '{}' with '{}' as '{}' (different variables at different depths but are the same parameter)"
                                        .format(search_var, other,
                                                new_var_name))

                            values = np.ma.empty(
                                (times.size, len(depth_values)),
                                dtype=old_var.dtype)
                            values.fill_value = fillvalue
                            values.mask = True
                            inconsistent = False
                            for nm, index in depth_indexes:
                                try:
                                    values[:, index] = np.squeeze(
                                        nc.variables.get(nm)[:])
                                except ValueError:
                                    inconsistent = True
                                    break

                            # If we just have one index we want to use the original name
                            if len(depth_indexes) == 1:
                                # Just use the original variable name
                                new_var_name = other

                            if inconsistent is True:
                                # Incorrect array size, most likely a strange variable
                                ts.add_variable_object(
                                    old_var,
                                    dimension_map=dict(depth='z'),
                                    reduce_dims=True)
                            else:
                                # Create this one, should be the first we encounter for this type
                                ts.add_variable(new_var_name,
                                                values=values,
                                                times=times,
                                                fillvalue=fillvalue,
                                                attributes=variable_attributes)

                        elif len(old_var.dimensions
                                 ) == 1 and old_var.dimensions[0] == 'time':
                            # A single time dimensioned variable, like pitch, roll, record count, etc.
                            ts.add_variable(other,
                                            values=old_var[:],
                                            times=times,
                                            unlink_from_profile=True,
                                            fillvalue=fillvalue,
                                            attributes=variable_attributes)
                        elif old_var.ndim <= 3 and ovsd and \
                                ((depth_values.size == 1 and not depth_variable and 'time' in old_var.dimensions) or
                                 (depth_values.size  > 1 and not depth_variable and 'time' in old_var.dimensions and 'sensor_depth' in ts.ncd.variables)):

                            if 'sensor_depth' in ts.ncd.variables and np.isclose(
                                    ts.ncd.variables['sensor_depth'][:], ovsd):
                                ts.add_variable(other,
                                                values=old_var[:],
                                                times=times,
                                                unlink_from_profile=True,
                                                verticals=[ovsd],
                                                fillvalue=fillvalue,
                                                attributes=variable_attributes)
                            else:
                                # Search through secondary files that have been created for detached variables at a certain depth and
                                # try to match this variable with one of the depths.
                                found_df = False
                                for dfts in depth_files:
                                    if isinstance(ovsd, np.ndarray):
                                        # Well, this is a bad file.
                                        raise ValueError(
                                            "The sensor_depth attribute has more than one value, please fix the source NetCDF: {}"
                                            .format(down_file))
                                    if np.isclose(
                                            dfts.ncd.variables[
                                                ts.vertical_axis_name][:],
                                            ovsd):
                                        dfts.add_variable(
                                            other,
                                            values=old_var[:],
                                            times=times,
                                            unlink_from_profile=True,
                                            verticals=[ovsd],
                                            fillvalue=fillvalue,
                                            attributes=variable_attributes)
                                        found_df = True
                                        break

                                # If we couldn't match the current or one of the existing secondary depth files, create a new one.
                                if found_df is False:
                                    new_file_name = file_name.replace(
                                        file_ext, '_z{}{}'.format(
                                            len(depth_files) + 1, file_ext))
                                    fga = copy(file_global_attributes)
                                    fga['id'] = os.path.splitext(
                                        new_file_name)[0]
                                    new_ts = TimeSeries(
                                        output_directory,
                                        latitude,
                                        longitude,
                                        feature_name,
                                        fga,
                                        times=times,
                                        verticals=[ovsd],
                                        output_filename=new_file_name,
                                        vertical_positive='up')
                                    new_ts.add_variable(
                                        other,
                                        values=old_var[:],
                                        times=times,
                                        verticals=[ovsd],
                                        fillvalue=fillvalue,
                                        attributes=variable_attributes)
                                    depth_files.append(new_ts)
                        elif old_var.ndim <= 3 and (
                                depth_values.size > 1 and not depth_variable
                                and 'time' in old_var.dimensions):
                            if ovsd:
                                # An ADCP or profiling dataset, but this variable is measured at a single depth.
                                # Example: Bottom Temperature on an ADCP
                                # Skip things with a dimension over 3 (some beam variables like `brange`)
                                ts.add_variable(other,
                                                values=old_var[:],
                                                times=times,
                                                unlink_from_profile=True,
                                                verticals=[ovsd],
                                                fillvalue=fillvalue,
                                                attributes=variable_attributes)
                            else:
                                ts.add_variable(other,
                                                values=old_var[:],
                                                times=times,
                                                unlink_from_profile=True,
                                                fillvalue=fillvalue,
                                                attributes=variable_attributes)
                        else:
                            if 'time' in old_var.dimensions and old_var.ndim <= 3:
                                ts.add_variable(other,
                                                values=old_var[:],
                                                times=times,
                                                fillvalue=fillvalue,
                                                attributes=variable_attributes)
                            else:
                                ts.add_variable_object(
                                    old_var,
                                    dimension_map=dict(depth='z'),
                                    reduce_dims=True)

                    except BaseException:
                        logger.exception(
                            "Error processing variable {0} in {1}. Skipping it."
                            .format(other, down_file))
        except KeyboardInterrupt:
            logger.info("Breaking out of Translate loop!")
            break
        except BaseException:
            logger.exception("Error. Skipping {0}.".format(down_file))
            continue
        finally:
            try:
                for df in depth_files:
                    del df
            except NameError:
                pass
            try:
                del ts
            except NameError:
                pass
            os.close(temp_fd)
            if os.path.isfile(temp_file):
                os.remove(temp_file)
Example #15
    def add_instrument_metadata(self, urn):
        with EnhancedDataset(self.out_file, 'a') as nc:
            instrument = nc.createVariable("instrument", "i4")
            instrument.definition = "http://mmisw.org/ont/ioos/definition/sensorID"
            instrument.long_name = urn
            instrument.ioos_code = urn
Example #16
    def __init__(self, output_directory, latitude, longitude, station_name, global_attributes, times=None, verticals=None, vertical_fill=None, output_filename=None, vertical_axis_name=None, vertical_positive=None):
        if output_filename is None:
            output_filename = '{}_{}.nc'.format(station_name, int(random.random() * 100000))
            logger.info("No output filename specified, saving as {}".format(output_filename))

        self.vertical_positive  = vertical_positive or 'down'
        self.vertical_axis_name = vertical_axis_name or 'z'
        self.time_axis_name     = 'time'

        # Make directory
        if not os.path.exists(output_directory):
            os.makedirs(output_directory)

        self.time = None

        self.out_file = os.path.abspath(os.path.join(output_directory, output_filename))
        if os.path.isfile(self.out_file):
            os.remove(self.out_file)

        with EnhancedDataset(self.out_file, 'w') as nc:
            # Global attributes
            # These are set by this script; we don't want anyone to be able to set them manually
            global_skips = ["time_coverage_start", "time_coverage_end", "time_coverage_duration", "time_coverage_resolution",
                            "featureType", "geospatial_vertical_positive", "geospatial_vertical_min", "geospatial_vertical_max",
                            "geospatial_lat_min", "geospatial_lon_min", "geospatial_lat_max", "geospatial_lon_max", "geospatial_bounds"
                            "geospatial_vertical_resolution", "geospatial_lat_resolution", "geospatial_lon_resolution",
                            "Conventions", "date_created", "date_modified", "date_issued"]
            for k, v in global_attributes.items():
                if v is None:
                    v = "None"
                if k not in global_skips:
                    nc.setncattr(k, v)

            now_date = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:00Z")
            nc.setncattr("Conventions", "CF-1.6,ACDD-1.3")
            nc.setncattr("date_created", now_date)
            nc.setncattr("date_modified", now_date)
            nc.setncattr("date_issued", now_date)
            if not hasattr(nc, "date_metadata_modified"):
                nc.setncattr("date_metadata_modified", now_date)

            # Allow the customization of this attribute
            if 'cdm_data_type' not in global_attributes:
                nc.setncattr('cdm_data_type', 'Station')

            old_history = getattr(nc, 'history', '')
            new_history = '{} - {} - {}'.format(now_date, 'pyaxiom', 'File created using pyaxiom')
            if old_history:
                nc.setncattr('history', '{}\n{}'.format(old_history, new_history))
            else:
                nc.setncattr('history', new_history)

            # Station name
            nc.createDimension("feature_type_instance", len(station_name))
            name = nc.createVariable("feature_type_instance", "S1", ("feature_type_instance",))
            name.cf_role = "timeseries_id"
            name.long_name = "Identifier for each feature type instance"
            name[:] = list(station_name)

            # Location
            lat = nc.createVariable("latitude", get_type(latitude))
            lat.units           = "degrees_north"
            lat.standard_name   = "latitude"
            lat.long_name       = "sensor latitude"
            lat.axis            = "Y"
            lat.valid_min       = latitude
            lat.valid_max       = latitude
            lat[:] = latitude
            nc.setncattr("geospatial_lat_min", latitude)
            nc.setncattr("geospatial_lat_max", latitude)
            nc.setncattr("geospatial_lat_resolution", 0)
            nc.setncattr("geospatial_lat_units", "degrees_north")

            lon = nc.createVariable("longitude", get_type(longitude))
            lon.units           = "degrees_east"
            lon.standard_name   = "longitude"
            lon.long_name       = "sensor longitude"
            lon.axis            = "X"
            lon.valid_min       = longitude
            lon.valid_max       = longitude
            lon[:] = longitude
            nc.setncattr("geospatial_lon_min", longitude)
            nc.setncattr("geospatial_lon_max", longitude)
            nc.setncattr("geospatial_lon_resolution", 0)
            nc.setncattr("geospatial_lon_units", "degrees_east")

            nc.setncattr("geospatial_bounds", "POINT({} {})".format(longitude, latitude))
            if not hasattr(nc, "geospatial_bounds_crs"):
                nc.setncattr("geospatial_bounds_crs", "EPSG:4326")

            # Metadata variables
            self.crs = nc.createVariable("crs", "i4")
            self.crs.long_name           = "http://www.opengis.net/def/crs/EPSG/0/4326"
            self.crs.grid_mapping_name   = "latitude_longitude"
            self.crs.epsg_code           = "EPSG:4326"
            self.crs.semi_major_axis     = float(6378137.0)
            self.crs.inverse_flattening  = float(298.257223563)

            platform = nc.createVariable("platform", "i4")
            platform.definition = "http://mmisw.org/ont/ioos/definition/stationID"

            urn = IoosUrn.from_string(station_name)
            if urn.valid() is True:
                platform.short_name = global_attributes.get("title", urn.label)
                platform.long_name = global_attributes.get('summary', 'Station {}'.format(urn.label))
                platform.ioos_code = urn.urn
            else:
                platform.short_name = global_attributes.get("title", station_name)
                platform.long_name = global_attributes.get("summary", station_name)
                platform.ioos_code = station_name

            if vertical_fill is None:
                vertical_fill = -9999.9
            self.vertical_fill = vertical_fill

        self._nc = EnhancedDataset(self.out_file, 'a')
        self.setup_times_and_verticals(times, verticals)
        logger.info("Created file at '{}'".format(self.out_file))
Example #19
class TimeSeries(object):

    @staticmethod
    def from_dataframe(df, output_directory, output_filename, latitude, longitude, station_name, global_attributes, variable_name, variable_attributes, sensor_vertical_datum=None, fillvalue=None, data_column=None, vertical_axis_name=None, vertical_positive=None, create_instrument_variable=False, attempts=None):

        # attempts is how many strategies to try when building a NetCDF file
        # from a dataframe. For backwards compatibility purposes, we default to
        # trying everything (even manual matching, which takes forever and is a
        # memory hog).
        attempts = attempts or 5

        if fillvalue is None:
            fillvalue = -9999.9
        if data_column is None:
            data_column = 'value'

        data_fillvalue = df[data_column].values.dtype.type(fillvalue)
        vertical_fillvalue = df['depth'].values.dtype.type(fillvalue)

        df[data_column] = df[data_column].fillna(data_fillvalue)
        times = np.asarray([ calendar.timegm(x.utctimetuple()) for x in df['time'] ])
        df['depth'] = df['depth'].fillna(vertical_fillvalue)

        depths = df['depth'].values
        try:
            ts = TimeSeries(output_directory, latitude, longitude, station_name, global_attributes, times=times, verticals=depths, output_filename=output_filename, vertical_fill=vertical_fillvalue, vertical_axis_name=vertical_axis_name, vertical_positive=vertical_positive)
            ts.add_variable(variable_name, df[data_column].values, attributes=variable_attributes, sensor_vertical_datum=sensor_vertical_datum, raise_on_error=True, fillvalue=data_fillvalue, create_instrument_variable=create_instrument_variable)
        except ValueError:
            if attempts < 2:
                raise
            logger.warning("Attempt 2: using unique times")
            try:
                # Try uniquing time
                newtimes  = np.unique(times)
                ts = TimeSeries(output_directory, latitude, longitude, station_name, global_attributes, times=newtimes, verticals=depths, output_filename=output_filename, vertical_fill=vertical_fillvalue, vertical_axis_name=vertical_axis_name, vertical_positive=vertical_positive)
                ts.add_variable(variable_name, df[data_column].values, attributes=variable_attributes, sensor_vertical_datum=sensor_vertical_datum, raise_on_error=True, fillvalue=data_fillvalue, create_instrument_variable=create_instrument_variable)
            except ValueError:
                if attempts < 3:
                    raise
                logger.warning("Attempt 3: using unique depths")
                try:
                    # Try uniquing depths
                    newdepths = np.unique(df['depth'].values)
                    ts = TimeSeries(output_directory, latitude, longitude, station_name, global_attributes, times=times, verticals=newdepths, output_filename=output_filename, vertical_fill=vertical_fillvalue, vertical_axis_name=vertical_axis_name, vertical_positive=vertical_positive)
                    ts.add_variable(variable_name, df[data_column].values, attributes=variable_attributes, sensor_vertical_datum=sensor_vertical_datum, raise_on_error=True, fillvalue=data_fillvalue, create_instrument_variable=create_instrument_variable)
                except ValueError:
                    if attempts < 4:
                        raise
                    logger.warning("Attempt 4: using unique time and depth")
                    try:
                        # Unique both time and depth
                        newdepths = np.unique(df['depth'].values)
                        ts = TimeSeries(output_directory, latitude, longitude, station_name, global_attributes, times=newtimes, verticals=newdepths, output_filename=output_filename, vertical_fill=vertical_fillvalue, vertical_axis_name=vertical_axis_name, vertical_positive=vertical_positive)
                        ts.add_variable(variable_name, df[data_column].values, attributes=variable_attributes, sensor_vertical_datum=sensor_vertical_datum, raise_on_error=True, fillvalue=data_fillvalue, create_instrument_variable=create_instrument_variable)
                    except ValueError:
                        if attempts < 5:
                            raise
                        logger.warning("Attempt 5: manually matching (this is SLOW)")
                        # Manually match
                        ts = TimeSeries(output_directory, latitude, longitude, station_name, global_attributes, times=times, verticals=depths, output_filename=output_filename, vertical_fill=vertical_fillvalue, vertical_axis_name=vertical_axis_name, vertical_positive=vertical_positive)
                        ts.add_variable(variable_name, df[data_column].values, attributes=variable_attributes, times=times, verticals=depths, sensor_vertical_datum=sensor_vertical_datum, raise_on_error=False, fillvalue=data_fillvalue, create_instrument_variable=create_instrument_variable)
        return ts

    def __init__(self, output_directory, latitude, longitude, station_name, global_attributes, times=None, verticals=None, vertical_fill=None, output_filename=None, vertical_axis_name=None, vertical_positive=None):
        if output_filename is None:
            output_filename = '{}_{}.nc'.format(station_name, int(random.random() * 100000))
            logger.info("No output filename specified, saving as {}".format(output_filename))

        self.vertical_positive  = vertical_positive or 'down'
        self.vertical_axis_name = vertical_axis_name or 'z'
        self.time_axis_name     = 'time'

        # Make directory
        if not os.path.exists(output_directory):
            os.makedirs(output_directory)

        self.time = None

        self.out_file = os.path.abspath(os.path.join(output_directory, output_filename))
        if os.path.isfile(self.out_file):
            os.remove(self.out_file)

        with EnhancedDataset(self.out_file, 'w') as nc:
            # Global attributes
            # These are set by this script; we don't want anyone to be able to set them manually
            global_skips = ["time_coverage_start", "time_coverage_end", "time_coverage_duration", "time_coverage_resolution",
                            "featureType", "geospatial_vertical_positive", "geospatial_vertical_min", "geospatial_vertical_max",
                            "geospatial_lat_min", "geospatial_lon_min", "geospatial_lat_max", "geospatial_lon_max", "geospatial_bounds"
                            "geospatial_vertical_resolution", "geospatial_lat_resolution", "geospatial_lon_resolution",
                            "Conventions", "date_created", "date_modified", "date_issued"]
            for k, v in global_attributes.items():
                if v is None:
                    v = "None"
                if k not in global_skips:
                    nc.setncattr(k, v)

            now_date = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:00Z")
            nc.setncattr("Conventions", "CF-1.6,ACDD-1.3")
            nc.setncattr("date_created", now_date)
            nc.setncattr("date_modified", now_date)
            nc.setncattr("date_issued", now_date)
            if not hasattr(nc, "date_metadata_modified"):
                nc.setncattr("date_metadata_modified", now_date)

            # Allow the customization of this attribute
            if 'cdm_data_type' not in global_attributes:
                nc.setncattr('cdm_data_type', 'Station')

            old_history = getattr(nc, 'history', '')
            new_history = '{} - {} - {}'.format(now_date, 'pyaxiom', 'File created using pyaxiom')
            if old_history:
                nc.setncattr('history', '{}\n{}'.format(old_history, new_history))
            else:
                nc.setncattr('history', new_history)

            # Station name
            nc.createDimension("feature_type_instance", len(station_name))
            name = nc.createVariable("feature_type_instance", "S1", ("feature_type_instance",))
            name.cf_role = "timeseries_id"
            name.long_name = "Identifier for each feature type instance"
            name[:] = list(station_name)

            # Location
            lat = nc.createVariable("latitude", get_type(latitude))
            lat.units           = "degrees_north"
            lat.standard_name   = "latitude"
            lat.long_name       = "sensor latitude"
            lat.axis            = "Y"
            lat.valid_min       = latitude
            lat.valid_max       = latitude
            lat[:] = latitude
            nc.setncattr("geospatial_lat_min", latitude)
            nc.setncattr("geospatial_lat_max", latitude)
            nc.setncattr("geospatial_lat_resolution", 0)
            nc.setncattr("geospatial_lat_units", "degrees_north")

            lon = nc.createVariable("longitude", get_type(longitude))
            lon.units           = "degrees_east"
            lon.standard_name   = "longitude"
            lon.long_name       = "sensor longitude"
            lon.axis            = "X"
            lon.valid_min       = longitude
            lon.valid_max       = longitude
            lon[:] = longitude
            nc.setncattr("geospatial_lon_min", longitude)
            nc.setncattr("geospatial_lon_max", longitude)
            nc.setncattr("geospatial_lon_resolution", 0)
            nc.setncattr("geospatial_lon_units", "degrees_east")

            nc.setncattr("geospatial_bounds", "POINT({} {})".format(longitude, latitude))
            if not hasattr(nc, "geospatial_bounds_crs"):
                nc.setncattr("geospatial_bounds_crs", "EPSG:4326")

            # Metadata variables
            self.crs = nc.createVariable("crs", "i4")
            self.crs.long_name           = "http://www.opengis.net/def/crs/EPSG/0/4326"
            self.crs.grid_mapping_name   = "latitude_longitude"
            self.crs.epsg_code           = "EPSG:4326"
            self.crs.semi_major_axis     = float(6378137.0)
            self.crs.inverse_flattening  = float(298.257223563)

            platform = nc.createVariable("platform", "i4")
            platform.definition = "http://mmisw.org/ont/ioos/definition/stationID"

            urn = IoosUrn.from_string(station_name)
            if urn.valid() is True:
                platform.short_name = global_attributes.get("title", urn.label)
                platform.long_name = global_attributes.get('summary', 'Station {}'.format(urn.label))
                platform.ioos_code = urn.urn
            else:
                platform.short_name = global_attributes.get("title", station_name)
                platform.long_name = global_attributes.get("summary", station_name)
                platform.ioos_code = station_name

            if vertical_fill is None:
                vertical_fill = -9999.9
            self.vertical_fill = vertical_fill

        self._nc = EnhancedDataset(self.out_file, 'a')
        self.setup_times_and_verticals(times, verticals)
        logger.info("Created file at '{}'".format(self.out_file))

    def add_instrument_metadata(self, urn):
        instrument = self._nc.createVariable("instrument", "i4")
        instrument.definition = "http://mmisw.org/ont/ioos/definition/sensorID"
        instrument.long_name = urn
        instrument.ioos_code = urn
        self._nc.instrument = 'instrument'
        self._nc.sync()

    def add_instrument_variable(self, variable_name):
        if variable_name not in self._nc.variables:
            logger.error("Variable {} not found in file, cannot create instrument metadata variable")
            return
        elif 'id' not in self._nc.ncattrs() or 'naming_authority' not in self._nc.ncattrs():
            logger.error("Global attributes 'id' and 'naming_authority' are required to create an instrument variable")
            return

        instr_var_name = "{}_instrument".format(variable_name)
        instrument = self._nc.createVariable(instr_var_name, "i4")

        datavar = self._nc.variables[variable_name]
        vats = { k: getattr(datavar, k) for k in datavar.ncattrs() }
        instrument_urn = urnify(self._nc.naming_authority, self._nc.id, vats)

        inst_urn = IoosUrn.from_string(instrument_urn)
        instrument.long_name = 'Instrument measuring {} from {}'.format(inst_urn.component, inst_urn.label)
        instrument.ioos_code = instrument_urn
        instrument.short_name = inst_urn.component
        instrument.definition = "http://mmisw.org/ont/ioos/definition/sensorID"

        datavar.instrument = instr_var_name

        # Append the instrument to the ancillary variables
        av = getattr(datavar, 'ancillary_variables', '')
        av += ' {}'.format(instr_var_name)
        datavar.ancillary_variables = av.strip()

        self._nc.sync()

    def add_time_bounds(self, delta=None, position=None):
        self._nc.createDimension("bounds", 2)
        time_bounds = self._nc.createVariable('{}_bounds'.format(self.time_axis_name), "f8", ("time", "bounds",), chunksizes=(self.time_chunk, 2,))
        time_bounds.units    = "seconds since 1970-01-01T00:00:00Z"
        time_bounds.calendar = "gregorian"

        time_objs = netCDF4.num2date(self.time[:], units=self.time.units, calendar=self.time.calendar)
        bounds_kwargs = dict(units=time_bounds.units, calendar=time_bounds.calendar)

        if position == "start":
            time_bounds[:] = np.asarray(list(zip(self.time[:], netCDF4.date2num(time_objs + delta, **bounds_kwargs))))
        elif position == "middle":
            time_bounds[:] = np.asarray(list(zip(netCDF4.date2num(time_objs - delta / 2, **bounds_kwargs), netCDF4.date2num(time_objs + delta / 2, **bounds_kwargs))))
        elif position == "end":
            time_bounds[:] = np.asarray(list(zip(netCDF4.date2num(time_objs - delta, **bounds_kwargs), self.time[:])))

        self._nc.sync()

    def add_variable(self, variable_name, values, times=None, verticals=None, sensor_vertical_datum=None, attributes=None, unlink_from_profile=None, fillvalue=None, raise_on_error=False, create_instrument_variable=False):

        if isinstance(values, (list, tuple,)) and values:
            values = np.asarray(values)
        if get_type(values) == np.int64:
            # Create values as int32 because DAP does not support int64 until DAP4.
            values = values.astype(np.int32)

        if isinstance(times, (list, tuple,)) and times:
            times = np.asarray(times)
        if get_type(times) == np.int64:
            # Create time as int32 because DAP does not support int64 until DAP4.
            times = times.astype(np.int32)

        if isinstance(verticals, (list, tuple,)) and verticals:
            verticals = np.asarray(verticals)
        if get_type(verticals) == np.int64:
            # Create verticals as int32 because DAP does not support int64 until DAP4.
            verticals = verticals.astype(np.int32)

        # Set vertical datum on the CRS variable
        if sensor_vertical_datum is not None:
            try:
                self.crs.geoid_name = sensor_vertical_datum
                self.crs.vertical_datum = sensor_vertical_datum
                self.crs.water_surface_reference_datum = sensor_vertical_datum
                if not hasattr(self._nc, "geospatial_bounds_vertical_crs"):
                    self._nc.setncattr("geospatial_bounds_vertical_crs", sensor_vertical_datum)
            except AttributeError:
                pass

        # Set default fillvalue for new variables
        if fillvalue is None:
            fillvalue = -9999.9
        fillvalue = values.dtype.type(fillvalue)

        used_values = None

        vertical_axis = self._nc.variables.get(self.vertical_axis_name)
        try:
            if unlink_from_profile is True:
                used_values = np.ma.reshape(values, (self.time.size, ))
                used_values = used_values[self.time_indexes]
            # These next two cases should work for all but a few cases, which are caught below
            elif vertical_axis.size == 1:
                used_values = np.ma.reshape(values, (self.time.size, ))
                used_values = used_values[self.time_indexes]
            else:
                used_values = np.ma.reshape(values, (self.time.size, vertical_axis.size, ))
                used_values = used_values[self.time_indexes]
                try:
                    used_values = used_values[:, self.vertical_indexes]
                except IndexError:
                    # The vertical values most likely had duplicates.  Ignore the
                    # faulty index here and try to save the values as is.
                    pass
        except ValueError:
            if raise_on_error is True:
                raise
            else:
                logger.warning("Could not do a simple reshape of data, trying to match manually! Time:{!s}, Heights:{!s}, Values:{!s}".format(self.time.size, vertical_axis.size, values.size))
            if vertical_axis.size > 1:
                if times is not None and verticals is not None:
                    # Hmmm, we have two actual height values for this station.
                    # Not cool man, not cool.
                    # Reindex the entire values array.  This is slow.
                    indexed = ((bisect.bisect_left(self.time[:], times[i]), bisect.bisect_left(vertical_axis[:], verticals[i]), values[i]) for i in range(values.size))
                    used_values = np.ndarray((self.time.size, vertical_axis.size, ), dtype=get_type(values))
                    used_values.fill(fillvalue)
                    for (tzi, zzi, vz) in indexed:
                        if zzi < vertical_axis.size and tzi < self.time.size:
                            used_values[tzi, zzi] = vz
                    del indexed
                else:
                    raise ValueError("You need to pass in both 'times' and 'verticals' parameters that matches the size of the 'values' parameter.")
            else:
                if times is not None:
                    # Ugh, find the time indexes manually
                    indexed = ((bisect.bisect_left(self.time[:], times[i]), values[i]) for i in range(values.size))
                    used_values = np.ndarray((self.time.size, ), dtype=get_type(values))
                    used_values.fill(fillvalue)
                    for (tzi, vz) in indexed:
                        if tzi < self.time.size:
                            used_values[tzi] = vz
                    del indexed
                else:
                    raise ValueError("You need to pass in a 'times' parameter that matches the size of the 'values' parameter.")

        logger.info("Setting values for {}...".format(variable_name))
        if len(used_values.shape) == 1:
            var = self._nc.createVariable(variable_name, get_type(used_values), ("time",), fill_value=fillvalue, chunksizes=(self.time_chunk,), zlib=True)
            self._nc.setncattr('ncei_template_version', 'NCEI_NetCDF_TimeSeries_Orthogonal_Template_v2.0')
            if vertical_axis.size == 1:
                var.coordinates = "{} {} latitude longitude".format(self.time_axis_name, self.vertical_axis_name)
            else:
                # This is probably a bottom sensor on an ADCP or something, don't add the height coordinate
                var.coordinates = "{} latitude longitude".format(self.time_axis_name)
                if unlink_from_profile is True:
                    # Create metadata variable for the sensor_depth
                    if verticals is not None and self._nc.variables.get('sensor_depth') is None:
                        logger.info("Setting the special case 'sensor_depth' metadata variable")
                        inst_depth = self._nc.createVariable('sensor_depth', get_type(verticals))
                        inst_depth.units = 'm'
                        inst_depth.standard_name = 'surface_altitude'
                        inst_depth.positive = self.vertical_positive
                        if self.vertical_positive.lower() == 'down':
                            inst_depth.long_name = 'sensor depth below datum'
                        elif self.vertical_positive.lower() == 'up':
                            inst_depth.long_name = 'sensor height above datum'
                        inst_depth.datum = sensor_vertical_datum or 'Unknown'
                        if verticals is not None and verticals.size > 0:
                            inst_depth[:] = verticals[0]
                        else:
                            inst_depth[:] = self.vertical_fill

        elif len(used_values.shape) == 2:
            var = self._nc.createVariable(variable_name, get_type(used_values), ("time", "z",), fill_value=fillvalue, chunksizes=(self.time_chunk, vertical_axis.size,), zlib=True)
            var.coordinates = "{} {} latitude longitude".format(self.time_axis_name, self.vertical_axis_name)
            self._nc.setncattr('ncei_template_version', 'NCEI_NetCDF_TimeSeriesProfile_Orthogonal_Template_v2.0')
        else:
            raise ValueError("Could not create variable.  Shape of data is {!s}.  Expected a dimension of 1 or 2, not {!s}.".format(used_values.shape, len(used_values.shape)))

        # Set missing_value as well
        attributes = attributes or {}
        attributes['missing_value'] = fillvalue
        # Set the variable attributes as passed in
        if attributes:
            for k, v in attributes.items():

                if k == 'vertical_datum' and sensor_vertical_datum is None and v is not None:
                    # Use this as the vertical datum if it is specified and we didn't already have one
                    try:
                        self.crs.geoid_name = v
                        self.crs.vertical_datum = v
                        self.crs.water_surface_reference_datum = v
                        if not hasattr(self._nc, "geospatial_bounds_vertical_crs"):
                            self._nc.setncattr("geospatial_bounds_vertical_crs", v)
                    except AttributeError:
                        pass

                if k not in ['name', 'coordinates', '_FillValue'] and v is not None:
                    try:
                        var.setncattr(k, v)
                    except BaseException:
                        logger.info('Could not add attribute {}: {}, skipping.'.format(k, v))

        # Add a long name if it doesn't exist
        if not hasattr(var, 'long_name'):
            varunits = getattr(var, 'units', None)
            vartitle = getattr(var, 'standard_name', getattr(var, 'name'))
            vartitle = vartitle.title().replace('_', ' ')
            if varunits is not None:
                vartitle = '{} ({})'.format(vartitle, varunits)
            var.long_name = vartitle
        var.grid_mapping = 'crs'
        var.platform = 'platform'
        var.ancillary_variables = 'platform'
        var.coverage_content_type = 'physicalMeasurement'
        var[:] = used_values

        if create_instrument_variable is True:
            self.add_instrument_variable(variable_name)

        self._nc.sync()
        del used_values
        return var

    def add_variable_object(self, varobject, dimension_map=None, reduce_dims=None):

        dimension_map = dimension_map or {}
        reduce_dims = reduce_dims or False

        fillvalue = -9999.99
        if hasattr(varobject, '_FillValue'):
            fillvalue = varobject._FillValue
        fillvalue = varobject.dtype.type(fillvalue)

        dims = []
        for n in varobject.dimensions:
            d = dimension_map.get(n, n)
            dim_size = varobject.shape[list(varobject.dimensions).index(n)]
            if reduce_dims is True and dim_size in [0, 1]:
                continue

            if d not in self._nc.dimensions:
                self._nc.createDimension(d, dim_size)
            dims.append(d)

        var = self._nc.createVariable(varobject.name, get_type(varobject), dims, fill_value=fillvalue, zlib=True)

        for k in varobject.ncattrs():
            if k not in ['name', '_FillValue']:
                var.setncattr(k, varobject.getncattr(k))

        if reduce_dims:
            var[:] = varobject[:].squeeze()
        else:
            var[:] = varobject[:]

        self._nc.sync()

    def setup_times_and_verticals(self, times, verticals):

        if isinstance(times, (list, tuple,)):
            times = np.asarray(times)

        # Create time as int32 or float64 because DAP does not support int64 until DAP4.
        if get_type(times) == np.int64:
            if times[-1] < 2147483647:
                # We can fit inside of an int32
                times = times.astype(np.int32)
            else:
                # Fall back to float64 because the values overflow int32
                times = times.astype(np.float64)

        # If nothing is passed in, set to the vertical_fill value.
        if not isinstance(verticals, np.ndarray) and not verticals:
            verticals = np.ma.masked_values([self.vertical_fill], self.vertical_fill)

        # Convert to masked array
        if isinstance(verticals, (list, tuple)):
            verticals = np.ma.masked_values(verticals, self.vertical_fill)
        elif isinstance(verticals, np.ndarray):
            self.vertical_fill = verticals.dtype.type(self.vertical_fill)
            verticals = np.ma.masked_values(verticals, self.vertical_fill)
        if get_type(verticals) == np.int64:
            # Create verticals as int32 because DAP does not support int64 until DAP4.
            verticals = verticals.astype(np.int32)

        # Don't unique Time... rely on the person submitting the data correctly.
        # That means we allow duplicate times, as long as the data contains duplicate times as well.
        self.time_indexes = np.argsort(times)
        full_times = times[self.time_indexes]

        # Unique the vertical values
        # Special case for all zeros.  Added here for greater readability.
        if np.isclose(verticals, 0).all():
            save_mask = verticals.mask
            verticals.mask = False
            unique_verticals, self.vertical_indexes = np.ma.unique(verticals, return_index=True)
            if save_mask.size > 1:
                unique_verticals.mask = save_mask[self.vertical_indexes]
        elif verticals is not None and verticals.any():
            save_mask = verticals.mask
            verticals.mask = False
            unique_verticals, self.vertical_indexes = np.ma.unique(verticals, return_index=True)
            if save_mask.size > 1:
                unique_verticals.mask = save_mask[self.vertical_indexes]
        else:
            unique_verticals = verticals
            self.vertical_indexes = np.arange(len(verticals))

        # Calculate time stats based on a unique time array
        unique_times = np.unique(full_times)
        starting = datetime.utcfromtimestamp(unique_times[0])
        ending   = datetime.utcfromtimestamp(unique_times[-1])

        logger.debug("Setting up time...")
        # Time extents
        self._nc.setncattr("time_coverage_start",    starting.isoformat())
        self._nc.setncattr("time_coverage_end",      ending.isoformat())
        # duration (ISO8601 format)
        self._nc.setncattr("time_coverage_duration", "PT{0:d}S".format(int(round((ending - starting).total_seconds()))))
        # resolution (ISO8601 format)
        # subtract adjacent times to produce an array of differences, then take the most common occurrence
        diffs = unique_times[1:] - unique_times[:-1]
        uniqs, inverse = np.unique(diffs, return_inverse=True)
        if uniqs.size > 1:
            time_diffs = uniqs[np.bincount(inverse).argmax()]  # most frequent gap
            self._nc.setncattr("time_coverage_resolution", "PT{0:d}S".format(int(round(time_diffs))))

        # Time
        self.time_chunk = min(full_times.size, 1000)
        self._nc.createDimension("time", full_times.size)
        self.time = self._nc.createVariable(self.time_axis_name, get_type(full_times), ("time",), chunksizes=(self.time_chunk,))
        self.time.units          = "seconds since 1970-01-01T00:00:00Z"
        self.time.standard_name  = "time"
        self.time.long_name      = "time of measurement"
        self.time.calendar       = "gregorian"
        self.time.axis           = "T"
        self.time[:] = full_times

        logger.debug("Setting up {}...".format(self.vertical_axis_name))
        # Figure out if we are creating a Profile or just a TimeSeries
        self._nc.setncattr("geospatial_vertical_units", "meters")
        self._nc.setncattr("geospatial_vertical_positive", self.vertical_positive)
        if unique_verticals.size <= 1:
            # TIMESERIES
            self._nc.setncattr("featureType", "timeSeries")
            # Fill in variable if we have an actual height. Else, the fillvalue remains.
            self._nc.setncattr("geospatial_vertical_resolution", '0')

            self.z = self._nc.createVariable(self.vertical_axis_name, get_type(unique_verticals), fill_value=self.vertical_fill)

            if unique_verticals.size == 1 and not np.isnan(unique_verticals[0]) and unique_verticals[0] != self.vertical_fill:
                # Vertical extents
                self._nc.setncattr("geospatial_vertical_min",      unique_verticals[0])
                self._nc.setncattr("geospatial_vertical_max",      unique_verticals[0])
                self.z.valid_min = unique_verticals[0]
                self.z.valid_max = unique_verticals[0]

        elif unique_verticals.size > 1:
            # TIMESERIES PROFILE
            self._nc.setncattr("featureType", "timeSeriesProfile")
            # Vertical extents
            non_nan_verticals = unique_verticals[ (~np.isnan(unique_verticals)) & (unique_verticals != self.vertical_fill) ]
            minvertical    = float(np.min(non_nan_verticals))
            maxvertical    = float(np.max(non_nan_verticals))
            vertical_diffs = non_nan_verticals[1:] - non_nan_verticals[:-1]
            self._nc.setncattr("geospatial_vertical_min", minvertical)
            self._nc.setncattr("geospatial_vertical_max", maxvertical)
            if vertical_diffs.size >= 1:
                self._nc.setncattr("geospatial_vertical_resolution", " ".join([ str(x) for x in list(vertical_diffs) if not np.isnan(x) ]))
            else:
                self._nc.setncattr("geospatial_vertical_resolution", '0')
            # There is more than one vertical value for this variable, we need to create a vertical dimension
            self._nc.createDimension("z", unique_verticals.size)
            self.z = self._nc.createVariable(self.vertical_axis_name, get_type(unique_verticals), ("z", ), fill_value=self.vertical_fill)
            self.z.valid_min = minvertical
            self.z.valid_max = maxvertical

        self.z.grid_mapping  = 'crs'
        self.z.long_name     = "{} of the sensor relative to the water surface".format(self.vertical_axis_name)
        if self.vertical_positive == 'up':
            self.z.standard_name = 'height'
        elif self.vertical_positive == 'down':
            self.z.standard_name = 'depth'
        self.z.positive      = self.vertical_positive
        self.z.units         = "m"
        self.z.axis          = "Z"
        self.z[:] = unique_verticals

        self._nc.sync()

    @property
    def ncd(self):
        return self._nc

    def __del__(self):
        if hasattr(self, '_nc') and self._nc:
            self._nc.close()
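An end-to-end sketch using from_dataframe; the column names ('time', 'depth', 'value') are the ones the method reads, while the station metadata and measurements are illustrative:

import pandas as pd

df = pd.DataFrame({
    'time':  pd.date_range('2015-01-01', periods=3, freq='60min'),  # converted to epoch seconds internally
    'depth': [1.0, 1.0, 1.0],
    'value': [10.1, 10.3, 10.2],
})

ts = TimeSeries.from_dataframe(
    df,
    output_directory='./output',
    output_filename='station1_temperature.nc',
    latitude=32.7,
    longitude=-117.2,
    station_name='urn:ioos:station:example.org:station1',
    global_attributes={'naming_authority': 'example.org', 'id': 'station1'},
    variable_name='temperature',
    variable_attributes={'standard_name': 'sea_water_temperature', 'units': 'degree_Celsius'},
)
ts.ncd.close()  # close the underlying dataset via the ncd property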
Example #20
    def setup_times_and_verticals(self, times, verticals):

        if isinstance(times, (
                list,
                tuple,
        )):
            times = np.asarray(times)

        # If nothing is passed in, set to the vertical_fill value.
        if not isinstance(verticals, np.ndarray) and not verticals:
            verticals = np.ma.masked_values([self.vertical_fill],
                                            self.vertical_fill)

        # Convert to masked array
        if isinstance(verticals, (
                list,
                tuple,
        )) or isinstance(verticals, np.ndarray):
            verticals = np.ma.masked_values(verticals, self.vertical_fill)

        # Don't unique Time... rely on the person submitting the data correctly.
        # That means we allow duplicate times, as long as the data contains duplicate times as well.
        self.time_indexes = np.argsort(times)
        unique_times = times[self.time_indexes]

        # Unique the vertical values
        # Special case for all zeros.  Added here for greater readability.
        if np.isclose(verticals, 0).all():
            save_mask = verticals.mask
            verticals.mask = False
            unique_verticals, self.vertical_indexes = np.ma.unique(
                verticals, return_index=True)
            if save_mask.size > 1:
                unique_verticals.mask = save_mask[self.vertical_indexes]
        elif verticals is not None and verticals.any():
            save_mask = verticals.mask
            verticals.mask = False
            unique_verticals, self.vertical_indexes = np.ma.unique(
                verticals, return_index=True)
            if save_mask.size > 1:
                unique_verticals.mask = save_mask[self.vertical_indexes]
        else:
            unique_verticals = verticals
            self.vertical_indexes = np.arange(len(verticals))

        starting = datetime.utcfromtimestamp(unique_times[0])
        ending = datetime.utcfromtimestamp(unique_times[-1])

        with EnhancedDataset(self.out_file, 'a') as nc:
            logger.debug("Setting up time...")
            # Time extents
            nc.setncattr("time_coverage_start", starting.isoformat())
            nc.setncattr("time_coverage_end", ending.isoformat())
            # duration (ISO8601 format)
            nc.setncattr(
                "time_coverage_duration",
                "PT%sS" % str(int(round((ending - starting).total_seconds()))))
            # resolution (ISO8601 format)
            # subtract adjacent times to produce an array of differences, then take the most common occurrence
            diffs = unique_times[1:] - unique_times[:-1]
            uniqs, inverse = np.unique(diffs, return_inverse=True)
            if uniqs.size > 1:
                time_diffs = uniqs[np.bincount(inverse).argmax()]  # most frequent gap
                nc.setncattr("time_coverage_resolution",
                             "PT%sS" % str(int(round(time_diffs))))

            # Time - stored as a 64-bit float ("f8")
            nc.createDimension("time")
            self.time = nc.createVariable(self.time_axis_name,
                                          "f8", ("time", ),
                                          chunksizes=(1000, ))
            self.time.units = "seconds since 1970-01-01T00:00:00Z"
            self.time.standard_name = "time"
            self.time.long_name = "time of measurement"
            self.time.calendar = "gregorian"
            self.time[:] = unique_times

            logger.debug("Setting up {}...".format(self.vertical_axis_name))
            # Figure out if we are creating a Profile or just a TimeSeries
            nc.setncattr("geospatial_vertical_units", "meters")
            nc.setncattr("geospatial_vertical_positive",
                         self.vertical_positive)
            if unique_verticals.size <= 1:
                # TIMESERIES
                nc.setncattr("featureType", "timeSeries")
                # Fill in variable if we have an actual height. Else, the fillvalue remains.
                nc.setncattr("geospatial_vertical_resolution", '0')
                if unique_verticals.size == 1 and not np.isnan(
                        unique_verticals[0]
                ) and unique_verticals[0] != self.vertical_fill:
                    # Vertical extents
                    nc.setncattr("geospatial_vertical_min",
                                 unique_verticals[0])
                    nc.setncattr("geospatial_vertical_max",
                                 unique_verticals[0])
                self.z = nc.createVariable(self.vertical_axis_name,
                                           "f8",
                                           fill_value=self.vertical_fill)

            elif unique_verticals.size > 1:
                # TIMESERIES PROFILE
                nc.setncattr("featureType", "timeSeriesProfile")
                # Vertical extents
                non_nan_verticals = unique_verticals[
                    (~np.isnan(unique_verticals))
                    & (unique_verticals != self.vertical_fill)]
                minvertical = float(np.min(non_nan_verticals))
                maxvertical = float(np.max(non_nan_verticals))
                vertical_diffs = non_nan_verticals[1:] - non_nan_verticals[:-1]
                nc.setncattr("geospatial_vertical_min", minvertical)
                nc.setncattr("geospatial_vertical_max", maxvertical)
                if vertical_diffs.size >= 1:
                    nc.setncattr(
                        "geospatial_vertical_resolution", " ".join([
                            str(x) for x in list(vertical_diffs)
                            if not np.isnan(x)
                        ]))
                else:
                    nc.setncattr("geospatial_vertical_resolution", '0')
                # There is more than one vertical value for this variable, we need to create a vertical dimension
                nc.createDimension("z", unique_verticals.size)
                self.z = nc.createVariable(self.vertical_axis_name,
                                           "f8", ("z", ),
                                           fill_value=self.vertical_fill)

            self.z.grid_mapping = 'crs'
            self.z.long_name = "{} of the sensor relative to the water surface".format(
                self.vertical_axis_name)
            if self.vertical_positive == 'up':
                self.z.standard_name = 'height'
            elif self.vertical_positive == 'down':
                self.z.standard_name = 'depth'
            self.z.positive = self.vertical_positive
            self.z.units = "m"
            self.z.axis = "Z"
            self.z[:] = unique_verticals
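The time_coverage_resolution logic above boils down to taking the most common gap between consecutive timestamps; a standalone sketch of that computation with illustrative values:

import numpy as np

unique_times = np.array([0, 60, 120, 180, 300])   # epoch seconds, illustrative
diffs = unique_times[1:] - unique_times[:-1]      # gaps: [60, 60, 60, 120]
uniqs, inverse = np.unique(diffs, return_inverse=True)
mode_gap = uniqs[np.bincount(inverse).argmax()]   # most frequent gap: 60
print("PT{0:d}S".format(int(round(mode_gap))))    # -> PT60S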
Example #21
    def ncd(self):
        # Open read-only and return the live handle; a with block here would
        # close the dataset before the caller could use it.
        return EnhancedDataset(self.out_file, 'r')
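A hedged usage sketch; since the accessor hands back an open read-only handle, the caller is responsible for closing it:

nc = ts.ncd()
try:
    print(list(nc.variables))
finally:
    nc.close()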
Example #22
def normalize_epic_codes(netcdf_file, original_filename):
    with EnhancedDataset(netcdf_file, 'a') as nc:
        for v in nc.variables:
            nc_var = nc.variables.get(v)
            if v in variable_name_overrides:
                ec = variable_name_overrides.get(v).get('epic_code', None)
                if ec is not None:
                    nc_var.epic_code = ec
                overrides = variable_name_overrides.get(v).get(
                    'overrides', dict())
                for k, d in overrides.items():
                    if k == 'convert':
                        nc_var[:] = d(nc_var[:])
                    elif k != 'original_units':
                        nc_var.setncattr(k, d)

            if hasattr(nc_var, 'long_name'):
                if not hasattr(nc_var, 'epic_code') or (
                        hasattr(nc_var, 'epic_code')
                        and nc_var.epic_code in IGNORABLE_CODES):
                    lookup_long_name = nc_var.long_name.lower().strip()
                    if lookup_long_name in long_name_overrides:
                        ec = long_name_overrides.get(lookup_long_name).get(
                            'epic_code', None)
                        if ec is not None:
                            nc_var.epic_code = ec
                        overrides = long_name_overrides.get(
                            lookup_long_name).get('overrides', dict())
                        for k, d in overrides.items():
                            if k == 'convert':
                                nc_var[:] = d(nc_var[:])
                            elif k != 'original_units':
                                nc_var.setncattr(k, d)

            if hasattr(nc_var, "epic_code") and nc_var.epic_code:
                try:
                    epic_code = int(nc_var.epic_code)
                except ValueError:
                    logger.debug("No EPIC code specified on {0}".format(v))
                else:

                    # Specialized cases for generic EPIC codes
                    if epic_code in special_map:
                        attribs = special_map.get(epic_code)(nc_var,
                                                             original_filename)
                    else:
                        attribs = epic2cf.mapping.get(epic_code)

                    # Special case for 'Onset weather stations'.
                    # https://github.com/USGS-CMG/usgs-cmg-portal/issues/69
                    if attribs is not None and epic_code in [905, 908] and 'hml' in netcdf_file.lower():
                        attribs.standard_name = 'surface_downwelling_photosynthetic_radiative_flux_in_air'

                    if attribs is not None and attribs.standard_name is not None:
                        # Convert data to CF units
                        nc_var[:] = attribs.convert(nc_var[:])
                        # Set attributes
                        nc_var.standard_name = attribs.standard_name
                        nc_var.long_name = attribs.long_name
                        nc_var.units = attribs.cf_units
                        nc_var.epic_code = epic_code  # Set it again to be sure it is an int
                        if attribs.cell_methods is not None:
                            nc_var.cell_methods = attribs.cell_methods
                    else:
                        logger.debug(
                            "Could not find CF mapping for EPIC code {!s}".
                            format(epic_code))
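normalize_epic_codes leans on module-level lookup tables that are not shown in this example; a hypothetical sketch of their assumed shapes (names, codes, and conversions are illustrative, not the project's actual tables):

# Hypothetical shapes for the lookup tables referenced above.
IGNORABLE_CODES = [0, 9999]                  # EPIC codes treated as "unset"

variable_name_overrides = {
    'T_28': {
        'epic_code': 28,                     # EPIC code for temperature (illustrative)
        'overrides': {
            'units': 'degree_Celsius',
            'convert': lambda x: x / 100.0,  # raw counts -> degrees (illustrative)
        },
    },
}

long_name_overrides = {
    'sea water temperature': {
        'epic_code': 28,
        'overrides': {},
    },
}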
Example #23
    def __init__(self,
                 output_directory,
                 latitude,
                 longitude,
                 station_name,
                 global_attributes,
                 times=None,
                 verticals=None,
                 vertical_fill=None,
                 output_filename=None,
                 vertical_axis_name=None,
                 vertical_positive=None):
        if output_filename is None:
            output_filename = '{}_{}.nc'.format(station_name,
                                                int(random.random() * 100000))
            logger.info("No output filename specified, saving as {}".format(
                output_filename))

        self.vertical_positive = vertical_positive or 'down'
        self.vertical_axis_name = vertical_axis_name or 'z'
        self.time_axis_name = 'time'

        # Make directory
        if not os.path.exists(output_directory):
            os.makedirs(output_directory)

        self.time = None

        self.out_file = os.path.abspath(
            os.path.join(output_directory, output_filename))
        if os.path.isfile(self.out_file):
            os.remove(self.out_file)

        with EnhancedDataset(self.out_file, 'w') as nc:
            # Global attributes
            # These are set by this script; we don't want anyone to be able to set them manually
            global_skips = [
                "time_coverage_start", "time_coverage_end",
                "time_coverage_duration", "time_coverage_resolution",
                "featureType", "geospatial_vertical_positive",
                "geospatial_vertical_min", "geospatial_vertical_max",
                "geospatial_lat_min", "geospatial_lon_min",
                "geospatial_lat_max", "geospatial_lon_max",
                "geospatial_vertical_resolution", "Conventions", "date_created"
            ]
            for k, v in global_attributes.items():
                if v is None:
                    v = "None"
                if k not in global_skips:
                    nc.setncattr(k, v)
            nc.setncattr("Conventions", "CF-1.6")
            nc.setncattr("date_created",
                         datetime.utcnow().strftime("%Y-%m-%dT%H:%M:00Z"))
            nc.setncattr("date_issued",
                         datetime.utcnow().strftime("%Y-%m-%dT%H:%M:00Z"))
            nc.setncattr('cdm_data_type', 'Station')

            # Station name
            nc.createDimension("feature_type_instance", len(station_name))
            name = nc.createVariable("feature_type_instance", "S1",
                                     ("feature_type_instance", ))
            name.cf_role = "timeseries_id"
            name.long_name = "Identifier for each feature type instance"
            name[:] = list(station_name)

            # Location
            lat = nc.createVariable("latitude", "f8")
            lat.units = "degrees_north"
            lat.standard_name = "latitude"
            lat.long_name = "sensor latitude"
            lat[:] = latitude
            nc.setncattr("geospatial_lat_min", latitude)
            nc.setncattr("geospatial_lat_max", latitude)
            nc.setncattr("geospatial_lat_units", "degrees_north")

            lon = nc.createVariable("longitude", "f8")
            lon.units = "degrees_east"
            lon.standard_name = "longitude"
            lon.long_name = "sensor longitude"
            lon[:] = longitude
            nc.setncattr("geospatial_lon_min", longitude)
            nc.setncattr("geospatial_lon_max", longitude)
            nc.setncattr("geospatial_lon_units", "degrees_east")

            # Metadata variables
            self.crs = nc.createVariable("crs", "i4")
            self.crs.long_name = "http://www.opengis.net/def/crs/EPSG/0/4326"
            self.crs.grid_mapping_name = "latitude_longitude"
            self.crs.epsg_code = "EPSG:4326"
            self.crs.semi_major_axis = float(6378137.0)
            self.crs.inverse_flattening = float(298.257223563)

            platform = nc.createVariable("platform", "i4")
            platform.ioos_code = station_name
            platform.short_name = global_attributes.get("title", station_name)
            platform.long_name = global_attributes.get("description",
                                                       station_name)
            platform.definition = "http://mmisw.org/ont/ioos/definition/stationID"
            nc.setncattr('platform', 'platform')

            if vertical_fill is None:
                vertical_fill = -9999.9
            self.vertical_fill = vertical_fill

        self.setup_times_and_verticals(times, verticals)
        logger.info("Created file at '{}'".format(self.out_file))