Example #1
    def from_string(urn_string):
        complete = urn_string.split('#')
        extras = ''
        if len(complete) > 1:
            extras = '#{0}'.format(complete[1])
        parts = complete[0].split(':')

        if len(parts) < 5:
            return IoosUrn()
        urn = IoosUrn()
        urn.asset_type = parts[2]
        urn.authority = parts[3]
        urn.label = parts[4]
        if len(parts) > 5:
            if urn.asset_type == 'station':
                urn.version = parts[5]
            elif len(parts) > 6:
                # Also a version specified, so this has to be the component
                urn.component = parts[5] + extras
            else:
                logger.debug(
                    "Assuming that {0} is the 'component' piece of the URN (not the 'version')"
                    .format(parts[5] + extras))
                urn.component = parts[5] + extras
        if len(parts) > 6:
            urn.version = parts[6]
        if len(parts) > 7:
            logger.warning("The URN is too long, stripping off '{}'".format(
                ':'.join(parts[7:])))
        return urn
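A quick usage sketch of the parser above. The layout it expects is urn:ioos:<asset_type>:<authority>:<label>[:<component>][:<version>]; whether from_string is exposed as a @staticmethod on IoosUrn is an assumption here.

    # Hedged sketch; assumes IoosUrn.from_string is callable as a static method
    urn = IoosUrn.from_string('urn:ioos:station:wmo:21414')
    assert urn.asset_type == 'station'
    assert urn.authority == 'wmo'
    assert urn.label == '21414'

    # A '#' discriminant is re-attached to the component piece
    urn = IoosUrn.from_string('urn:ioos:sensor:wmo:21414:sea_water_temperature#cell=6')
    assert urn.component == 'sea_water_temperature#cell=6'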
Example #2
    def valid(self):
        ASSET_TYPES = ['station', 'network', 'sensor', 'survey']

        if self.authority is None:
            logger.debug('URN not valid - An "authority" is required')
            return False

        if self.label is None:
            logger.debug('URN not valid - A "label" is required')
            return False

        if self.asset_type not in ASSET_TYPES:
            logger.debug(
                'URN not valid - asset_type {0} is unknown. Must be one of: {1}'
                .format(self.asset_type, ', '.join(ASSET_TYPES)))
            return False

        if self.asset_type == 'station' and self.component is not None:
            logger.debug(
                'URN not valid - An asset_type of "station" may not have a "component".')
            return False

        return True
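A short illustration of the checks, assuming IoosUrn attributes default to None:

    urn = IoosUrn.from_string('urn:ioos:station:wmo:21414')
    assert urn.valid() is True

    urn.asset_type = 'buoy'          # not one of ASSET_TYPES
    assert urn.valid() is False      # the reason is logged at debug level

    urn.asset_type = 'station'
    urn.component = 'temperature'    # stations may not carry a component
    assert urn.valid() is False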
Example #3
    def load(cls, path):

        fpath = os.path.realpath(path)
        subs = list(all_subclasses(cls))
        dsg = cls(fpath)

        try:
            for klass in subs:
                logger.debug('Trying {}...'.format(klass.__name__))
                if hasattr(klass, 'is_mine') and klass.is_mine(dsg):
                    # The finally block closes the probe handle exactly once,
                    # including on this early return
                    return klass(path)
        finally:
            dsg.close()

        subnames = ', '.join([s.__name__ for s in subs])
        raise ValueError('Could not open {} as any type of CF Dataset. Tried: {}.'.format(fpath, subnames))
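load relies on an all_subclasses helper that is not shown here. A minimal sketch of what such a helper usually looks like (the project's real implementation may differ):

    def all_subclasses(cls):
        # Recurse through the whole inheritance tree, not just direct children
        for sub in cls.__subclasses__():
            yield sub
            yield from all_subclasses(sub)

With that in place, CFDataset.load(path) opens the file once, asks each subclass's is_mine check whether the file matches its layout, and returns a fresh, typed instance for the first match.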
Example #4
    def to_dataframe(self, clean_cols=True, clean_rows=True):
        # Z
        zvar = self.z_axes()[0]
        z = np.ma.fix_invalid(np.ma.MaskedArray(zvar[:]))
        z = z.flatten().round(5)
        logger.debug(['z data size: ', z.size])

        # T
        tvar = self.t_axes()[0]
        t = np.ma.MaskedArray(nc4.num2date(tvar[:], tvar.units, getattr(tvar, 'calendar', 'standard'))).flatten()
        # Patch the time variable back to its original mask, since num2date
        # breaks any missing/fill values
        if hasattr(tvar[0], 'mask'):
            t.mask = tvar[:].mask
        logger.debug(['time data size: ', t.size])

        # X
        xvar = self.x_axes()[0]
        x = np.ma.fix_invalid(np.ma.MaskedArray(xvar[:])).flatten().round(5)
        logger.debug(['x data size: ', x.size])

        # Y
        yvar = self.y_axes()[0]
        y = np.ma.fix_invalid(np.ma.MaskedArray(yvar[:])).flatten().round(5)
        logger.debug(['y data size: ', y.size])

        # Trajectories
        pvar = self.get_variables_by_attributes(cf_role='trajectory_id')[0]

        try:
            p = normalize_array(pvar)
        except Exception:
            logger.exception('Could not pull trajectory values from the variable, using indexes.')
            p = np.arange(len(pvar), dtype=np.int64)

        # The Dimension that the trajectory id variable doesn't have is what
        # the trajectory data needs to be repeated by
        dim_diff = self.dimensions[list(set(tvar.dimensions).difference(set(pvar.dimensions)))[0]]
        if dim_diff:
            p = p.repeat(dim_diff.size)
        logger.debug(['trajectory data size: ', p.size])

        # Distance
        d = np.append([0], great_distance(start_latitude=y[0:-1], end_latitude=y[1:], start_longitude=x[0:-1], end_longitude=x[1:])['distance'])
        d = np.ma.fix_invalid(np.ma.MaskedArray(np.cumsum(d)).astype(np.float64).round(2))
        logger.debug(['distance data size: ', d.size])

        df_data = {
            't': t,
            'x': x,
            'y': y,
            'z': z,
            'trajectory': p,
            'distance': d
        }

        building_index_to_drop = np.ones(t.size, dtype=bool)
        extract_vars = list(set(self.data_vars() + self.ancillary_vars()))
        for dvar in extract_vars:
            vdata = np.ma.fix_invalid(np.ma.MaskedArray(dvar[:].round(3).flatten()))
            # Only drop a row when every data variable is masked there
            building_index_to_drop = building_index_to_drop & np.ma.getmaskarray(vdata)
            df_data[dvar.name] = vdata

        df = pd.DataFrame(df_data)

        # Drop all data columns with no data
        if clean_cols:
            df = df.dropna(axis=1, how='all')

        # Drop all data rows with no data variable data
        if clean_rows:
            df = df.iloc[~building_index_to_drop]

        return df
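A hedged usage sketch, assuming this method lives on a trajectory-type CFDataset subclass returned by load:

    dsg = CFDataset.load('/path/to/trajectory.nc')
    try:
        df = dsg.to_dataframe(clean_cols=True, clean_rows=True)
        # One row per sample, with a cumulative great-circle distance column
        print(df[['trajectory', 't', 'x', 'y', 'z', 'distance']].head())
    finally:
        dsg.close()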
Example #5
    def to_dataframe(self, clean_cols=True, clean_rows=True):

        zvar = self.z_axes()[0]
        zs = len(self.dimensions[zvar.dimensions[0]])

        # Profiles
        pvar = self.get_variables_by_attributes(cf_role='profile_id')[0]
        try:
            p = normalize_array(pvar)
        except ValueError:
            p = np.arange(len(pvar), dtype=np.int64)
        ps = p.size
        p = p.repeat(zs)
        logger.debug(['profile data size: ', p.size])

        # Z
        z = generic_masked(zvar[:], attrs=self.vatts(zvar.name)).round(5)
        try:
            z = np.tile(z, ps)
        except ValueError:
            z = z.flatten()
        logger.debug(['z data size: ', z.size])

        # T
        tvar = self.t_axes()[0]
        t = nc4.num2date(tvar[:], tvar.units,
                         getattr(tvar, 'calendar', 'standard'))
        if isinstance(t, datetime):
            # Size one
            t = np.array([t.isoformat()], dtype='datetime64')
        t = t.repeat(zs)
        logger.debug(['time data size: ', t.size])

        # X
        xvar = self.x_axes()[0]
        x = generic_masked(xvar[:].repeat(zs),
                           attrs=self.vatts(xvar.name)).round(5)
        logger.debug(['x data size: ', x.size])

        # Y
        yvar = self.y_axes()[0]
        y = generic_masked(yvar[:].repeat(zs),
                           attrs=self.vatts(yvar.name)).round(5)
        logger.debug(['y data size: ', y.size])

        # Distance
        d = np.ma.zeros(y.size, dtype=np.float64)
        d[1:] = great_distance(start_latitude=y[0:-1],
                               end_latitude=y[1:],
                               start_longitude=x[0:-1],
                               end_longitude=x[1:])['distance']
        d = generic_masked(np.cumsum(d), minv=0).round(2)
        logger.debug(['distance data size: ', d.size])

        df_data = {'t': t, 'x': x, 'y': y, 'z': z, 'profile': p, 'distance': d}

        building_index_to_drop = np.ones(t.size, dtype=bool)
        extract_vars = list(set(self.data_vars() + self.ancillary_vars()))
        for dvar in extract_vars:
            vdata = np.ma.fix_invalid(
                np.ma.MaskedArray(dvar[:].round(3).flatten()))
            # Only drop a row when every data variable is masked there
            building_index_to_drop = (building_index_to_drop &
                                      np.ma.getmaskarray(vdata))
            df_data[dvar.name] = vdata

        df = pd.DataFrame(df_data)

        # Drop all data columns with no data
        if clean_cols:
            df = df.dropna(axis=1, how='all')

        # Drop all data rows with no data variable data
        if clean_rows:
            df = df.iloc[~building_index_to_drop]

        return df
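This variant leans on a generic_masked helper (and a vatts attribute lookup) that are not shown. A rough sketch of the masking behavior being assumed here, not the library's actual implementation:

    import numpy as np

    def generic_masked(arr, attrs=None, minv=None, maxv=None):
        # Mask NaN/Inf values, any declared _FillValue, and out-of-range data
        attrs = attrs or {}
        arr = np.ma.fix_invalid(np.ma.MaskedArray(arr))
        if '_FillValue' in attrs:
            arr = np.ma.masked_values(arr, attrs['_FillValue'])
        if minv is not None:
            arr = np.ma.masked_less(arr, minv)
        if maxv is not None:
            arr = np.ma.masked_greater(arr, maxv)
        return arr

The minv=0 call in the distance block above would then mask any negative cumulative distances rather than letting them through.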
Example #6
    def setup_times_and_verticals(self, times, verticals):

        if isinstance(times, (list, tuple,)):
            times = np.asarray(times)

        # Create time as int32 or float64 because DAP does not support int64 until DAP4.
        if get_type(times) == np.int64:
            if times.max() < np.iinfo(np.int32).max:
                # We can fit inside of an int32
                times = times.astype(np.int32)
            else:
                # Create time as float64 because of int32 overflow
                times = times.astype(np.float64)

        # If nothing is passed in, set to the vertical_fill value.
        if not isinstance(verticals, np.ndarray) and not verticals:
            verticals = np.ma.masked_values([self.vertical_fill], self.vertical_fill)

        # Convert to masked array
        if isinstance(verticals, (list, tuple)):
            verticals = np.ma.masked_values(verticals, self.vertical_fill)
        elif isinstance(verticals, np.ndarray):
            self.vertical_fill = verticals.dtype.type(self.vertical_fill)
            verticals = np.ma.masked_values(verticals, self.vertical_fill)
        if get_type(verticals) == np.int64:
            # Store verticals as int32 because DAP does not support int64 until DAP4.
            verticals = verticals.astype(np.int32)

        # Don't unique Time... rely on the person submitting the data correctly.
        # That means we allow duplicate times, as long as the data contains duplicate times as well.
        self.time_indexes = np.argsort(times)
        full_times = times[self.time_indexes]

        # Unique the vertical values
        # Special case for all zeros.  Added here for greater readability.
        if np.isclose(verticals, 0).all():
            save_mask = verticals.mask
            verticals.mask = False
            unique_verticals, self.vertical_indexes = np.ma.unique(verticals, return_index=True)
            if save_mask.size > 1:
                unique_verticals.mask = save_mask[self.vertical_indexes]
        elif verticals is not None and verticals.any():
            save_mask = verticals.mask
            verticals.mask = False
            unique_verticals, self.vertical_indexes = np.ma.unique(verticals, return_index=True)
            if save_mask.size > 1:
                unique_verticals.mask = save_mask[self.vertical_indexes]
        else:
            unique_verticals = verticals
            self.vertical_indexes = np.arange(len(verticals))

        # Calculate time stats based on a unique time array
        unique_times = np.unique(full_times)
        starting = datetime.utcfromtimestamp(unique_times[0])
        ending   = datetime.utcfromtimestamp(unique_times[-1])

        logger.debug("Setting up time...")
        # Time extents
        self._nc.setncattr("time_coverage_start",    starting.isoformat())
        self._nc.setncattr("time_coverage_end",      ending.isoformat())
        # duration (ISO8601 format)
        self._nc.setncattr("time_coverage_duration", "PT{0:d}S".format(int(round((ending - starting).total_seconds()))))
        # resolution (ISO8601 format)
        # subtract adjacent times to produce an array of differences, then take the most common occurrence
        diffs = unique_times[1:] - unique_times[:-1]
        uniqs, inverse = np.unique(diffs, return_inverse=True)
        if uniqs.size > 1:
            time_diffs = uniqs[np.bincount(inverse).argmax()]
            self._nc.setncattr("time_coverage_resolution", "PT{0:d}S".format(int(round(time_diffs))))

        # Time
        self.time_chunk = min(full_times.size, 1000)
        self._nc.createDimension("time", full_times.size)
        self.time = self._nc.createVariable(self.time_axis_name, get_type(full_times), ("time",), chunksizes=(self.time_chunk,))
        self.time.units          = "seconds since 1970-01-01T00:00:00Z"
        self.time.standard_name  = "time"
        self.time.long_name      = "time of measurement"
        self.time.calendar       = "gregorian"
        self.time.axis           = "T"
        self.time[:] = full_times

        logger.debug("Setting up {}...".format(self.vertical_axis_name))
        # Figure out if we are creating a Profile or just a TimeSeries
        self._nc.setncattr("geospatial_vertical_units", "meters")
        self._nc.setncattr("geospatial_vertical_positive", self.vertical_positive)
        if unique_verticals.size <= 1:
            # TIMESERIES
            self._nc.setncattr("featureType", "timeSeries")
            # Fill in variable if we have an actual height. Else, the fillvalue remains.
            self._nc.setncattr("geospatial_vertical_resolution", '0')

            self.z = self._nc.createVariable(self.vertical_axis_name, get_type(unique_verticals), fill_value=self.vertical_fill)

            if unique_verticals.size == 1 and not np.isnan(unique_verticals[0]) and unique_verticals[0] != self.vertical_fill:
                # Vertical extents
                self._nc.setncattr("geospatial_vertical_min",      unique_verticals[0])
                self._nc.setncattr("geospatial_vertical_max",      unique_verticals[0])
                self.z.valid_min = unique_verticals[0]
                self.z.valid_max = unique_verticals[0]

        elif unique_verticals.size > 1:
            # TIMESERIES PROFILE
            self._nc.setncattr("featureType", "timeSeriesProfile")
            # Vertical extents
            non_nan_verticals = unique_verticals[ (~np.isnan(unique_verticals)) & (unique_verticals != self.vertical_fill) ]
            minvertical    = float(np.min(non_nan_verticals))
            maxvertical    = float(np.max(non_nan_verticals))
            vertical_diffs = non_nan_verticals[1:] - non_nan_verticals[:-1]
            self._nc.setncattr("geospatial_vertical_min", minvertical)
            self._nc.setncattr("geospatial_vertical_max", maxvertical)
            if vertical_diffs.size >= 1:
                self._nc.setncattr("geospatial_vertical_resolution", " ".join([ str(x) for x in list(vertical_diffs) if not np.isnan(x) ]))
            else:
                self._nc.setncattr("geospatial_vertical_resolution", '0')
            # There is more than one vertical value for this variable, we need to create a vertical dimension
            self._nc.createDimension("z", unique_verticals.size)
            self.z = self._nc.createVariable(self.vertical_axis_name, get_type(unique_verticals), ("z", ), fill_value=self.vertical_fill)
            self.z.valid_min = minvertical
            self.z.valid_max = maxvertical

        self.z.grid_mapping  = 'crs'
        self.z.long_name     = "{} of the sensor relative to the water surface".format(self.vertical_axis_name)
        if self.vertical_positive == 'up':
            self.z.standard_name = 'height'
        elif self.vertical_positive == 'down':
            self.z.standard_name = 'depth'
        self.z.positive      = self.vertical_positive
        self.z.units         = "m"
        self.z.axis          = "Z"
        self.z[:] = unique_verticals

        self._nc.sync()
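time_coverage_resolution above is derived by differencing adjacent timestamps and keeping the most common gap. A standalone illustration of that step:

    import numpy as np

    times = np.array([0, 60, 120, 180, 300])           # epoch seconds
    diffs = times[1:] - times[:-1]                     # [60, 60, 60, 120]
    uniqs, inverse = np.unique(diffs, return_inverse=True)
    resolution = uniqs[np.bincount(inverse).argmax()]  # most common gap: 60
    print("PT{0:d}S".format(int(resolution)))          # PT60S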
Example #7
    def setup_times_and_verticals(self, times, verticals):

        if isinstance(times, (list, tuple,)):
            times = np.asarray(times)

        # If nothing is passed in, set to the vertical_fill value.
        if not isinstance(verticals, np.ndarray) and not verticals:
            verticals = np.ma.masked_values([self.vertical_fill], self.vertical_fill)

        # Convert to masked array
        if isinstance(verticals, (list, tuple, np.ndarray)):
            verticals = np.ma.masked_values(verticals, self.vertical_fill)

        # Don't unique Time... rely on the person submitting the data correctly.
        # That means we allow duplicate times, as long as the data contains duplicate times as well.
        self.time_indexes = np.argsort(times)
        full_times = times[self.time_indexes]

        # Unique the vertical values
        # Special case for all zeros.  Added here for greater readability.
        if np.isclose(verticals, 0).all():
            save_mask = verticals.mask
            verticals.mask = False
            unique_verticals, self.vertical_indexes = np.ma.unique(verticals, return_index=True)
            if save_mask.size > 1:
                unique_verticals.mask = save_mask[self.vertical_indexes]
        elif verticals is not None and verticals.any():
            save_mask = verticals.mask
            verticals.mask = False
            unique_verticals, self.vertical_indexes = np.ma.unique(verticals, return_index=True)
            if save_mask.size > 1:
                unique_verticals.mask = save_mask[self.vertical_indexes]
        else:
            unique_verticals = verticals
            self.vertical_indexes = np.arange(len(verticals))

        starting = datetime.utcfromtimestamp(full_times[0])
        ending   = datetime.utcfromtimestamp(full_times[-1])

        logger.debug("Setting up time...")
        # Time extents
        self.nc.setncattr("time_coverage_start",    starting.isoformat())
        self.nc.setncattr("time_coverage_end",      ending.isoformat())
        # duration (ISO8601 format)
        self.nc.setncattr("time_coverage_duration", "P%sS" % unicode(int(round((ending - starting).total_seconds()))))
        # resolution (ISO8601 format)
        # subtract adjacent times to produce an array of differences, then get the most common occurance
        diffs = unique_times[1:] - unique_times[:-1]
        uniqs, inverse = np.unique(diffs, return_inverse=True)
        if uniqs.size > 1:
            time_diffs = diffs[np.bincount(inverse).argmax()]
            self.nc.setncattr("time_coverage_resolution", "P%sS" % unicode(int(round(time_diffs))))

        # Time - stored as 64-bit floats ("f8")
        self.nc.createDimension("time")
        self.time = self.nc.createVariable(self.time_axis_name,    "f8", ("time",), chunksizes=(1000,))
        self.time.units          = "seconds since 1970-01-01T00:00:00Z"
        self.time.standard_name  = "time"
        self.time.long_name      = "time of measurement"
        self.time.calendar       = "gregorian"
        self.time[:] = full_times

        logger.debug("Setting up {}...".format(self.vertical_axis_name))
        # Figure out if we are creating a Profile or just a TimeSeries
        if unique_verticals.size <= 1:
            # TIMESERIES
            self.nc.setncattr("featureType", "timeSeries")
            # Fill in variable if we have an actual height. Else, the fillvalue remains.
            if unique_verticals.any() and unique_verticals.size == 1:
                # Vertical extents
                self.nc.setncattr("geospatial_vertical_positive", self.vertical_positive)
                self.nc.setncattr("geospatial_vertical_min",      unique_verticals[0])
                self.nc.setncattr("geospatial_vertical_max",      unique_verticals[0])
            self.z = self.nc.createVariable(self.vertical_axis_name,     "f8", fill_value=self.vertical_fill)

        elif unique_verticals.size > 1:
            # TIMESERIES PROFILE
            self.nc.setncattr("featureType", "timeSeriesProfile")
            # Vertical extents
            minvertical    = float(np.min(unique_verticals))
            maxvertical    = float(np.max(unique_verticals))
            vertical_diffs = unique_verticals[1:] - unique_verticals[:-1]
            self.nc.setncattr("geospatial_vertical_positive",   self.vertical_positive)
            self.nc.setncattr("geospatial_vertical_min",        minvertical)
            self.nc.setncattr("geospatial_vertical_max",        maxvertical)
            self.nc.setncattr("geospatial_vertical_resolution", " ".join(map(unicode, list(vertical_diffs))))
            # There is more than one vertical value for this variable, we need to create a vertical dimension
            self.nc.createDimension("z", unique_verticals.size)
            self.z = self.nc.createVariable(self.vertical_axis_name,     "f8", ("z", ), fill_value=self.vertical_fill)

        self.z.grid_mapping  = 'crs'
        self.z.long_name     = "{} of the sensor relative to the water surface".format(self.vertical_axis_name)
        self.z.standard_name = self.vertical_axis_name
        self.z.positive      = self.vertical_positive
        self.z.units         = "m"
        self.z.axis          = "Z"
        self.z[:] = unique_verticals
        self.nc.sync()
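Both revisions write ISO 8601 duration strings, where the T designator must separate date and time components (PT7200S, not P7200S). A self-contained check of the duration attribute written above:

    from datetime import datetime

    starting = datetime.utcfromtimestamp(1388534400)   # 2014-01-01T00:00:00
    ending = datetime.utcfromtimestamp(1388541600)     # 2014-01-01T02:00:00
    seconds = int(round((ending - starting).total_seconds()))
    print("PT%dS" % seconds)                           # PT7200S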