Example #1
    def from_string(urn_string):
        complete = urn_string.split('#')
        extras = ''
        if len(complete) > 1:
            extras = '#{0}'.format(complete[1])
        parts = complete[0].split(':')

        if len(parts) < 5:
            return IoosUrn()
        urn            = IoosUrn()
        urn.asset_type = parts[2]
        urn.authority  = parts[3]
        urn.label      = parts[4]
        if len(parts) > 5:
            if urn.asset_type == 'station':
                urn.version = parts[5]
            elif len(parts) > 6:
                # A version is also specified, so this has to be the component
                urn.component = parts[5] + extras
            else:
                logger.debug("Assuming that {0} is the 'component' piece of the URN (not the 'version')".format(parts[5] + extras))
                urn.component = parts[5] + extras
        if len(parts) > 6:
            urn.version = parts[6]
        if len(parts) > 7:
            logger.warning("The URN is too long, stripping off '{}'".format(':'.join(parts[7:])))
        return urn
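For reference, the parser assumes the IOOS URN layout urn:ioos:asset_type:authority:label[:component][:version], with parts[0] and parts[1] being 'urn' and 'ioos'. A minimal usage sketch with a made-up URN, assuming from_string is exposed as a staticmethod (the missing self suggests it):

    urn = IoosUrn.from_string('urn:ioos:sensor:us.test:station1:sea_water_temperature')
    assert urn.asset_type == 'sensor'
    assert urn.authority == 'us.test'
    assert urn.label == 'station1'
    assert urn.component == 'sea_water_temperature'  # parts[5] falls through to 'component'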
Example #2
    def from_string(urn_string):
        complete = urn_string.split('#')
        extras = ''
        if len(complete) > 1:
            extras = '#{0}'.format(complete[1])
        parts = complete[0].split(':')

        if len(parts) < 5:
            return IoosUrn()
        urn = IoosUrn()
        urn.asset_type = parts[2]
        urn.authority = parts[3]
        urn.label = parts[4]
        if len(parts) > 5:
            if urn.asset_type == 'station':
                urn.version = parts[5]
            elif len(parts) > 6:
                # A version is also specified, so this has to be the component
                urn.component = parts[5] + extras
            else:
                logger.debug(
                    "Assuming that {0} is the 'component' piece of the URN (not the 'version')"
                    .format(parts[5] + extras))
                urn.component = parts[5] + extras
        if len(parts) > 6:
            urn.version = parts[6]
        if len(parts) > 7:
            logger.warning("The URN is too long, stripping off '{}'".format(
                ':'.join(parts[7:])))
        return urn
Example #3
def get_dataframe_from_variable(nc, data_var):
    """ Returns a Pandas DataFrame of the data.
        This always returns positive down depths
    """
    time_var = nc.get_variables_by_attributes(standard_name='time')[0]

    depth_vars = nc.get_variables_by_attributes(axis=lambda v: v is not None and v.lower() == 'z')
    depth_vars += nc.get_variables_by_attributes(standard_name=lambda v: v in ['height', 'depth', 'surface_altitude'], positive=lambda x: x is not None)

    # Find the correct depth variable
    depth_var = None
    for d in depth_vars:
        try:
            if d._name in data_var.coordinates.split(" ") or d._name in data_var.dimensions:
                depth_var = d
                break
        except AttributeError:
            continue

    times  = netCDF4.num2date(time_var[:], units=time_var.units, calendar=getattr(time_var, 'calendar', 'standard'))
    original_times_size = times.size

    if depth_var is None and hasattr(data_var, 'sensor_depth'):
        depth_type = get_type(data_var.sensor_depth)
        depths = np.asarray([data_var.sensor_depth] * len(times)).flatten()
        values = data_var[:].flatten()
    elif depth_var is None:
        depths = np.asarray([np.nan] * len(times)).flatten()
        depth_type = get_type(depths)
        values = data_var[:].flatten()
    else:
        depths = depth_var[:]
        depth_type = get_type(depths)
        if len(data_var.shape) > 1:
            times = np.repeat(times, depths.size)
            depths = np.tile(depths, original_times_size)
            values = data_var[:, :].flatten()
        else:
            values = data_var[:].flatten()

        if getattr(depth_var, 'positive', 'down').lower() == 'up':
            logger.warning("Converting depths to positive down before returning the DataFrame")
            depths = depths * -1

    # https://github.com/numpy/numpy/issues/4595
    # We can't call astype on a MaskedConstant
    if (
        isinstance(depths, np.ma.core.MaskedConstant) or
        (hasattr(depths, 'mask') and depths.mask.all())
    ):
        depths = np.asarray([np.nan] * len(times)).flatten()

    df = pd.DataFrame({ 'time':   times,
                        'value':  values.astype(data_var.dtype),
                        'unit':   data_var.units if hasattr(data_var, 'units') else np.nan,
                        'depth':  depths.astype(depth_type) })

    df.set_index([pd.DatetimeIndex(df['time']), pd.Float64Index(df['depth'])], inplace=True)
    return df
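A sketch of how this helper might be called; the file and variable names are hypothetical, and nc must be a netCDF4.Dataset (which provides get_variables_by_attributes):

    import netCDF4

    nc = netCDF4.Dataset('station.nc')                # hypothetical file
    data_var = nc.variables['sea_water_temperature']  # hypothetical variable name
    df = get_dataframe_from_variable(nc, data_var)
    print(df.head())  # indexed by (time, depth), with depths positive down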
Example #4
    def global_attributes(self, gas):
        # These are set by this script; we don't want anyone to be able to set them manually
        global_skips = ["time_coverage_start", "time_coverage_end", "time_coverage_duration", "time_coverage_resolution",
                        "featureType", "geospatial_vertical_positive", "geospatial_vertical_min", "geospatial_vertical_max",
                        "geospatial_lat_min", "geospatial_lon_min", "geospatial_lat_max", "geospatial_lon_max",
                        "Conventions", "date_created", "cdm_data_type"]

        for i in set(global_skips) & gas.keys():
            logger.warning("Ignoring global attribute {} because it is calculated or set automatically".format(i))

        self._global_attributes = { k: v for k, v in gas.items() if k not in global_skips }
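Assuming this method is wired up as a property setter (the _global_attributes assignment suggests it), a made-up example of the filtering:

    gas = {'title': 'My station', 'Conventions': 'CF-1.6'}
    ts.global_attributes = gas  # warns about and drops 'Conventions', keeps 'title'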
Example #5
    def export(self, output_file):
        super(IncompleteProfile, self).export(output_file)

        with netCDF4.Dataset(output_file, 'w', clobber=True) as nc:

            gas = self.global_attributes
            nc.setncatts(gas)

            profiles = self.df.profile.unique().size
            profile_group = self.df.groupby('profile')
            max_z = profile_group.size().max()

            nc.createDimension('profile', profiles)
            nc.createDimension('z', max_z)

            profile = nc.createVariable('profile', self.df.profile.dtype, ('profile',))
            _, unique_profile_rows = np.unique(self.df.profile.values, return_index=True)
            profile[:] = list(range(profiles))

            time = nc.createVariable('time', int, ('profile',))
            time[:] = netCDF4.date2num([datetime.utcfromtimestamp(t) for t in self.df.time.unique().astype('<M8[s]').astype(int)], units=self.base_time)

            latitude = nc.createVariable('latitude', self.df.latitude.dtype, ('profile',))
            latitude[:] = self.df.latitude.values[unique_profile_rows]

            longitude = nc.createVariable('longitude', self.df.longitude.dtype, ('profile',))
            longitude[:] = self.df.longitude.values[unique_profile_rows]

            # Metadata variables
            nc.createVariable("crs", 'i4')
            nc.createVariable("platform", "i4")
            nc.setncattr('platform', 'platform')

            # Data vars
            reserved_columns = ['profile', 'time', 'latitude', 'longitude']
            for i, (name, p) in enumerate(profile_group):
                for c in [d for d in self.df.columns if d not in reserved_columns]:
                    var_name = c.split(' ')[0].lower()
                    fill = p[c].dtype.type(self.fill_value)
                    if var_name not in nc.variables:
                        v = nc.createVariable(var_name, self.df[c].dtype, ('profile', 'z'), fill_value=fill)
                    else:
                        v = nc.variables[var_name]
                    assignable_values = p[c].fillna(fill).values
                    v[i, :len(assignable_values)] = assignable_values

            for k, v in self.variable_attributes.items():
                if k in nc.variables:
                    for n, z in v.items():
                        try:
                            nc.variables[k].setncattr(n, z)
                        except BaseException:
                            logger.warning('Could not set attribute {} on {}'.format(n, k))
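A hypothetical call; the constructor arguments are assumptions, since only the export override is shown here:

    ip = IncompleteProfile(df=profile_df, fill_value=-9999.9)  # hypothetical constructor
    ip.export('profiles.nc')  # writes (profile,) and (profile, z) shaped variables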
Example #6
    def from_dataframe(df, output_directory, output_filename, latitude, longitude, station_name, global_attributes, variable_name, variable_attributes, sensor_vertical_datum=None, fillvalue=None, data_column=None, vertical_axis_name=None, vertical_positive=None, create_instrument_variable=False, attempts=None):

        # Attempts is how many times to try to build a NetCDF file from a
        # dataframe. For backwards compatibility purposes, we always try
        # everything (even manual matching, which takes forever and is a memory
        # hog).
        attempts = attempts or 5

        if fillvalue is None:
            fillvalue = -9999.9
        if data_column is None:
            data_column = 'value'

        data_fillvalue = df[data_column].values.dtype.type(fillvalue)
        vertical_fillvalue = df['depth'].values.dtype.type(fillvalue)

        df[data_column] = df[data_column].fillna(data_fillvalue)
        times = np.asarray([ calendar.timegm(x.utctimetuple()) for x in df['time'] ])
        df['depth'] = df['depth'].fillna(vertical_fillvalue)

        depths = df['depth'].values
        try:
            ts = TimeSeries(output_directory, latitude, longitude, station_name, global_attributes, times=times, verticals=depths, output_filename=output_filename, vertical_fill=vertical_fillvalue, vertical_axis_name=vertical_axis_name, vertical_positive=vertical_positive)
            ts.add_variable(variable_name, df[data_column].values, attributes=variable_attributes, sensor_vertical_datum=sensor_vertical_datum, raise_on_error=True, fillvalue=data_fillvalue, create_instrument_variable=create_instrument_variable)
        except ValueError:
            if attempts < 2:
                raise
            logger.warning("Attempt 2: using unique times")
            try:
                # Try uniquing time
                newtimes  = np.unique(times)
                ts = TimeSeries(output_directory, latitude, longitude, station_name, global_attributes, times=newtimes, verticals=depths, output_filename=output_filename, vertical_fill=vertical_fillvalue, vertical_axis_name=vertical_axis_name, vertical_positive=vertical_positive)
                ts.add_variable(variable_name, df[data_column].values, attributes=variable_attributes, sensor_vertical_datum=sensor_vertical_datum, raise_on_error=True, fillvalue=data_fillvalue, create_instrument_variable=create_instrument_variable)
            except ValueError:
                if attempts < 3:
                    raise
                logger.warning("Attempt 3: using unique depths")
                try:
                    # Try uniquing depths
                    newdepths = np.unique(df['depth'].values)
                    ts = TimeSeries(output_directory, latitude, longitude, station_name, global_attributes, times=times, verticals=newdepths, output_filename=output_filename, vertical_fill=vertical_fillvalue, vertical_axis_name=vertical_axis_name, vertical_positive=vertical_positive)
                    ts.add_variable(variable_name, df[data_column].values, attributes=variable_attributes, sensor_vertical_datum=sensor_vertical_datum, raise_on_error=True, fillvalue=data_fillvalue, create_instrument_variable=create_instrument_variable)
                except ValueError:
                    if attempts < 4:
                        raise
                    logger.warning("Attempt 4: using unique time and depth")
                    try:
                        # Unique both time and depth
                        newdepths = np.unique(df['depth'].values)
                        ts = TimeSeries(output_directory, latitude, longitude, station_name, global_attributes, times=newtimes, verticals=newdepths, output_filename=output_filename, vertical_fill=vertical_fillvalue, vertical_axis_name=vertical_axis_name, vertical_positive=vertical_positive)
                        ts.add_variable(variable_name, df[data_column].values, attributes=variable_attributes, sensor_vertical_datum=sensor_vertical_datum, raise_on_error=True, fillvalue=data_fillvalue, create_instrument_variable=create_instrument_variable)
                    except ValueError:
                        if attempts < 5:
                            raise
                        logger.warning("Attempt 5: manually matching (this is SLOW)")
                        # Manually match
                        ts = TimeSeries(output_directory, latitude, longitude, station_name, global_attributes, times=times, verticals=depths, output_filename=output_filename, vertical_fill=vertical_fillvalue, vertical_axis_name=vertical_axis_name, vertical_positive=vertical_positive)
                        ts.add_variable(variable_name, df[data_column].values, attributes=variable_attributes, times=times, verticals=depths, sensor_vertical_datum=sensor_vertical_datum, raise_on_error=False, fillvalue=data_fillvalue, create_instrument_variable=create_instrument_variable)
        return ts
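A hedged invocation sketch. Column names follow the defaults above ('time', 'depth', 'value'), and the bare df first argument suggests from_dataframe is a staticmethod on TimeSeries, which this assumes:

    from datetime import datetime
    import numpy as np
    import pandas as pd

    df = pd.DataFrame({
        'time': [datetime(2015, 1, 1, h) for h in range(3)],  # hypothetical timestamps
        'depth': [1.0, 1.0, 1.0],
        'value': [10.1, 10.2, np.nan],  # NaN is replaced with the fill value
    })
    ts = TimeSeries.from_dataframe(
        df, './output', 'station.nc', 32.7, -117.2, 'my_station',
        global_attributes={}, variable_name='sea_water_temperature',
        variable_attributes={'units': 'degree_Celsius'})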
Example #7
    def update_attributes(self, attributes):
        for k, v in attributes.pop('global', {}).items():
            try:
                self.setncattr(k, v)
            except BaseException:
                logger.warning('Could not set global attribute {}: {}'.format(
                    k, v))

        for k, v in attributes.items():
            if k in self.variables:
                for n, z in v.items():
                    try:
                        self.variables[k].setncattr(n, z)
                    except BaseException:
                        logger.warning(
                            'Could not set attribute {} on {}'.format(n, k))
        self.sync()
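The attributes mapping this method expects has two levels: an optional 'global' key holding dataset-wide attributes, plus one key per variable name holding that variable's attributes. A hypothetical call (variable name made up):

    nc.update_attributes({
        'global': {'title': 'Updated title'},
        'temperature': {'units': 'degree_Celsius'},
    })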
Example #8
    def from_dataframe(df, output_directory, output_filename, latitude, longitude, station_name, global_attributes, variable_name, variable_attributes, sensor_vertical_datum=None, fillvalue=None, data_column=None, vertical_axis_name=None, vertical_positive=None):

        if fillvalue is None:
            fillvalue = -9999.9
        if data_column is None:
            data_column = 'value'

        df[data_column] = df[data_column].fillna(fillvalue)
        times = np.asarray([ calendar.timegm(x.utctimetuple()) for x in df['time'] ])
        df['depth'] = df['depth'].fillna(fillvalue)
        depths = df['depth'].values
        try:
            ts = TimeSeries(output_directory, latitude, longitude, station_name, global_attributes, times=times, verticals=depths, output_filename=output_filename, vertical_fill=fillvalue, vertical_axis_name=vertical_axis_name, vertical_positive=vertical_positive)
            ts.add_variable(variable_name, df[data_column].values, attributes=variable_attributes, sensor_vertical_datum=sensor_vertical_datum, raise_on_error=True)
        except ValueError:
            logger.warning("Failed first attempt, trying again with unique times.")
            try:
                # Try uniquing time
                newtimes  = np.unique(times)
                ts = TimeSeries(output_directory, latitude, longitude, station_name, global_attributes, times=newtimes, verticals=depths, output_filename=output_filename, vertical_fill=fillvalue, vertical_axis_name=vertical_axis_name, vertical_positive=vertical_positive)
                ts.add_variable(variable_name, df[data_column].values, attributes=variable_attributes, sensor_vertical_datum=sensor_vertical_datum, raise_on_error=True)
            except ValueError:
                logger.warning("Failed second attempt, trying again with unique depths.")
                try:
                    # Try uniquing depths
                    newdepths = np.unique(df['depth'].values)
                    ts = TimeSeries(output_directory, latitude, longitude, station_name, global_attributes, times=times, verticals=newdepths, output_filename=output_filename, vertical_fill=fillvalue, vertical_axis_name=vertical_axis_name, vertical_positive=vertical_positive)
                    ts.add_variable(variable_name, df[data_column].values, attributes=variable_attributes, sensor_vertical_datum=sensor_vertical_datum, raise_on_error=True)
                except ValueError:
                    logger.warning("Failed third attempt, uniquing time and depth.")
                    try:
                        # Unique both time and depth
                        newdepths = np.unique(df['depth'].values)
                        ts = TimeSeries(output_directory, latitude, longitude, station_name, global_attributes, times=newtimes, verticals=newdepths, output_filename=output_filename, vertical_fill=fillvalue, vertical_axis_name=vertical_axis_name, vertical_positive=vertical_positive)
                        ts.add_variable(variable_name, df[data_column].values, attributes=variable_attributes, sensor_vertical_datum=sensor_vertical_datum, raise_on_error=True)
                    except ValueError:
                        logger.warning("Failed fourth attempt, manually matching indexes (this is slow).")
                        # Manually match
                        ts = TimeSeries(output_directory, latitude, longitude, station_name, global_attributes, times=times, verticals=depths, output_filename=output_filename, vertical_fill=fillvalue, vertical_axis_name=vertical_axis_name, vertical_positive=vertical_positive)
                        ts.add_variable(variable_name, df[data_column].values, attributes=variable_attributes, times=times, verticals=depths, sensor_vertical_datum=sensor_vertical_datum, raise_on_error=False)
        return ts
Example #9
    def to_dataframe(self, clean_cols=True, clean_rows=True):
        # The index variable (trajectory_index) is identified by having an
        # attribute with name of instance_dimension whose value is the instance
        # dimension name (trajectory in this example). The index variable must
        # have the profile dimension as its sole dimension, and must be type
        # integer. Each value in the index variable is the zero-based trajectory
        # index that the profile belongs to i.e. profile p belongs to trajectory
        # i=trajectory_index(p), as in section H.2.5.
        r_index_var = self.get_variables_by_attributes(
            instance_dimension=lambda x: x is not None)[0]
        p_dim = self.dimensions[r_index_var.dimensions[0]]  # Profile dimension
        r_dim = self.dimensions[
            r_index_var.instance_dimension]  # Trajectory dimension

        # The count variable (row_size) contains the number of elements for
        # each profile, which must be written contiguously. The count variable
        # is identified by having an attribute with name sample_dimension whose
        # value is the sample dimension (obs in this example) being counted. It
        # must have the profile dimension as its sole dimension, and must be
        # type integer
        o_index_var = self.get_variables_by_attributes(
            sample_dimension=lambda x: x is not None)[0]
        o_dim = self.dimensions[
            o_index_var.sample_dimension]  # Sample dimension

        try:
            rvar = self.get_variables_by_attributes(cf_role='trajectory_id')[0]
            traj_indexes = normalize_array(rvar)
            assert traj_indexes.size == r_dim.size
        except BaseException:
            logger.warning(
                'Could not pull trajectory values from a variable with "cf_role=trajectory_id", using a computed range.'
            )
            traj_indexes = np.arange(r_dim.size)
        try:
            pvar = self.get_variables_by_attributes(cf_role='profile_id')[0]
            profile_indexes = normalize_array(pvar)
            assert profile_indexes.size == p_dim.size
        except BaseException:
            logger.warning(
                'Could not pull profile values from a variable with "cf_role=profile_id", using a computed range.'
            )
            profile_indexes = np.arange(p_dim.size)

        # Profile dimension
        tvars = self.t_axes()
        if len(tvars) > 1:
            tvar = [
                v for v in self.t_axes() if v.dimensions == (
                    p_dim.name, ) and getattr(v, 'axis', '').lower() == 't'
            ][0]
        else:
            tvar = tvars[0]

        xvars = self.x_axes()
        if len(xvars) > 1:
            xvar = [
                v for v in self.x_axes() if v.dimensions == (
                    p_dim.name, ) and getattr(v, 'axis', '').lower() == 'x'
            ][0]
        else:
            xvar = xvars[0]

        yvars = self.y_axes()
        if len(yvars) > 1:
            yvar = [
                v for v in self.y_axes() if v.dimensions == (
                    p_dim.name, ) and getattr(v, 'axis', '').lower() == 'y'
            ][0]
        else:
            yvar = yvars[0]

        zvars = self.z_axes()
        if len(zvars) > 1:
            zvar = [
                v for v in self.z_axes() if v.dimensions == (
                    o_dim.name, ) and getattr(v, 'axis', '').lower() == 'z'
            ][0]
        else:
            zvar = zvars[0]

        p = np.ma.masked_all(o_dim.size, dtype=profile_indexes.dtype)
        r = np.ma.masked_all(o_dim.size, dtype=traj_indexes.dtype)
        t = np.ma.masked_all(o_dim.size, dtype=tvar.dtype)
        x = np.ma.masked_all(o_dim.size, dtype=xvar.dtype)
        y = np.ma.masked_all(o_dim.size, dtype=yvar.dtype)
        si = 0

        for i in np.arange(profile_indexes.size):
            ei = si + o_index_var[i]
            p[si:ei] = profile_indexes[i]
            r[si:ei] = traj_indexes[r_index_var[i]]
            t[si:ei] = tvar[i]
            x[si:ei] = xvar[i]
            y[si:ei] = yvar[i]
            si = ei

        t_mask = False
        tfill = get_fill_value(tvar)
        if tfill is not None:
            t_mask = np.copy(np.ma.getmaskarray(t))
            t[t_mask] = 1

        t = np.ma.MaskedArray(
            nc4.num2date(t, tvar.units, getattr(tvar, 'calendar', 'standard')))
        # Patch the time variable back to its original mask, since num2date
        # breaks any missing/fill values
        t[t_mask] = np.ma.masked

        # X and Y
        x = generic_masked(x, minv=-180, maxv=180).round(5)
        y = generic_masked(y, minv=-90, maxv=90).round(5)

        # Distance
        d = np.ma.zeros(o_dim.size, dtype=np.float64)
        d[1:] = great_distance(start_latitude=y[0:-1],
                               end_latitude=y[1:],
                               start_longitude=x[0:-1],
                               end_longitude=x[1:])['distance']
        d = generic_masked(np.cumsum(d), minv=0).round(2)

        # Sample dimension
        z = generic_masked(zvar[:].flatten(),
                           attrs=self.vatts(zvar.name)).round(5)

        df_data = {
            't': t,
            'x': x,
            'y': y,
            'z': z,
            'trajectory': r,
            'profile': p,
            'distance': d
        }

        building_index_to_drop = np.ones(o_dim.size, dtype=bool)
        extract_vars = list(set(self.data_vars() + self.ancillary_vars()))
        for i, dvar in enumerate(extract_vars):

            # Profile dimensions
            if dvar.dimensions == (p_dim.name, ):
                vdata = np.ma.masked_all(o_dim.size, dtype=dvar.dtype)
                si = 0
                for j in np.arange(profile_indexes.size):
                    ei = si + o_index_var[j]
                    vdata[si:ei] = dvar[j]
                    si = ei

            # Sample dimensions
            elif dvar.dimensions == (o_dim.name, ):
                vdata = generic_masked(dvar[:].flatten(),
                                       attrs=self.vatts(dvar.name)).round(3)

            else:
                logger.warning(
                    "Skipping variable {}... it didn't seem like a data variable"
                    .format(dvar))
                continue

            building_index_to_drop = (building_index_to_drop == True) & (
                vdata.mask == True)  # noqa
            df_data[dvar.name] = vdata

        df = pd.DataFrame(df_data)

        # Drop all data columns with no data
        if clean_cols:
            df = df.dropna(axis=1, how='all')

        # Drop all data rows with no data variable data
        if clean_rows:
            df = df.iloc[~building_index_to_drop]

        return df
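A hypothetical round trip, assuming this method lives on an enhanced netCDF4.Dataset subclass opened against a contiguous ragged trajectory-profile file:

    with EnhancedDataset('trajectory_profile.nc') as nc:  # hypothetical class and file
        df = nc.to_dataframe(clean_rows=False)
        print(df[['trajectory', 'profile', 't', 'z', 'distance']].head())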
Example #10
def urnify_from_dict(naming_authority, station_identifier, data_dict):

    def clean_value(v):
        return v.replace('(', '').replace(')', '').strip().replace(' ', '_')
    extras = []
    intervals = []  # Because it can be part of cell_methods and its own dict key

    if 'cell_methods' in data_dict and data_dict['cell_methods']:
        cm = data_dict['cell_methods']
        keys = []
        values = []
        sofar = ''
        for i, c in enumerate(cm):
            if c == ":":
                if len(keys) == len(values):
                    keys.append(clean_value(sofar))
                else:
                    for j in reversed(range(0, i)):
                        if cm[j] == " ":
                            key = clean_value(cm[j+1:i])
                            values.append(clean_value(sofar.replace(key, '')))
                            keys.append(key)
                            break
                sofar = ''
            else:
                sofar += c
        # The last value needs appending
        values.append(clean_value(sofar))

        pairs = zip(keys, values)

        mems = []
        cell_intervals = []
        pairs = sorted(pairs)
        for group, members in itertools.groupby(pairs, lambda x: x[0]):
            if group == 'interval':
                cell_intervals = [m[1] for m in members]
            elif group in ['time', 'area']:  # Ignore 'comments'. May need to add more things here...
                member_strings = []
                for m in members:
                    member_strings.append('{}:{}'.format(group, m[1]))
                mems.append(','.join(member_strings))
        if mems:
            extras.append('cell_methods={}'.format(','.join(mems)))
        if cell_intervals:
            intervals += cell_intervals

    if 'bounds' in data_dict and data_dict['bounds']:
        extras.append('bounds={0}'.format(data_dict['bounds']))

    if 'vertical_datum' in data_dict and data_dict['vertical_datum']:
        extras.append('vertical_datum={0}'.format(data_dict['vertical_datum']))

    if 'interval' in data_dict and data_dict['interval']:
        if isinstance(data_dict['interval'], (list, tuple,)):
            intervals += data_dict['interval']
        elif isinstance(data_dict['interval'], str):
            intervals += [data_dict['interval']]

    if 'standard_name' in data_dict and data_dict['standard_name']:
        variable_name = data_dict['standard_name']
    elif 'name' in data_dict and data_dict['name']:
        variable_name = data_dict['name']
    else:
        variable_name = ''.join(random.choice(string.ascii_uppercase) for _ in range(8)).lower()
        logger.warning("Had to randomly generate a variable name: {0}".format(variable_name))

    if 'discriminant' in data_dict and data_dict['discriminant']:
        variable_name = '{}-{}'.format(variable_name, data_dict['discriminant'])

    if intervals:
        intervals = list(set(intervals))  # Unique them
        extras.append('interval={}'.format(','.join(intervals)))

    if extras:
        variable_name = '{0}#{1}'.format(variable_name, ';'.join(extras))

    u = IoosUrn(asset_type='sensor',
                authority=naming_authority,
                label=station_identifier,
                component=variable_name,
                version=None)

    return u.urn
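Tracing the function above with a made-up data_dict; the final string assumes IoosUrn.urn renders its fields in the usual urn:ioos:asset_type:authority:label:component order:

    data_dict = {
        'standard_name': 'sea_water_temperature',
        'vertical_datum': 'NAVD88',
        'interval': 'PT1H',
    }
    urnify_from_dict('us.test', 'station1', data_dict)
    # -> 'urn:ioos:sensor:us.test:station1:sea_water_temperature#vertical_datum=NAVD88;interval=PT1H'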
Example #11
def safe_attribute_typing(zdtype, value):
    try:
        return zdtype.type(value)
    except ValueError:
        logger.warning("Could not convert {} to type {}".format(value, zdtype))
        return None
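For instance, with numpy dtypes (matching the zdtype.type call above):

    import numpy as np

    safe_attribute_typing(np.dtype('float32'), '4.5')   # returns np.float32(4.5)
    safe_attribute_typing(np.dtype('float32'), 'abc')   # logs a warning, returns None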
Example #12
    def add_variable(self,
                     variable_name,
                     values,
                     times=None,
                     verticals=None,
                     sensor_vertical_datum=None,
                     attributes=None,
                     unlink_from_profile=None,
                     fillvalue=None,
                     raise_on_error=False):

        if isinstance(values, (
                list,
                tuple,
        )) and values:
            values = np.asarray(values)
        if isinstance(times, (
                list,
                tuple,
        )) and times:
            times = np.asarray(times)
        if isinstance(verticals, (
                list,
                tuple,
        )) and verticals:
            verticals = np.asarray(verticals)

        # Set vertical datum on the CRS variable
        if sensor_vertical_datum is not None:
            try:
                self.crs.geoid_name = sensor_vertical_datum
                self.crs.vertical_datum = sensor_vertical_datum
                self.crs.water_surface_reference_datum = sensor_vertical_datum
            except AttributeError:
                pass

        # Set default fillvalue for new variables
        if fillvalue is None:
            fillvalue = -9999.9

        used_values = None
        try:
            if unlink_from_profile is True:
                used_values = np.ma.reshape(values, (self.time.size, ))
                used_values = used_values[self.time_indexes]
            # These next two cases should work for all but a few cases, which are caught below
            elif self.z.size == 1:
                used_values = np.ma.reshape(values, (self.time.size, ))
                used_values = used_values[self.time_indexes]
            else:
                used_values = np.ma.reshape(values, (
                    self.time.size,
                    self.z.size,
                ))
                used_values = used_values[self.time_indexes]
                try:
                    used_values = used_values[:, self.vertical_indexes]
                except IndexError:
                    # The vertical values most likely had duplicates.  Ignore the
                    # faulty index here and try to save the values as is.
                    pass
        except ValueError:
            if raise_on_error is True:
                raise
            else:
                logger.warning(
                    "Could not do a simple reshape of data, trying to match manually! Time:{!s}, Heights:{!s}, Values:{!s}"
                    .format(self.time.size, self.z.size, values.size))
            if self.z.size > 1:
                if times is not None and verticals is not None:
                    # Hmmm, we have two actual height values for this station.
                    # Not cool man, not cool.
                    # Reindex the entire values array.  This is slow.
                    indexed = ((bisect.bisect_left(self.time[:], times[i]),
                                bisect.bisect_left(self.z[:],
                                                   verticals[i]), values[i])
                               for i in range(values.size))
                    used_values = np.ndarray((
                        self.time.size,
                        self.z.size,
                    ),
                                             dtype=values.dtype)
                    used_values.fill(float(fillvalue))
                    for (tzi, zzi, vz) in indexed:
                        if zzi < self.z.size and tzi < self.time.size:
                            used_values[tzi, zzi] = vz
                else:
                    raise ValueError(
                        "You need to pass in both 'times' and 'verticals' parameters that match the size of the 'values' parameter."
                    )
            else:
                if times is not None:
                    # Ugh, find the time indexes manually
                    indexed = ((bisect.bisect_left(self.time[:],
                                                   times[i]), values[i])
                               for i in range(values.size))
                    used_values = np.ndarray((self.time.size, ),
                                             dtype=values.dtype)
                    used_values.fill(float(fillvalue))
                    for (tzi, vz) in indexed:
                        if tzi < self.time.size:
                            used_values[tzi] = vz
                else:
                    raise ValueError(
                        "You need to pass in a 'times' parameter that matches the size of the 'values' parameter."
                    )

        with EnhancedDataset(self.out_file, 'a') as nc:
            logger.info("Setting values for {}...".format(variable_name))
            if len(used_values.shape) == 1:
                var = nc.createVariable(variable_name,
                                        used_values.dtype, ("time", ),
                                        fill_value=fillvalue,
                                        chunksizes=(1000, ),
                                        zlib=True)
                if self.z.size == 1:
                    var.coordinates = "{} {} latitude longitude".format(
                        self.time_axis_name, self.vertical_axis_name)
                else:
                    # This is probably a bottom sensor on an ADCP or something, don't add the height coordinate
                    var.coordinates = "{} latitude longitude".format(
                        self.time_axis_name)
                    if unlink_from_profile is True:
                        # Create metadata variable for the sensor_depth
                        if nc.variables.get('sensor_depth') is None:
                            logger.info(
                                "Setting the special case 'sensor_depth' metadata variable"
                            )
                            inst_depth = nc.createVariable(
                                'sensor_depth', 'f4')
                            inst_depth.units = 'm'
                            inst_depth.standard_name = 'surface_altitude'
                            inst_depth.positive = self.vertical_positive
                            if self.vertical_positive.lower() == 'down':
                                inst_depth.long_name = 'sensor depth below datum'
                            elif self.vertical_positive.lower() == 'up':
                                inst_depth.long_name = 'sensor height above datum'
                            inst_depth.datum = sensor_vertical_datum or 'Unknown'
                            if verticals is not None and verticals.size > 0:
                                inst_depth[:] = verticals[0]
                            else:
                                inst_depth[:] = self.vertical_fill

            elif len(used_values.shape) == 2:
                var = nc.createVariable(variable_name,
                                        used_values.dtype, (
                                            "time",
                                            "z",
                                        ),
                                        fill_value=fillvalue,
                                        chunksizes=(
                                            1000,
                                            self.z.size,
                                        ),
                                        zlib=True)
                var.coordinates = "{} {} latitude longitude".format(
                    self.time_axis_name, self.vertical_axis_name)
            else:
                raise ValueError(
                    "Could not create variable.  Shape of data is {!s}.  Expected a dimension of 1 or 2, not {!s}."
                    .format(used_values.shape, len(used_values.shape)))
            # Set the variable attributes as passed in
            if attributes:
                for k, v in attributes.items():

                    if k == 'vertical_datum' and sensor_vertical_datum is None and v is not None:
                        # Use this as the vertical datum if it is specified and we didn't already have one
                        try:
                            self.crs.geoid_name = v
                            self.crs.vertical_datum = v
                            self.crs.water_surface_reference_datum = v
                        except AttributeError:
                            pass

                    if k not in ['name', 'coordinates', '_FillValue'
                                 ] and v is not None:
                        try:
                            var.setncattr(k, v)
                        except BaseException:
                            logger.info(
                                'Could not add attribute {}: {}, skipping.'.
                                format(k, v))

            var.grid_mapping = 'crs'
            var[:] = used_values

            return var
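A hedged sketch of the common 1-D case; ts stands for an existing instance of the class this method belongs to, and temperature_values is a hypothetical array that reshapes to (time,):

    var = ts.add_variable('sea_water_temperature',
                          values=temperature_values,
                          attributes={'units': 'degree_Celsius'})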
Example #13
import pytz
try:
    import pyncml
except ImportError:
    raise ImportError("You must install the 'pyncml' library to use this functionality.")

import netCDF4
import numpy as np
from pyaxiom.utils import DotDict

from pyaxiom import logger

try:
    from nco import Nco
except ImportError:
    logger.warning("NCO not found.  The NCO python bindings are required to use 'Collection.combine'.")


class Collection(object):

    @classmethod
    def from_ncml_file(cls, ncml_path, apply_to_members=None):
        try:
            with open(ncml_path) as f:
                return cls(pyncml.scan(f.read(), apply_to_members=apply_to_members))
        except BaseException:
            logger.exception("Could not load Collection from NcML.  Please check the NcML.")

    @classmethod
    def from_directory(cls, directory, suffix=".nc", subdirs=True, dimName='time', apply_to_members=None):
Example #14
    def add_variable(self, variable_name, values, times=None, verticals=None, sensor_vertical_datum=None, attributes=None, unlink_from_profile=None, fillvalue=None, raise_on_error=False, create_instrument_variable=False):

        if isinstance(values, (list, tuple,)) and values:
            values = np.asarray(values)
        if get_type(values) == np.int64:
            # Create values as int32 because DAP does not support int64 until DAP4.
            values = values.astype(np.int32)

        if isinstance(times, (list, tuple,)) and times:
            times = np.asarray(times)
        if get_type(times) == np.int64:
            # Create time as int32 because DAP does not support int64 until DAP4.
            times = times.astype(np.int32)

        if isinstance(verticals, (list, tuple,)) and verticals:
            verticals = np.asarray(verticals)
        if get_type(verticals) == np.int64:
            # Create verticals as int32 because DAP does not support int64 until DAP4.
            verticals = verticals.astype(np.int32)

        # Set vertical datum on the CRS variable
        if sensor_vertical_datum is not None:
            try:
                self.crs.geoid_name = sensor_vertical_datum
                self.crs.vertical_datum = sensor_vertical_datum
                self.crs.water_surface_reference_datum = sensor_vertical_datum
                if not hasattr(self._nc, "geospatial_bounds_vertical_crs"):
                    self._nc.setncattr("geospatial_bounds_vertical_crs", sensor_vertical_datum)
            except AttributeError:
                pass

        # Set default fillvalue for new variables
        if fillvalue is None:
            fillvalue = -9999.9
        fillvalue = values.dtype.type(fillvalue)

        used_values = None

        vertical_axis = self._nc.variables.get(self.vertical_axis_name)
        try:
            if unlink_from_profile is True:
                used_values = np.ma.reshape(values, (self.time.size, ))
                used_values = used_values[self.time_indexes]
            # These next two cases should work for all but a few cases, which are caught below
            elif vertical_axis.size == 1:
                used_values = np.ma.reshape(values, (self.time.size, ))
                used_values = used_values[self.time_indexes]
            else:
                used_values = np.ma.reshape(values, (self.time.size, vertical_axis.size, ))
                used_values = used_values[self.time_indexes]
                try:
                    used_values = used_values[:, self.vertical_indexes]
                except IndexError:
                    # The vertical values most likely had duplicates.  Ignore the
                    # faulty index here and try to save the values as is.
                    pass
        except ValueError:
            if raise_on_error is True:
                raise
            else:
                logger.warning("Could not do a simple reshape of data, trying to match manually! Time:{!s}, Heights:{!s}, Values:{!s}".format(self.time.size, vertical_axis.size, values.size))
            if vertical_axis.size > 1:
                if times is not None and verticals is not None:
                    # Hmmm, we have two actual height values for this station.
                    # Not cool man, not cool.
                    # Reindex the entire values array.  This is slow.
                    indexed = ((bisect.bisect_left(self.time[:], times[i]), bisect.bisect_left(vertical_axis[:], verticals[i]), values[i]) for i in range(values.size))
                    used_values = np.ndarray((self.time.size, vertical_axis.size, ), dtype=get_type(values))
                    used_values.fill(fillvalue)
                    for (tzi, zzi, vz) in indexed:
                        if zzi < vertical_axis.size and tzi < self.time.size:
                            used_values[tzi, zzi] = vz
                    del indexed
                else:
                    raise ValueError("You need to pass in both 'times' and 'verticals' parameters that matches the size of the 'values' parameter.")
            else:
                if times is not None:
                    # Ugh, find the time indexes manually
                    indexed = ((bisect.bisect_left(self.time[:], times[i]), values[i]) for i in range(values.size))
                    used_values = np.ndarray((self.time.size, ), dtype=get_type(values))
                    used_values.fill(fillvalue)
                    for (tzi, vz) in indexed:
                        if tzi < self.time.size:
                            used_values[tzi] = vz
                    del indexed
                else:
                    raise ValueError("You need to pass in a 'times' parameter that matches the size of the 'values' parameter.")

        logger.info("Setting values for {}...".format(variable_name))
        if len(used_values.shape) == 1:
            var = self._nc.createVariable(variable_name, get_type(used_values), ("time",), fill_value=fillvalue, chunksizes=(self.time_chunk,), zlib=True)
            self._nc.setncattr('ncei_template_version', 'NCEI_NetCDF_TimeSeries_Orthogonal_Template_v2.0')
            if vertical_axis.size == 1:
                var.coordinates = "{} {} latitude longitude".format(self.time_axis_name, self.vertical_axis_name)
            else:
                # This is probably a bottom sensor on an ADCP or something, don't add the height coordinate
                var.coordinates = "{} latitude longitude".format(self.time_axis_name)
                if unlink_from_profile is True:
                    # Create metadata variable for the sensor_depth
                    if verticals is not None and self._nc.variables.get('sensor_depth') is None:
                        logger.info("Setting the special case 'sensor_depth' metadata variable")
                        inst_depth = self._nc.createVariable('sensor_depth', get_type(verticals))
                        inst_depth.units = 'm'
                        inst_depth.standard_name = 'surface_altitude'
                        inst_depth.positive = self.vertical_positive
                        if self.vertical_positive.lower() == 'down':
                            inst_depth.long_name = 'sensor depth below datum'
                        elif self.vertical_positive.lower() == 'up':
                            inst_depth.long_name = 'sensor height above datum'
                        inst_depth.datum = sensor_vertical_datum or 'Unknown'
                        if verticals is not None and verticals.size > 0:
                            inst_depth[:] = verticals[0]
                        else:
                            inst_depth[:] = self.vertical_fill

        elif len(used_values.shape) == 2:
            var = self._nc.createVariable(variable_name, get_type(used_values), ("time", "z",), fill_value=fillvalue, chunksizes=(self.time_chunk, vertical_axis.size,), zlib=True)
            var.coordinates = "{} {} latitude longitude".format(self.time_axis_name, self.vertical_axis_name)
            self._nc.setncattr('ncei_template_version', 'NCEI_NetCDF_TimeSeriesProfile_Orthogonal_Template_v2.0')
        else:
            raise ValueError("Could not create variable.  Shape of data is {!s}.  Expected a dimension of 1 or 2, not {!s}.".format(used_values.shape, len(used_values.shape)))

        # Set missing_value as well
        attributes = attributes or {}
        attributes['missing_value'] = fillvalue
        # Set the variable attributes as passed in
        if attributes:
            for k, v in attributes.items():

                if k == 'vertical_datum' and sensor_vertical_datum is None and v is not None:
                    # Use this as the vertical datum if it is specified and we didn't already have one
                    try:
                        self.crs.geoid_name = v
                        self.crs.vertical_datum = v
                        self.crs.water_surface_reference_datum = v
                        if not hasattr(self._nc, "geospatial_bounds_vertical_crs"):
                            self._nc.setncattr("geospatial_bounds_vertical_crs", v)
                    except AttributeError:
                        pass

                if k not in ['name', 'coordinates', '_FillValue'] and v is not None:
                    try:
                        var.setncattr(k, v)
                    except BaseException:
                        logger.info('Could not add attribute {}: {}, skipping.'.format(k, v))

        # Add a long name if it doesn't exist
        if not hasattr(var, 'long_name'):
            varunits = getattr(var, 'units', None)
            vartitle = getattr(var, 'standard_name', getattr(var, 'name'))
            vartitle = vartitle.title().replace('_', ' ')
            if varunits is not None:
                vartitle = '{} ({})'.format(vartitle, varunits)
            var.long_name = vartitle
        var.grid_mapping = 'crs'
        var.platform = 'platform'
        var.ancillary_variables = 'platform'
        var.coverage_content_type = 'physicalMeasurement'
        var[:] = used_values

        if create_instrument_variable is True:
            self.add_instrument_variable(variable_name)

        self._nc.sync()
        del used_values
        return var
Example #15
    def add_variable(self, variable_name, values, times=None, verticals=None, sensor_vertical_datum=None, attributes=None, unlink_from_profile=None, fillvalue=None, raise_on_error=False):

        if isinstance(values, (list, tuple,)) and values:
            values = np.asarray(values)
        if isinstance(times, (list, tuple,)) and times:
            times = np.asarray(times)
        if isinstance(verticals, (list, tuple,)) and verticals:
            verticals = np.asarray(verticals)

        # Set vertical datum on the CRS variable
        if sensor_vertical_datum is not None:
            try:
                self.crs.geoid_name = sensor_vertical_datum
                self.crs.vertical_datum = sensor_vertical_datum
                self.crs.water_surface_reference_datum = sensor_vertical_datum
            except AttributeError:
                pass

        # Set default fillvalue for new variables
        if fillvalue is None:
            fillvalue = -9999.9

        used_values = None
        try:
            if unlink_from_profile is True:
                used_values = np.ma.reshape(values, (self.time.size, ))
                used_values = used_values[self.time_indexes]
            # These next two cases should work for all but a few cases, which are caught below
            elif self.z.size == 1:
                used_values = np.ma.reshape(values, (self.time.size, ))
                used_values = used_values[self.time_indexes]
            else:
                used_values = np.ma.reshape(values, (self.time.size, self.z.size, ))
                used_values = used_values[self.time_indexes]
                try:
                    used_values = used_values[:, self.vertical_indexes]
                except IndexError:
                    # The vertical values most likely had duplicates.  Ignore the
                    # faulty index here and try to save the values as is.
                    pass
        except ValueError:
            if raise_on_error is True:
                raise
            else:
                logger.warning("Could not do a simple reshape of data, trying to match manually! Time:{!s}, Heights:{!s}, Values:{!s}".format(self.time.size, self.z.size, values.size))
            if self.z.size > 1:
                if times is not None and verticals is not None:
                    # Hmmm, we have two actual height values for this station.
                    # Not cool man, not cool.
                    # Reindex the entire values array.  This is slow.
                    indexed = ((bisect.bisect_left(self.time[:], times[i]), bisect.bisect_left(self.z[:], verticals[i]), values[i]) for i in range(values.size))
                    used_values = np.ndarray((self.time.size, self.z.size, ), dtype=values.dtype)
                    used_values.fill(float(fillvalue))
                    for (tzi, zzi, vz) in indexed:
                        if zzi < self.z.size and tzi < self.time.size:
                            used_values[tzi, zzi] = vz
                else:
                    raise ValueError("You need to pass in both 'times' and 'verticals' parameters that matches the size of the 'values' parameter.")
            else:
                if times is not None:
                    # Ugh, find the time indexes manually
                    indexed = ((bisect.bisect_left(self.time[:], times[i]), values[i]) for i in range(values.size))
                    used_values = np.ndarray((self.time.size, ), dtype=values.dtype)
                    used_values.fill(float(fillvalue))
                    for (tzi, vz) in indexed:
                        if tzi < self.time.size:
                            used_values[tzi] = vz
                else:
                    raise ValueError("You need to pass in a 'times' parameter that matches the size of the 'values' parameter.")

        with EnhancedDataset(self.out_file, 'a') as nc:
            logger.info("Setting values for {}...".format(variable_name))
            if len(used_values.shape) == 1:
                var = nc.createVariable(variable_name, used_values.dtype, ("time",), fill_value=fillvalue, chunksizes=(1000,), zlib=True)
                if self.z.size == 1:
                    var.coordinates = "{} {} latitude longitude".format(self.time_axis_name, self.vertical_axis_name)
                else:
                    # This is probably a bottom sensor on an ADCP or something, don't add the height coordinate
                    var.coordinates = "{} latitude longitude".format(self.time_axis_name)
                    if unlink_from_profile is True:
                        # Create metadata variable for the sensor_depth
                        if nc.variables.get('sensor_depth') is None:
                            logger.info("Setting the special case 'sensor_depth' metadata variable")
                            inst_depth = nc.createVariable('sensor_depth', 'f4')
                            inst_depth.units = 'm'
                            inst_depth.standard_name = 'surface_altitude'
                            inst_depth.positive = self.vertical_positive
                            if self.vertical_positive.lower() == 'down':
                                inst_depth.long_name = 'sensor depth below datum'
                            elif self.vertical_positive.lower() == 'up':
                                inst_depth.long_name = 'sensor height above datum'
                            inst_depth.datum = sensor_vertical_datum or 'Unknown'
                            if verticals is not None and verticals.size > 0:
                                inst_depth[:] = verticals[0]
                            else:
                                inst_depth[:] = self.vertical_fill

            elif len(used_values.shape) == 2:
                var = nc.createVariable(variable_name, used_values.dtype, ("time", "z",), fill_value=fillvalue, chunksizes=(1000, self.z.size,), zlib=True)
                var.coordinates = "{} {} latitude longitude".format(self.time_axis_name, self.vertical_axis_name)
            else:
                raise ValueError("Could not create variable.  Shape of data is {!s}.  Expected a dimension of 1 or 2, not {!s}.".format(used_values.shape, len(used_values.shape)))
            # Set the variable attributes as passed in
            if attributes:
                for k, v in attributes.items():
                    if k == 'vertical_datum' and sensor_vertical_datum is None and v is not None:
                        # Use this as the vertical datum if it is specified and we didn't already have one
                        try:
                            self.crs.geoid_name = v
                            self.crs.vertical_datum = v
                            self.crs.water_surface_reference_datum = v
                        except AttributeError:
                            pass

                    if k not in ['name', 'coordinates', '_FillValue'] and v is not None:
                        try:
                            var.setncattr(k, v)
                        except Exception:
                            logger.info('Could not add attribute {}: {}, skipping.'.format(k, v))

            var.grid_mapping = 'crs'
            var[:] = used_values

            return var
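
A minimal usage sketch for the add_variable excerpt above, under stated assumptions: the TimeSeries constructor signature is taken from Example #18 below, and the output path, station URN, and variable values are hypothetical.

import numpy as np

# Ten hourly timestamps (epoch seconds) at a single 5 m sensor depth
times = np.arange(10) * 3600
verticals = np.asarray([5.0])
ts = TimeSeries('/tmp/out', 32.7, -117.2,
                'urn:ioos:station:example:st1',
                global_attributes={}, times=times, verticals=verticals,
                output_filename='st1.nc')
# Values must line up with the time/z axes, or matching is attempted as above
ts.add_variable('sea_water_temperature', np.random.rand(10),
                attributes={'units': 'degree_Celsius'})
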
Example #17
import pytz
try:
    import pyncml
except ImportError:
    raise ImportError("You must install the 'pyncml' library to use this functionality.")

import netCDF4
import numpy as np
from pyaxiom.utils import DotDict

from pyaxiom import logger

try:
    from nco import Nco
except ImportError:
    logger.warning("NCO not found.  The NCO python bindings are required to use 'Collection.combine'.")


class Collection(object):

    @classmethod
    def from_ncml_file(cls, ncml_path, apply_to_members=None):
        try:
            with open(ncml_path) as f:
                return cls(pyncml.scan(f.read(), apply_to_members=apply_to_members))
        except Exception:
            logger.exception("Could not load Collection from NcML.  Please check the NcML.")

    @classmethod
    def from_directory(cls, directory, suffix=".nc", subdirs=True, dimName='time', apply_to_members=None):
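
A short usage sketch for Collection.from_ncml_file above; the NcML path is hypothetical. Note that the classmethod returns None when the NcML cannot be parsed, because the exception is only logged.

collection = Collection.from_ncml_file('/data/aggregations/station.ncml')
if collection is None:
    raise RuntimeError('NcML scan failed; see the logged exception for details.')
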
Example #18
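    # NOTE: this excerpt appears to be a staticmethod of TimeSeries; it assumes
    # 'import calendar', 'import numpy as np', and the module-level 'logger'
    # are available in the enclosing module.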
    def from_dataframe(df,
                       output_directory,
                       output_filename,
                       latitude,
                       longitude,
                       station_name,
                       global_attributes,
                       variable_name,
                       variable_attributes,
                       sensor_vertical_datum=None,
                       fillvalue=None,
                       data_column=None,
                       vertical_axis_name=None,
                       vertical_positive=None):

        if fillvalue is None:
            fillvalue = -9999.9
        if data_column is None:
            data_column = 'value'

        df[data_column] = df[data_column].fillna(fillvalue)
        times = np.asarray(
            [calendar.timegm(x.utctimetuple()) for x in df['time']])
        df['depth'] = df['depth'].fillna(fillvalue)
        depths = df['depth'].values
        try:
            ts = TimeSeries(output_directory,
                            latitude,
                            longitude,
                            station_name,
                            global_attributes,
                            times=times,
                            verticals=depths,
                            output_filename=output_filename,
                            vertical_fill=fillvalue,
                            vertical_axis_name=vertical_axis_name,
                            vertical_positive=vertical_positive)
            ts.add_variable(variable_name,
                            df[data_column].values,
                            attributes=variable_attributes,
                            sensor_vertical_datum=sensor_vertical_datum,
                            raise_on_error=True)
        except ValueError:
            logger.warning(
                "Failed first attempt, trying again with unique times.")
            try:
                # Try uniquing time
                newtimes = np.unique(times)
                ts = TimeSeries(output_directory,
                                latitude,
                                longitude,
                                station_name,
                                global_attributes,
                                times=newtimes,
                                verticals=depths,
                                output_filename=output_filename,
                                vertical_fill=fillvalue,
                                vertical_axis_name=vertical_axis_name,
                                vertical_positive=vertical_positive)
                ts.add_variable(variable_name,
                                df[data_column].values,
                                attributes=variable_attributes,
                                sensor_vertical_datum=sensor_vertical_datum,
                                raise_on_error=True)
            except ValueError:
                logger.warning(
                    "Failed second attempt, trying again with unique depths.")
                try:
                    # Try uniquing depths
                    newdepths = np.unique(df['depth'].values)
                    ts = TimeSeries(output_directory,
                                    latitude,
                                    longitude,
                                    station_name,
                                    global_attributes,
                                    times=times,
                                    verticals=newdepths,
                                    output_filename=output_filename,
                                    vertical_fill=fillvalue,
                                    vertical_axis_name=vertical_axis_name,
                                    vertical_positive=vertical_positive)
                    ts.add_variable(
                        variable_name,
                        df[data_column].values,
                        attributes=variable_attributes,
                        sensor_vertical_datum=sensor_vertical_datum,
                        raise_on_error=True)
                except ValueError:
                    logger.warning(
                        "Failed third attempt, uniquing time and depth.")
                    try:
                        # Unique both time and depth (recompute newtimes here so
                        # this branch does not rely on the earlier attempt's variable)
                        newtimes = np.unique(times)
                        newdepths = np.unique(df['depth'].values)
                        ts = TimeSeries(output_directory,
                                        latitude,
                                        longitude,
                                        station_name,
                                        global_attributes,
                                        times=newtimes,
                                        verticals=newdepths,
                                        output_filename=output_filename,
                                        vertical_fill=fillvalue,
                                        vertical_axis_name=vertical_axis_name,
                                        vertical_positive=vertical_positive)
                        ts.add_variable(
                            variable_name,
                            df[data_column].values,
                            attributes=variable_attributes,
                            sensor_vertical_datum=sensor_vertical_datum,
                            raise_on_error=True)
                    except ValueError:
                        logger.warning(
                            "Failed fourth attempt, manually matching indexes (this is slow)."
                        )
                        # Manually match
                        ts = TimeSeries(output_directory,
                                        latitude,
                                        longitude,
                                        station_name,
                                        global_attributes,
                                        times=times,
                                        verticals=depths,
                                        output_filename=output_filename,
                                        vertical_fill=fillvalue,
                                        vertical_axis_name=vertical_axis_name,
                                        vertical_positive=vertical_positive)
                        ts.add_variable(
                            variable_name,
                            df[data_column].values,
                            attributes=variable_attributes,
                            times=times,
                            verticals=depths,
                            sensor_vertical_datum=sensor_vertical_datum,
                            raise_on_error=False)
        return ts
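
A hedged usage sketch for the from_dataframe fallback chain above, assuming the excerpt is a staticmethod of TimeSeries (as the calls inside suggest); the DataFrame contents, output path, and station URN are hypothetical, and the column names follow the excerpt's defaults ('time', 'depth', 'value').

import pandas as pd
from datetime import datetime, timezone

df = pd.DataFrame({
    'time':  [datetime(2016, 1, 1, h, tzinfo=timezone.utc) for h in range(3)],
    'depth': [5.0, 5.0, 5.0],
    'value': [10.1, 10.3, 9.8],
})
ts = TimeSeries.from_dataframe(
    df, '/tmp/out', 'st1.nc', 32.7, -117.2,
    'urn:ioos:station:example:st1', {}, 'sea_water_temperature',
    {'units': 'degree_Celsius'})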