Example #1
    def add_instrument_variable(self, variable_name):
        if variable_name not in self._nc.variables:
            logger.error("Variable {} not found in file, cannot create instrument metadata variable")
            return
        elif 'id' not in self._nc.ncattrs() or 'naming_authority' not in self._nc.ncattrs():
            logger.error("Global attributes 'id' and 'naming_authority' are required to create an instrument variable")
            return

        instr_var_name = "{}_instrument".format(variable_name)
        instrument = self._nc.createVariable(instr_var_name, "i4")

        datavar = self._nc.variables[variable_name]
        vats = { k: getattr(datavar, k) for k in datavar.ncattrs() }
        instrument_urn = urnify(self._nc.naming_authority, self._nc.id, vats)

        inst_urn = IoosUrn.from_string(instrument_urn)
        instrument.long_name = 'Instrument measuring {} from {}'.format(inst_urn.component, inst_urn.label)
        instrument.ioos_code = instrument_urn
        instrument.short_name = inst_urn.component
        instrument.definition = "http://mmisw.org/ont/ioos/definition/sensorID"

        datavar.instrument = instr_var_name

        # Append the instrument to the ancillary variables
        av = getattr(datavar, 'ancillary_variables', '')
        av += ' {}'.format(instr_var_name)
        datavar.ancillary_variables = av.strip()

        self._nc.sync()
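
A hedged usage sketch of add_instrument_variable (hypothetical: `writer` stands in for an instance of the unshown enclosing class, wrapping a writable netCDF4.Dataset whose global attributes include 'id' and 'naming_authority'):

    # Hypothetical instance of the enclosing class (not shown in this example)
    writer.add_instrument_variable('sea_water_temperature')

    # The dataset now contains a scalar 'sea_water_temperature_instrument'
    # variable with long_name, ioos_code, short_name and definition set, and
    # the data variable references it via its 'instrument' and
    # 'ancillary_variables' attributes.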
Example #2
    def bins(self, delta, starting, hard_start=None, hard_end=None):
        ending = starting + delta

        windows = []

        member_length = len(self.aggregation.members)
        last_member = self.aggregation.members[-1]
        index = 0

        if hard_start is None:
            hard_start = starting
        if hard_end is None:
            hard_end = last_member.ending

        # Loop until we process the last member of the aggregation
        while last_member.ending >= starting:

            # Window for this timedelta
            member = None
            window = DotDict(starting=starting, ending=ending, members=[])

            for x in range(index, member_length):
                member = self.aggregation.members[x]

                if member.starting >= starting and member.ending < ending:
                    if member.starting >= hard_start and member.ending <= hard_end:
                        # The simplest case... completely part of this aggregation
                        # and within the specified 'hard' bounds
                        window.members.append(member)
                        index += 1

                elif member.starting >= ending:
                    # This member is outside of the current window and we need to make
                    # new window(s) until it fits into one.
                    break

                elif (member.starting >= starting and member.ending >= ending) or (
                    member.starting < starting and member.ending < ending
                ):
                    # This member overlaps where the cutoff would be.  This is
                    # NOT supported at the moment
                    logger.error(
                        "Skipping {0}.  Members that overlap a bin boundary are not supported at this time.".format(
                            member.path
                        )
                    )
                    index += 1

            # Only keep windows that aggregate more than one member
            if len(window.members) > 1:
                windows.append(window)

            # Move the time window forward by the delta
            starting = ending
            ending = ending + delta

        return windows
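
A hedged usage sketch of bins() (hypothetical: `coll` stands in for an instance of the enclosing collection class, whose aggregation members are assumed to be timezone-aware and sorted by starting time):

    from datetime import timedelta

    # Hypothetical collection built elsewhere, e.g. with from_glob below
    windows = coll.bins(delta=timedelta(days=7), starting=coll.aggregation.starting)
    for w in windows:
        # Each window is a DotDict holding the 2+ members that fall inside it
        print(w.starting, w.ending, len(w.members))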
Example #3
    def bins(self, delta, starting, hard_start=None, hard_end=None):
        ending = starting + delta

        windows = []

        member_length = len(self.aggregation.members)
        last_member = self.aggregation.members[-1]
        index = 0

        if hard_start is None:
            hard_start = starting
        if hard_end is None:
            hard_end = last_member.ending

        # Loop until we process the last member of the aggregation
        while last_member.ending >= starting:

            # Window for this timedelta
            member = None
            window = DotDict(starting=starting, ending=ending, members=[])

            for x in range(index, member_length):
                member = self.aggregation.members[x]

                if member.starting >= starting and member.ending < ending:
                    if member.starting >= hard_start and member.ending <= hard_end:
                        # The simplest case... completely part of this aggregation
                        # and within the specified 'hard' bounds
                        window.members.append(member)
                        index += 1

                elif member.starting >= ending:
                    # This member is outside of the current window and we need to make
                    # new window(s) until it fits into one.
                    break

                elif (member.starting >= starting and member.ending >= ending) or \
                     (member.starting < starting and member.ending < ending):
                    # This member overlaps where the cutoff would be.  This is
                    # NOT supported at the moment
                    logger.error("Skipping {0}.  Members that overlap a bin boundary are not supported at this time.".format(member.path))
                    index += 1

            # Only keep windows that aggregate more than one member
            if len(window.members) > 1:
                windows.append(window)

            # Move the time window forward by the delta
            starting = ending
            ending = ending + delta

        return windows
Example #4
    def valid(self):
        ASSET_TYPES = ['station', 'network', 'sensor', 'survey']

        # Plain if-checks rather than assert/except so validation still
        # runs under `python -O` (asserts are stripped with optimization)
        if self.authority is None:
            logger.error('An "authority" is required')
            return False

        if self.label is None:
            logger.error('A "label" is required')
            return False

        if self.asset_type not in ASSET_TYPES:
            logger.error(
                'asset_type {0} is unknown. Must be one of: {1}'.format(
                    self.asset_type, ', '.join(ASSET_TYPES)))
            return False

        if self.asset_type == 'station' and self.component is not None:
            logger.error(
                'An asset_type of "station" may not have a "component".')
            return False

        return True
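
A hedged usage sketch of valid() (hypothetical URN string; assumes the IoosUrn class seen elsewhere in these examples, with a from_string constructor and authority/label/asset_type/component attributes):

    urn = IoosUrn.from_string('urn:ioos:station:us.example:buoy1')
    if urn.valid():
        print(urn.asset_type, urn.authority, urn.label)
    else:
        print('Invalid URN')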
Example #5
    def valid(self):
        ASSET_TYPES = ['station', 'network', 'sensor', 'survey']

        # Plain if-checks rather than assert/except so validation still
        # runs under `python -O` (asserts are stripped with optimization)
        if self.authority is None:
            logger.error('An "authority" is required')
            return False

        if self.label is None:
            logger.error('A "label" is required')
            return False

        if self.asset_type not in ASSET_TYPES:
            logger.error('asset_type {0} is unknown. Must be one of: {1}'.format(self.asset_type, ', '.join(ASSET_TYPES)))
            return False

        if self.asset_type == 'station' and self.component is not None:
            logger.error('An asset_type of "station" may not have a "component".')
            return False

        return True
Example #6
    def from_directory(cls, directory, suffix=".nc", subdirs=True, dimName='time', apply_to_members=None):

        if not os.path.isdir(directory):
            logger.error("Directory {0} does not exist or I do not have permission to access it".format(directory))
            return None

        # Create NcML pointing to the directory
        ncml = """<?xml version="1.0" encoding="UTF-8"?>
                    <netcdf xmlns="http://www.unidata.ucar.edu/namespaces/netcdf/ncml-2.2">
                        <aggregation dimName="{0}" type="joinExisting">
                            <scan location="{1}" suffix="{2}" subdirs="{3}" />
                        </aggregation>
                    </netcdf>
               """.format(dimName, directory, suffix, subdirs)
        try:
            return cls(pyncml.scan(ncml, apply_to_members=apply_to_members))
        except Exception:
            logger.exception("Could not load Collection from Directory.")
Example #7
    def from_glob(cls, glob_string, timevar_name='time', ncml=None):
        dataset_name      = None
        dataset_starting  = None
        dataset_ending    = None
        dataset_variables = []
        dataset_members   = []

        files = glob(glob_string)
        logger.info("Processing aggregation containing {!s} files".format(len(files)))
        for i, filepath in enumerate(files):
            logger.info("Processing member ({0}/{1}) - {2} ".format(i+1, len(files), filepath))
            nc = None
            try:
                if ncml is not None:
                    # Apply NcML
                    tmp_f, tmp_fp = tempfile.mkstemp(prefix="nc")
                    os.close(tmp_f)
                    nc = pyncml.apply(filepath, ncml, output_file=tmp_fp)
                else:
                    nc = netCDF4.Dataset(filepath)

                if dataset_name is None:
                    # Dataset.name is a built-in netCDF4 property (the group name),
                    # so check the global attributes rather than using hasattr()
                    if 'name' in nc.ncattrs():
                        dataset_name = nc.name
                    elif 'title' in nc.ncattrs():
                        dataset_name = nc.title
                    else:
                        dataset_name = "Pyaxiom Glob Dataset"

                timevar = nc.variables.get(timevar_name)
                if timevar is None:
                    logger.error("Time variable '{0}' was not found in file '{1}'. Skipping.".format(timevar_name, filepath))
                    continue

                # Start/Stop of NetCDF file
                starting  = netCDF4.num2date(np.min(timevar[:]), units=timevar.units)
                ending    = netCDF4.num2date(np.max(timevar[:]), units=timevar.units)
                # Materialize the filter object so it can be concatenated and reused (Python 3)
                variables = list(filter(None, [ nc.variables[v].standard_name if hasattr(nc.variables[v], 'standard_name') else None for v in nc.variables.keys() ]))

                dataset_variables = list(set(dataset_variables + variables))

                if starting.tzinfo is None:
                    starting = starting.replace(tzinfo=pytz.utc)
                if ending.tzinfo is None:
                    ending = ending.replace(tzinfo=pytz.utc)
                if dataset_starting is None or starting < dataset_starting:
                    dataset_starting = starting
                if dataset_ending is None or ending > dataset_ending:
                    dataset_ending = ending

                member = DotDict(path=filepath, standard_names=variables, starting=starting, ending=ending)
                dataset_members.append(member)
            except Exception:
                logger.exception("Something went wrong with {0}".format(filepath))
                continue
            finally:
                if nc is not None:
                    nc.close()
                try:
                    os.remove(tmp_fp)
                except (OSError, UnboundLocalError):
                    pass

        dataset_members = sorted(dataset_members, key=operator.attrgetter('starting'))
        return cls(DotDict(name=dataset_name,
                           timevar_name=timevar_name,
                           starting=dataset_starting,
                           ending=dataset_ending,
                           standard_names=dataset_variables,
                           members=dataset_members))
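
A hedged usage sketch of from_glob() (hypothetical glob pattern; assumes the same Collection-style class and netCDF files carrying a CF 'time' variable):

    # Hypothetical: aggregate one deployment's files by filename pattern
    collection = Collection.from_glob('/data/ctd/2024-*.nc', timevar_name='time')
    print(collection.aggregation.starting, collection.aggregation.ending)
    print(collection.aggregation.standard_names)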
Example #8
def dictify_urn(urn, combine_interval=True):
    """
        By default, this will put the `interval` as part of the `cell_methods`
        attribute (NetCDF CF style). To return `interval` as its own key, use
        the `combine_interval=False` parameter.
    """
    ioos_urn = IoosUrn.from_string(urn)

    if ioos_urn.valid() is False:
        return dict()

    if ioos_urn.asset_type != 'sensor':
        logger.error("This function only works on 'sensor' URNs.")
        return dict()

    if '#' in ioos_urn.component:
        standard_name, extras = ioos_urn.component.split('#')
    else:
        standard_name = ioos_urn.component
        extras = ''

    d = dict(standard_name=standard_name)

    # Discriminant: check the standard_name rather than the full component,
    # so a '-' inside the '#' extras cannot trigger this branch
    if '-' in standard_name:
        d['discriminant'] = standard_name.split('-')[-1]
        d['standard_name'] = standard_name.split('-')[0]

    intervals = []
    cell_methods = []
    if extras:
        for section in extras.split(';'):
            key, values = section.split('=')
            if key == 'interval':
                # special case: intervals are appended to the cell_methods
                for v in values.split(','):
                    intervals.append(v)
            elif key == 'cell_methods':
                cell_methods = [ x.replace('_', ' ').replace(':', ': ') for x in values.split(',') ]
            else:
                d[key] = ' '.join([ x.replace('_', ' ').replace(':', ': ') for x in values.split(',') ])

    if combine_interval is True:
        if cell_methods and intervals:
            if len(cell_methods) == len(intervals):
                d['cell_methods'] = ' '.join([ '{} (interval: {})'.format(x[0], x[1].upper()) for x in zip(cell_methods, intervals) ])
            else:
                d['cell_methods'] = ' '.join(cell_methods)
                for i in intervals:
                    d['cell_methods'] += ' (interval: {})'.format(i.upper())
        elif cell_methods:
            d['cell_methods'] = ' '.join(cell_methods)
            for i in intervals:
                d['cell_methods'] += ' (interval: {})'.format(i.upper())
        elif intervals:
            raise ValueError("An interval without a cell_method is not allowed!  Not possible!")
    else:
        d['cell_methods'] = ' '.join(cell_methods)
        d['interval'] = ','.join(intervals).upper()

    if 'vertical_datum' in d:
        d['vertical_datum'] = d['vertical_datum'].upper()

    return d
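
A hedged usage sketch of dictify_urn() (hypothetical URN; the '#' fragment syntax follows what the parsing above implies):

    # Hypothetical sensor URN whose component carries cell_methods and an
    # interval in its '#' fragment, as the parser above expects
    attrs = dictify_urn('urn:ioos:sensor:us.example:buoy1:'
                        'sea_water_temperature#cell_methods=time:mean;interval=pt1h')
    # attrs == {'standard_name': 'sea_water_temperature',
    #           'cell_methods': 'time: mean (interval: PT1H)'}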
Example #9
def dictify_urn(urn, combine_interval=True):
    """
        By default, this will put the `interval` as part of the `cell_methods`
        attribute (NetCDF CF style). To return `interval` as its own key, use
        the `combine_interval=False` parameter.
    """
    ioos_urn = IoosUrn.from_string(urn)

    if ioos_urn.valid() is False:
        return dict()

    if ioos_urn.asset_type != 'sensor':
        logger.error("This function only works on 'sensor' URNs.")
        return dict()

    if '#' in ioos_urn.component:
        standard_name, extras = ioos_urn.component.split('#')
    else:
        standard_name = ioos_urn.component
        extras = ''

    d = dict(standard_name=standard_name)

    # Discriminant: check the standard_name rather than the full component,
    # so a '-' inside the '#' extras cannot trigger this branch
    if '-' in standard_name:
        d['discriminant'] = standard_name.split('-')[-1]
        d['standard_name'] = standard_name.split('-')[0]

    intervals = []
    cell_methods = []
    if extras:
        for section in extras.split(';'):
            key, values = section.split('=')
            if key == 'interval':
                # special case: intervals are appended to the cell_methods
                for v in values.split(','):
                    intervals.append(v)
            elif key == 'cell_methods':
                cell_methods = [ x.replace('_', ' ').replace(':', ': ') for x in values.split(',') ]
            else:
                d[key] = ' '.join([ x.replace('_', ' ').replace(':', ': ') for x in values.split(',') ])

    if combine_interval is True:
        if cell_methods and intervals:
            if len(cell_methods) == len(intervals):
                d['cell_methods'] = ' '.join([ '{} (interval: {})'.format(x[0], x[1].upper()) for x in zip(cell_methods, intervals) ])
            else:
                d['cell_methods'] = ' '.join(cell_methods)
                for i in intervals:
                    d['cell_methods'] += ' (interval: {})'.format(i.upper())
        elif cell_methods:
            d['cell_methods'] = ' '.join(cell_methods)
            for i in intervals:
                d['cell_methods'] += ' (interval: {})'.format(i.upper())
        elif intervals:
            raise ValueError("An interval without a cell_method is not allowed!  Not possible!")
    else:
        d['cell_methods'] = ' '.join(cell_methods)
        d['interval'] = ','.join(intervals).upper()

    if 'vertical_datum' in d:
        d['vertical_datum'] = d['vertical_datum'].upper()

    return d
Example #10
    def from_glob(cls, glob_string, timevar_name='time', ncml=None):
        dataset_name      = None
        dataset_starting  = None
        dataset_ending    = None
        dataset_variables = []
        dataset_members   = []

        files = glob(glob_string)
        logger.info("Processing aggregation containing {!s} files".format(len(files)))
        for i, filepath in enumerate(files):
            logger.info("Processing member ({0}/{1}) - {2} ".format(i+1, len(files), filepath))
            nc = None
            try:
                if ncml is not None:
                    # Apply NcML
                    tmp_f, tmp_fp = tempfile.mkstemp(prefix="nc")
                    os.close(tmp_f)
                    nc = pyncml.apply(filepath, ncml, output_file=tmp_fp)
                else:
                    nc = netCDF4.Dataset(filepath)

                if dataset_name is None:
                    if 'name' in nc.ncattrs():
                        dataset_name = nc.name
                    elif 'title' in nc.ncattrs():
                        dataset_name = nc.title
                    else:
                        dataset_name = "Pyaxiom Glob Dataset"

                timevar = nc.variables.get(timevar_name)
                if timevar is None:
                    logger.error("Time variable '{0}' was not found in file '{1}'. Skipping.".format(timevar_name, filepath))
                    continue

                # Start/Stop of NetCDF file
                starting  = netCDF4.num2date(np.min(timevar[:]), units=timevar.units)
                ending    = netCDF4.num2date(np.max(timevar[:]), units=timevar.units)
                variables = [ _f for _f in [ nc.variables[v].standard_name if hasattr(nc.variables[v], 'standard_name') else None for v in nc.variables.keys() ] if _f ]

                dataset_variables = list(set(dataset_variables + variables))

                if starting.tzinfo is None:
                    starting = starting.replace(tzinfo=pytz.utc)
                if ending.tzinfo is None:
                    ending = ending.replace(tzinfo=pytz.utc)
                if dataset_starting is None or starting < dataset_starting:
                    dataset_starting = starting
                if dataset_ending is None or ending > dataset_ending:
                    dataset_ending = ending

                member = DotDict(path=filepath, standard_names=variables, starting=starting, ending=ending)
                dataset_members.append(member)
            except Exception:
                logger.exception("Something went wrong with {0}".format(filepath))
                continue
            finally:
                if nc is not None:
                    nc.close()
                try:
                    os.remove(tmp_fp)
                except (OSError, UnboundLocalError):
                    pass

        dataset_members = sorted(dataset_members, key=operator.attrgetter('starting'))
        return cls(DotDict(name=dataset_name,
                           timevar_name=timevar_name,
                           starting=dataset_starting,
                           ending=dataset_ending,
                           standard_names=dataset_variables,
                           members=dataset_members))