def add_instrument_variable(self, variable_name):
    if variable_name not in self._nc.variables:
        logger.error("Variable {} not found in file, cannot create instrument metadata variable".format(variable_name))
        return
    elif 'id' not in self._nc.ncattrs() or 'naming_authority' not in self._nc.ncattrs():
        logger.error("Global attributes 'id' and 'naming_authority' are required to create an instrument variable")
        return

    instr_var_name = "{}_instrument".format(variable_name)
    instrument = self._nc.createVariable(instr_var_name, "i4")

    datavar = self._nc.variables[variable_name]
    vats = { k: getattr(datavar, k) for k in datavar.ncattrs() }
    instrument_urn = urnify(self._nc.naming_authority, self._nc.id, vats)

    inst_urn = IoosUrn.from_string(instrument_urn)
    instrument.long_name = 'Instrument measuring {} from {}'.format(inst_urn.component, inst_urn.label)
    instrument.ioos_code = instrument_urn
    instrument.short_name = inst_urn.component
    instrument.definition = "http://mmisw.org/ont/ioos/definition/sensorID"

    datavar.instrument = instr_var_name

    # Append the instrument to the ancillary variables
    av = getattr(datavar, 'ancillary_variables', '')
    av += ' {}'.format(instr_var_name)
    datavar.ancillary_variables = av.strip()

    self._nc.sync()
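# A minimal usage sketch for add_instrument_variable, assuming the method lives
# on a class that wraps an open, writable netCDF4.Dataset in self._nc (the
# wrapper class and file names below are hypothetical):
#
#   ts = SomeNetcdfWrapper('station.nc')  # file defines 'id' and 'naming_authority' globals
#   ts.add_instrument_variable('sea_water_temperature')
#
# Afterwards the data variable carries 'instrument' and 'ancillary_variables'
# attributes pointing at the new 'sea_water_temperature_instrument' variable.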
def bins(self, delta, starting, hard_start=None, hard_end=None):
    ending = starting + delta
    windows = []

    member_length = len(self.aggregation.members)
    last_member = self.aggregation.members[-1]
    index = 0

    if hard_start is None:
        hard_start = starting
    if hard_end is None:
        hard_end = last_member.ending

    # Loop until we process the last member of the aggregation
    while last_member.ending >= starting:

        # Window for this timedelta
        member = None
        window = DotDict(starting=starting, ending=ending, members=[])

        for x in range(index, member_length):
            member = self.aggregation.members[x]
            if member.starting >= starting and member.ending < ending:
                if member.starting >= hard_start and member.ending <= hard_end:
                    # The simplest case... completely part of this aggregation
                    # and within the specified 'hard' bounds
                    window.members.append(member)
                index += 1
            elif member.starting >= ending:
                # This member is outside of the current window and we need to make
                # new window(s) until it fits into one.
                break
            elif (member.starting >= starting and member.ending >= ending) or \
                 (member.starting < starting and member.ending < ending):
                # This member overlaps where the cutoff would be. This is
                # NOT supported at the moment
                logger.error("Skipping {0}. Members that overlap a bin boundary are not supported at this time.".format(member.path))
                index += 1

        # Only keep windows that contain more than one member, then move the
        # time window along by the delta
        if len(window.members) > 1:
            windows.append(window)
        starting = ending
        ending = ending + delta

    return windows
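# A hedged usage sketch for bins(), assuming the Collection class defined below
# and illustrative file paths and dates; each returned window is a DotDict with
# starting, ending and members attributes:
#
#   from datetime import datetime, timedelta
#   import pytz
#
#   collection = Collection.from_glob('data/*.nc')
#   for window in collection.bins(delta=timedelta(days=7),
#                                 starting=datetime(2015, 1, 1, tzinfo=pytz.utc)):
#       print(window.starting, window.ending, len(window.members))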
def valid(self):
    ASSET_TYPES = ['station', 'network', 'sensor', 'survey']

    try:
        assert self.authority is not None
    except AssertionError:
        logger.error('An "authority" is required')
        return False

    try:
        assert self.label is not None
    except AssertionError:
        logger.error('A "label" is required')
        return False

    try:
        assert self.asset_type in ASSET_TYPES
    except AssertionError:
        logger.error('asset_type {0} is unknown. Must be one of: {1}'.format(self.asset_type, ', '.join(ASSET_TYPES)))
        return False

    if self.asset_type == 'station':
        try:
            assert self.component is None
        except AssertionError:
            logger.error('An asset_type of "station" may not have a "component".')
            return False

    return True
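# A hedged usage sketch for valid(); IoosUrn.from_string is assumed from the
# same module and the URN strings are illustrative:
#
#   IoosUrn.from_string('urn:ioos:station:us.test:station1').valid()  # True
#   IoosUrn.from_string('urn:ioos:widget:us.test:station1').valid()   # False: unknown asset_type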
def from_directory(cls, directory, suffix=".nc", subdirs=True, dimName='time', apply_to_members=None):
    if not os.path.isdir(directory):
        logger.error("Directory {0} does not exist or I do not have the correct permissions to access it".format(directory))
        return

    # Create NcML pointing to the directory
    ncml = """<?xml version="1.0" encoding="UTF-8"?>
              <netcdf xmlns="http://www.unidata.ucar.edu/namespaces/netcdf/ncml-2.2">
                <aggregation dimName="{0}" type="joinExisting">
                  <scan location="{1}" suffix="{2}" subdirs="{3}" />
                </aggregation>
              </netcdf>
           """.format(dimName, directory, suffix, subdirs)
    try:
        return cls(pyncml.scan(ncml, apply_to_members=apply_to_members))
    except BaseException:
        logger.exception("Could not load Collection from Directory.")
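# A hedged usage sketch for from_directory(); the directory path is
# illustrative, and the generated NcML simply joins every matching file along
# the given dimension:
#
#   collection = Collection.from_directory('/data/buoys', suffix='.nc', subdirs=True)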
def from_glob(cls, glob_string, timevar_name='time', ncml=None):
    dataset_name      = None
    dataset_starting  = None
    dataset_ending    = None
    dataset_variables = []
    dataset_members   = []

    files = glob(glob_string)
    logger.info("Processing aggregation containing {!s} files".format(len(files)))
    for i, filepath in enumerate(files):
        logger.info("Processing member ({0}/{1}) - {2}".format(i + 1, len(files), filepath))
        nc = None
        try:
            if ncml is not None:
                # Apply NcML
                tmp_f, tmp_fp = tempfile.mkstemp(prefix="nc")
                os.close(tmp_f)
                nc = pyncml.apply(filepath, ncml, output_file=tmp_fp)
            else:
                nc = netCDF4.Dataset(filepath)

            if dataset_name is None:
                if 'name' in nc.ncattrs():
                    dataset_name = nc.name
                elif 'title' in nc.ncattrs():
                    dataset_name = nc.title
                else:
                    dataset_name = "Pyaxiom Glob Dataset"

            timevar = nc.variables.get(timevar_name)
            if timevar is None:
                logger.error("Time variable '{0}' was not found in file '{1}'. Skipping.".format(timevar_name, filepath))
                continue

            # Start/Stop of NetCDF file
            starting = netCDF4.num2date(np.min(timevar[:]), units=timevar.units)
            ending   = netCDF4.num2date(np.max(timevar[:]), units=timevar.units)
            variables = [ nc.variables[v].standard_name
                          for v in nc.variables
                          if getattr(nc.variables[v], 'standard_name', None) ]
            dataset_variables = list(set(dataset_variables + variables))

            if starting.tzinfo is None:
                starting = starting.replace(tzinfo=pytz.utc)
            if ending.tzinfo is None:
                ending = ending.replace(tzinfo=pytz.utc)
            if dataset_starting is None or starting < dataset_starting:
                dataset_starting = starting
            if dataset_ending is None or ending > dataset_ending:
                dataset_ending = ending

            member = DotDict(path=filepath, standard_names=variables, starting=starting, ending=ending)
            dataset_members.append(member)
        except BaseException:
            logger.exception("Something went wrong with {0}".format(filepath))
            continue
        finally:
            if nc is not None:
                nc.close()
            try:
                os.remove(tmp_fp)
            except (OSError, UnboundLocalError):
                pass

    dataset_members = sorted(dataset_members, key=operator.attrgetter('starting'))
    return cls(DotDict(name=dataset_name,
                       timevar_name=timevar_name,
                       starting=dataset_starting,
                       ending=dataset_ending,
                       standard_names=dataset_variables,
                       members=dataset_members))
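# A hedged usage sketch for from_glob(); the glob pattern is illustrative, and
# the constructor is assumed to store the scanned metadata DotDict as the
# instance's aggregation (as bins() above implies):
#
#   collection = Collection.from_glob('/data/buoys/*.nc', timevar_name='time')
#   print(collection.aggregation.starting, collection.aggregation.ending)
#   print(collection.aggregation.standard_names)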
def dictify_urn(urn, combine_interval=True):
    """
    By default, this will put the `interval` as part of the `cell_methods`
    attribute (NetCDF CF style). To return `interval` as its own key, use
    the `combine_interval=False` parameter.
    """
    ioos_urn = IoosUrn.from_string(urn)

    if ioos_urn.valid() is False:
        return dict()

    if ioos_urn.asset_type != 'sensor':
        logger.error("This function only works on 'sensor' URNs.")
        return dict()

    if '#' in ioos_urn.component:
        standard_name, extras = ioos_urn.component.split('#')
    else:
        standard_name = ioos_urn.component
        extras = ''

    d = dict(standard_name=standard_name)

    # Discriminant
    if '-' in ioos_urn.component:
        d['discriminant'] = standard_name.split('-')[-1]
        d['standard_name'] = standard_name.split('-')[0]

    intervals = []
    cell_methods = []
    if extras:
        for section in extras.split(';'):
            key, values = section.split('=')
            if key == 'interval':
                # special case, intervals should be appended to the cell_methods
                for v in values.split(','):
                    intervals.append(v)
            else:
                if key == 'cell_methods':
                    value = [ x.replace('_', ' ').replace(':', ': ') for x in values.split(',') ]
                    cell_methods = value
                else:
                    value = ' '.join([ x.replace('_', ' ').replace(':', ': ') for x in values.split(',') ])
                    d[key] = value

    if combine_interval is True:
        if cell_methods and intervals:
            if len(cell_methods) == len(intervals):
                d['cell_methods'] = ' '.join([ '{} (interval: {})'.format(x[0], x[1].upper()) for x in zip(cell_methods, intervals) ])
            else:
                d['cell_methods'] = ' '.join(cell_methods)
                for i in intervals:
                    d['cell_methods'] += ' (interval: {})'.format(i.upper())
        elif cell_methods:
            d['cell_methods'] = ' '.join(cell_methods)
            for i in intervals:
                d['cell_methods'] += ' (interval: {})'.format(i.upper())
        elif intervals:
            raise ValueError("An interval without a cell_method is not allowed! Not possible!")
    else:
        d['cell_methods'] = ' '.join(cell_methods)
        d['interval'] = ','.join(intervals).upper()

    if 'vertical_datum' in d:
        d['vertical_datum'] = d['vertical_datum'].upper()

    return d
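# A hedged usage sketch for dictify_urn(); the URN is illustrative. Extras
# after '#' become attributes, and the interval folds into cell_methods by
# default:
#
#   dictify_urn('urn:ioos:sensor:us.test:station1:sea_water_temperature#cell_methods=time:mean;interval=pt1h')
#   # -> {'standard_name': 'sea_water_temperature',
#   #     'cell_methods': 'time: mean (interval: PT1H)'}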