def computational_form(data):
    """Repackage a Series of numbers, Series, or DataFrames for calculation.

    Regardless of input format, mathematical operations may be performed on
    the output via the same pandas mechanisms.

    This method may be particularly useful in analysis methods that aim to be
    instrument independent. pysat.Instrument objects can package data in a
    variety of ways within a DataFrame, depending upon the scientific data
    source. Thus, a variety of data types will be encountered by instrument
    independent methods and computational_form method may reduce the effort
    required to support more generalized processing.

    Parameters
    ----------
    data : pandas.Series
        Series of numbers, Series, DataFrames

    Returns
    -------
    pandas.Series, DataFrame, or Panel
        repacked data, aligned by indices, ready for calculation

    Note
    ----
    Only the first element of `data` is inspected to choose the output type,
    so `data` must hold at least one element and is assumed to be
    homogeneous.

    """
    # Imported at call time, exactly as the original did.
    from pysat import DataFrame, Series, Panel

    first = data.iloc[0]
    if isinstance(first, DataFrame):
        # One Panel item per position in `data`. `range` replaces the
        # Python 2-only `xrange` so this branch also runs on Python 3.
        dslice = Panel.from_dict({i: data.iloc[i] for i in range(len(data))})
    elif isinstance(first, Series):
        # Each inner Series becomes one row; rows keep the outer index.
        dslice = DataFrame(data.tolist())
        dslice.index = data.index
    else:
        # Plain numbers are already in a computable form.
        dslice = data
    return dslice
def computational_form(data):
    """Repackage a Series of numbers, Series, or DataFrames for calculation.

    Parameters
    ----------
    data : pandas.Series
        Series of numbers, Series, DataFrames

    Returns
    -------
    pandas.Series, DataFrame, or Panel
        repacked data, aligned by indices, ready for calculation

    Note
    ----
    Only the first element of `data` is inspected to choose the output type,
    so `data` must hold at least one element and is assumed to be
    homogeneous.

    """
    first = data.iloc[0]
    if isinstance(first, DataFrame):
        # One Panel item per position in `data`. `range` replaces the
        # Python 2-only `xrange` so this branch also runs on Python 3.
        dslice = Panel.from_dict({i: data.iloc[i] for i in range(len(data))})
    elif isinstance(first, Series):
        # Each inner Series becomes one row; rows keep the outer index.
        dslice = DataFrame(data.tolist())
        dslice.index = data.index
    else:
        # Plain numbers are already in a computable form.
        dslice = data
    return dslice
def computational_form(data):
    """Repackage a Series of numbers, Series, or DataFrames for calculation.

    Parameters
    ----------
    data : pandas.Series
        Series of numbers, Series, DataFrames

    Returns
    -------
    pandas.Series, DataFrame, or Panel
        repacked data, aligned by indices, ready for calculation

    Note
    ----
    The output type is chosen from the first element of `data` alone; `data`
    must therefore be non-empty and is assumed to hold a single kind of
    element.

    """
    sample = data.iloc[0]
    if isinstance(sample, DataFrame):
        # Key each frame by its position. `range` is used instead of
        # `xrange`, which does not exist on Python 3.
        frames = {i: data.iloc[i] for i in range(len(data))}
        dslice = Panel.from_dict(frames)
    elif isinstance(sample, Series):
        # Stack the inner Series as rows and restore the outer index.
        dslice = DataFrame(data.tolist())
        dslice.index = data.index
    else:
        # Scalars need no repackaging.
        dslice = data
    return dslice
def __init__(self, metadata=None, units_label='units',
             name_label='long_name', notes_label='notes',
             desc_label='desc', plot_label='label', axis_label='axis',
             scale_label='scale', min_label='value_min',
             max_label='value_max', fill_label='fill', export_nan=None):
    """Initialize the metadata container and its label names.

    Parameters
    ----------
    metadata : pandas.DataFrame or NoneType
        Existing metadata to store. If None, an empty DataFrame with one
        column per label is created.
    units_label, name_label, notes_label, desc_label, plot_label,
    axis_label, scale_label, min_label, max_label, fill_label : str
        Column names used for each kind of metadata.
    export_nan : list or NoneType
        Additional label names whose NaN values should still be exported.
        The fill label is always included. None (default) means no
        additional labels.

    Raises
    ------
    ValueError
        If `metadata` is neither None nor a pandas DataFrame.

    """
    # set mutability of Meta attributes
    self.mutable = True

    # set units and name labels directly
    self._units_label = units_label
    self._name_label = name_label
    self._notes_label = notes_label
    self._desc_label = desc_label
    self._plot_label = plot_label
    self._axis_label = axis_label
    self._scale_label = scale_label
    self._min_label = min_label
    self._max_label = max_label
    self._fill_label = fill_label

    # by default metadata with a value of nan will not be exported
    # unless the name is in the _export_nan list. Initialize the list
    # with the fill label, since it is reasonable to assume that a fill
    # value of nan would be intended to be exported.
    # A None default replaces the shared mutable `[]` default argument.
    if export_nan is None:
        export_nan = []
    self._export_nan = [fill_label] + export_nan

    # init higher order (nD) data structure container, a dict
    self._ho_data = {}

    # use any user provided data to instantiate object with data
    # attribute unit and name labels are called within
    if metadata is not None:
        if isinstance(metadata, DataFrame):
            self._data = metadata
            # make sure defaults are taken care of for required metadata
            self.accept_default_labels(self)
        else:
            # The original pieces joined without a space ("DataFrametype").
            raise ValueError(' '.join(('Input must be a pandas DataFrame',
                                       'type. See other constructors for',
                                       'alternate inputs.')))
    else:
        # NOTE(review): the notes column is read through `self.notes_label`
        # (no underscore), unlike its neighbours; presumably a property that
        # returns `_notes_label` -- confirm against the class definition.
        self._data = DataFrame(None, columns=[self._units_label,
                                              self._name_label,
                                              self._desc_label,
                                              self._plot_label,
                                              self._axis_label,
                                              self._scale_label,
                                              self.notes_label,
                                              self._min_label,
                                              self._max_label,
                                              self._fill_label])

    # establish attributes intrinsic to object, before user can
    # add any
    self._base_attr = dir(self)
def __init__(self, metadata=None, units_label='units',
             name_label='long_name', notes_label='notes',
             desc_label='desc', plot_label='label', axis_label='axis',
             scale_label='scale', min_label='value_min',
             max_label='value_max', fill_label='fill'):
    """Initialize the metadata container and its label names.

    Parameters
    ----------
    metadata : pandas.DataFrame or NoneType
        Existing metadata to store. If None, an empty DataFrame with one
        column per label is created.
    units_label, name_label, notes_label, desc_label, plot_label,
    axis_label, scale_label, min_label, max_label, fill_label : str
        Column names used for each kind of metadata.

    Raises
    ------
    ValueError
        If `metadata` is neither None nor a pandas DataFrame.

    """
    # Record every label on the instance, in the original assignment order.
    label_settings = (
        ('_units_label', units_label),
        ('_name_label', name_label),
        ('_notes_label', notes_label),
        ('_desc_label', desc_label),
        ('_plot_label', plot_label),
        ('_axis_label', axis_label),
        ('_scale_label', scale_label),
        ('_min_label', min_label),
        ('_max_label', max_label),
        ('_fill_label', fill_label),
    )
    for attribute, label in label_settings:
        setattr(self, attribute, label)

    # Container for higher order (nD) metadata.
    self._ho_data = {}

    if metadata is None:
        # No input: start from an empty frame with the standard columns.
        standard_columns = [
            self._units_label,
            self._name_label,
            self._desc_label,
            self._plot_label,
            self._axis_label,
            self._scale_label,
            self.notes_label,
            self._min_label,
            self._max_label,
            self._fill_label,
        ]
        self._data = DataFrame(None, columns=standard_columns)
    elif isinstance(metadata, DataFrame):
        self._data = metadata
        # Fill in defaults for any required metadata that is missing.
        self.accept_default_labels(self)
    else:
        raise ValueError("Input must be a pandas DataFrame type. "
                         "See other constructors for alternate inputs.")

    # Snapshot the intrinsic attribute names before users add their own.
    self._base_attr = dir(self)
def computational_form(data):
    """Repackage a Series of numbers, Series, or DataFrames for calculation.

    .. deprecated:: 2.2.0
      `computational_form` will be removed in pysat 3.0.0, it will
      be added to pysatSeasons

    Regardless of input format, mathematical operations may be performed on
    the output via the same pandas mechanisms.

    This method may be particularly useful in analysis methods that aim to be
    instrument independent. pysat.Instrument objects can package data in a
    variety of ways within a DataFrame, depending upon the scientific data
    source. Thus, a variety of data types will be encountered by instrument
    independent methods and computational_form method may reduce the effort
    required to support more generalized processing.

    Parameters
    ----------
    data : pandas.Series
        Series of numbers, Series, DataFrames

    Returns
    -------
    pandas.Series, DataFrame, or Panel
        repacked data, aligned by indices, ready for calculation

    Note
    ----
    A DeprecationWarning is issued on every call. Only the first element of
    `data` is inspected to choose the output type, so `data` must hold at
    least one element.

    """
    # Imported at call time, exactly as the original did.
    from pysat import DataFrame, Series, Panel
    import warnings

    # The original list lacked a comma before the URL, so the message read
    # "instead:https://..." with no separating space.
    warnings.warn(' '.join(["This function is deprecated here and will be",
                            "removed in pysat 3.0.0. Please use",
                            "pysatSeasons instead:",
                            "https://github.com/pysat/pysatSeasons"]),
                  DeprecationWarning, stacklevel=2)

    first = data.iloc[0]
    if isinstance(first, DataFrame):
        # One Panel item per position in `data`.
        dslice = Panel.from_dict({i: data.iloc[i] for i in range(len(data))})
    elif isinstance(first, Series):
        # Each inner Series becomes one row; rows keep the outer index.
        dslice = DataFrame(data.tolist())
        dslice.index = data.index
    else:
        # Plain numbers are already in a computable form.
        dslice = data
    return dslice
def _load_data(self, date=None, fid=None):
    """Load data for an instrument on given date or fid, depending upon input.

    Parameters
    ----------
    date : datetime-like or NoneType
        Day to load. Files are selected with the slice
        ``self.files[date:date + 1 day]``. Ignored when `fid` is given.
    fid : int or NoneType
        File index to load, selected with ``self.files[fid:fid + 1]``.
        Takes precedence over `date`.

    Returns
    -------
    data : pandas.DataFrame
        Data returned by the instrument load routine, or an empty DataFrame
        when no files were selected.
    mdata : pysat.Meta
        Metadata returned by the load routine, or an empty Meta object when
        no files were selected.

    Raises
    ------
    ValueError
        If neither `date` nor `fid` is supplied.
    TypeError
        If the load routine returns non-empty data that is not a DataFrame,
        or metadata that is not a pysat.Meta object.

    """
    if fid is not None:
        # get filename based off of index value
        fname = self.files[fid:fid + 1]
    elif date is not None:
        fname = self.files[date:date + pds.DateOffset(days=1)]
    else:
        raise ValueError('Must supply either a date or file id number.')

    # `len` is kept deliberately: `fname` is whatever self.files returns
    # for a slice, which may not support plain truth testing.
    if len(fname) > 0:
        load_fname = [os.path.join(self.files.data_path, f) for f in fname]
        data, mdata = self._load_rtn(load_fname, tag=self.tag,
                                     sat_id=self.sat_id, **self.kwargs)
    else:
        data = DataFrame(None)
        mdata = _meta.Meta()

    output_str = '{platform} {name} {tag} {sat_id}'
    output_str = output_str.format(platform=self.platform,
                                   name=self.name, tag=self.tag,
                                   sat_id=self.sat_id)
    if not data.empty:
        if not isinstance(data, DataFrame):
            # `str.join` replaces `string.join`, which exists only on
            # Python 2; the resulting message is unchanged.
            raise TypeError(' '.join(('Data returned by instrument load',
                                      'routine must be a pandas.DataFrame')))
        if not isinstance(mdata, _meta.Meta):
            raise TypeError('Metadata returned must be a pysat.Meta object')
        if date is not None:
            output_str = ' '.join(('Returning', output_str, 'data for',
                                   date.strftime('%D')))
        else:
            if len(fname) == 1:
                # this check was zero
                output_str = ' '.join(('Returning', output_str, 'data from',
                                       fname[0]))
            else:
                output_str = ' '.join(('Returning', output_str, 'data from',
                                       fname[0], '::', fname[-1]))
    elif date is not None:
        # no data signal
        output_str = ' '.join(('No', output_str, 'data for',
                               date.strftime('%D')))
    else:
        # Loading by file id: `date` is None here, so the original call to
        # date.strftime raised AttributeError instead of reporting.
        output_str = ' '.join(('No', output_str, 'data for file id',
                               str(fid)))
    # remove extra spaces, if any
    output_str = " ".join(output_str.split())
    print(output_str)
    return data, mdata
def __init__(self, metadata=None, units_label='units',
             name_label='long_name', notes_label='notes',
             desc_label='desc', plot_label='label', axis_label='axis',
             scale_label='scale', min_label='value_min',
             max_label='value_max', fill_label='fill'):
    """Set up label names and the underlying metadata DataFrame.

    Parameters
    ----------
    metadata : pandas.DataFrame or NoneType
        Metadata to adopt. When None, an empty DataFrame is created whose
        columns are the label names.
    units_label, name_label, notes_label, desc_label, plot_label,
    axis_label, scale_label, min_label, max_label, fill_label : str
        Column names used for each kind of metadata.

    Raises
    ------
    ValueError
        If `metadata` is supplied but is not a pandas DataFrame.

    """
    # Label names are stored first; later steps read them back.
    self._units_label, self._name_label = units_label, name_label
    self._notes_label, self._desc_label = notes_label, desc_label
    self._plot_label, self._axis_label = plot_label, axis_label
    self._scale_label, self._min_label = scale_label, min_label
    self._max_label, self._fill_label = max_label, fill_label

    # dict holding higher order (nD) metadata
    self._ho_data = {}

    have_input = metadata is not None
    if have_input and not isinstance(metadata, DataFrame):
        raise ValueError("Input must be a pandas DataFrame type. "
                         + "See other constructors for alternate inputs.")

    if have_input:
        self._data = metadata
        # apply defaults for any required metadata the input lacks
        self.accept_default_labels(self)
    else:
        column_names = [self._units_label, self._name_label,
                        self._desc_label, self._plot_label,
                        self._axis_label, self._scale_label,
                        self.notes_label, self._min_label,
                        self._max_label, self._fill_label]
        self._data = DataFrame(None, columns=column_names)

    # remember which attributes exist before any user additions
    self._base_attr = dir(self)
def replace(self, metadata=None):
    """Swap the stored metadata for the supplied DataFrame.

    Parameters
    ----------
    metadata : pandas.DataFrame or NoneType
        DataFrame should be indexed by variable name that contains at minimum
        the standard_name (name), units, and long_name for the data stored in
        the associated pysat Instrument object. If None, an empty DataFrame
        with only the name and units columns is stored.

    Raises
    ------
    ValueError
        If `metadata` is supplied but is not a pandas DataFrame.

    Note
    ----
    The supplied DataFrame is assigned as-is, so any column added here is
    added to the object that ends up stored in `self.data`.

    """
    if metadata is None:
        # Nothing supplied: reset to an empty frame with the two base columns.
        self.data = DataFrame(None, columns=[self._name_label,
                                             self._units_label])
        return

    if not isinstance(metadata, DataFrame):
        raise ValueError("Input must be a pandas DataFrame type. "
                         "See other constructors for alternate inputs.")

    self.data = metadata
    # Column names are compared case-insensitively against fixed names.
    existing = [column.lower() for column in self.data.columns]
    if 'long_name' not in existing:
        # fall back to the variable names held in the index
        self.data[self._name_label] = self.data.index
    if 'units' not in existing:
        self.data[self._units_label] = ''
def load(self, orbit=None):
    """Load a particular orbit into .data for loaded day.

    Parameters
    ----------
    orbit : int
        orbit number, 1 indexed. Negative values count back from the last
        orbit (-1 is the last orbit, -2 the second to last, etc.).

    Raises
    ------
    Exception
        If `orbit` is None, or if it is larger than the number of orbits
        found for the loaded day (in which case self.sat.data is first
        replaced with an empty DataFrame).

    Note
    ----
    A day of data must be loaded before this routine functions properly.
    If the last orbit of the day is requested, it will automatically be
    padded with data from the next day. The orbit counter will be
    reset to 1.

    If no data is loaded in the instrument object, a message is printed and
    nothing else is done.

    """
    if len(self.sat.data) > 0:  # ensure data exists
        # set up orbit metadata
        self._calcOrbits()
        # ensure user supplied an orbit
        if orbit is not None:
            # pull out requested orbit
            if orbit < 0:
                # negative indexing consistent with numpy, -1 last,
                # -2 second to last, etc.
                orbit = self.num + 1 + orbit

            if orbit == 1:
                # change from orig copied from _core, didn't look correct.
                # self._getBasicOrbit(orbit=2)
                try:
                    # remember the requested day before stepping back
                    true_date = self.sat.date  # .copy()

                    self.sat.prev()
                    # if and else added because of CINDI turn off
                    # 6/5/2013, turn on 10/22/2014
                    # crashed when starting on 10/22/2014
                    # prev returned empty data
                    if len(self.sat.data) > 0:
                        # previous day has data: load its last orbit
                        self.load(orbit=-1)
                    else:
                        # previous day is empty: return to the requested
                        # day and take its first orbit directly
                        self.sat.next()
                        self._getBasicOrbit(orbit=1)
                    # check that this orbit should end on the current day
                    delta = pds.to_timedelta(true_date
                                             - self.sat.data.index[0])
                    # print 'checking if first orbit should land on requested day'
                    # print self.sat.date, self.sat.data.index[0], delta, delta >= self.orbit_period
                    # print delta - self.orbit_period
                    if delta >= self.orbit_period:
                        # the orbit loaded isn't close enough to date
                        # to be the first orbit of the day, move forward
                        self.next()

                except StopIteration:
                    # print 'going for basic orbit'
                    self._getBasicOrbit(orbit=1)
                    # includes hack to appear to be zero indexed
                    print('Loaded Orbit:%i' % (self.current - 1))
                    # check if the first orbit is also the last orbit

            elif orbit == self.num:
                # we get here if user asks for last orbit
                # make sure that orbit data goes across daybreak as needed
                # load previous orbit
                if self.num != 1:
                    self._getBasicOrbit(self.num - 1)
                    self.next()
                else:
                    self._getBasicOrbit(orbit=-1)

            elif orbit < self.num:
                # load orbit data into data
                self._getBasicOrbit(orbit)
                # includes hack to appear to be zero indexed
                print('Loaded Orbit:%i' % (self.current - 1))

            else:
                # gone too far
                self.sat.data = DataFrame()
                raise Exception(
                    'Requested an orbit past total orbits for day')
        else:
            raise Exception('Must set an orbit')
    else:
        print('No data loaded in instrument object to determine orbits.')
def load(self, yr=None, doy=None, date=None, fname=None, fid=None,
         verifyPad=False):
    """Load instrument data into Instrument object .data.

    The selection inputs are checked in this order and the first one
    supplied wins: `date`, then the `yr`/`doy` pair, then `fname`, then
    `fid`.

    Parameters
    ----------
    yr : integer
        year for desired data
    doy : integer
        day of year
    date : datetime object
        date to load
    fname : 'string'
        filename to be loaded
    fid : integer
        file index to be loaded; stored as self._fid and passed on to
        self._load_data
    verifyPad : boolean
        if True, padding data not removed (debug purposes)

    Returns
    --------
    Void.  Data is added to self.data

    Raises
    ------
    TypeError
        if none of `date`, the `yr`/`doy` pair, `fname`, or `fid` is
        supplied

    Note
    ----
    Loads data for a chosen instrument into .data. Any functions chosen
    by the user and added to the custom processing queue (.custom.add)
    are automatically applied to the data before it is available to
    user in .data.

    When self.pad is set or self.multi_file_day is True, the previous,
    current and next day/file are kept in self._prev_data,
    self._curr_data and self._next_data so neighbouring loads can be
    reused.

    """
    # `inc` is the step to the neighbouring day/file and `curr` is the
    # position being loaded; both feed the tracking values further down.
    if date is not None:
        # date supplied getyrdoy checks if it is datetime
        year, doy = utils.getyrdoy(date)
        self.yr = year
        self.doy = doy
        self.date = date
        self._fid = None
        self._load_by_date = True
        inc = pds.DateOffset(days=1)
        curr = date
    elif (yr is not None) & (doy is not None):
        # if date not defined but both yr and doy are
        self.date = pds.datetime(yr, 1, 1) + pds.DateOffset(days=(doy - 1))
        self.yr = yr
        self.doy = doy
        self._fid = None
        self._load_by_date = True
        inc = pds.DateOffset(days=1)
        curr = self.date
    elif fname is not None:
        # date will have to be set later by looking at the data
        self.date = None
        self.yr = None
        self.doy = None
        self._load_by_date = False
        # if no index, called func tries to find file in instrument dir,
        # throws error if it fails
        self._fid = self.files.get_index(fname)
        inc = 1
        curr = self._fid.copy()
    elif fid is not None:
        self._load_by_date = False
        self._fid = fid
        self.date = None
        self.yr = None
        self.doy = None
        inc = 1
        curr = fid
    else:
        estr = 'Must supply a yr,doy pair, or datetime object, or filename'
        estr = '{:s} to load data from.'.format(estr)
        raise TypeError(estr)

    self.orbits._reset()
    # if pad is true, need to have a three day/file load
    if (self.pad is not None) | self.multi_file_day:
        if self._next_data.empty & self._prev_data.empty:
            # data has not already been loaded for previous and next days
            # load data for all three
            print('Initializing three day/file window')
            # using current date or fid
            self._prev_data, self._prev_meta = self._load_prev()
            self._curr_data, self._curr_meta = \
                self._load_data(date=self.date, fid=self._fid)
            self._next_data, self._next_meta = self._load_next()
        else:
            # moving forward in time
            if self._next_data_track == curr:
                # shift the window: current becomes previous, next becomes
                # current, and only the new next is read
                self._prev_data = self._curr_data
                self._prev_meta = self._curr_meta
                self._curr_data = self._next_data
                self._curr_meta = self._next_meta
                self._next_data, self._next_meta = self._load_next()
            # moving backward in time
            elif self._prev_data_track == curr:
                # mirror of the forward case; only the new previous is read
                self._next_data = self._curr_data
                self._next_meta = self._curr_meta
                self._curr_data = self._prev_data
                self._curr_meta = self._prev_meta
                self._prev_data, self._prev_meta = self._load_prev()
            # jumped in time/or switched from filebased to date based access
            else:
                self._prev_data, self._prev_meta = self._load_prev()
                self._curr_data, self._curr_meta = \
                    self._load_data(date=self.date, fid=self._fid)
                self._next_data, self._next_meta = self._load_next()

        # make sure datetime indices for all data is monotonic
        if not self._prev_data.index.is_monotonic_increasing:
            self._prev_data.sort_index(inplace=True)
        if not self._curr_data.index.is_monotonic_increasing:
            self._curr_data.sort_index(inplace=True)
        if not self._next_data.index.is_monotonic_increasing:
            self._next_data.sort_index(inplace=True)

        # make tracking indexes consistent with new loads
        self._next_data_track = curr + inc
        self._prev_data_track = curr - inc
        # attach data to object
        if not self._curr_data.empty:
            self.data = self._curr_data.copy()
            self.meta = self._curr_meta.copy()
        else:
            self.data = DataFrame(None)
            # line below removed as it would delete previous meta, if any
            # if you end a seasonal analysis with a day with no data, then
            # no meta: self.meta = _meta.Meta()

        # NOTE(review): `.ix` is used throughout this section; it was
        # deprecated and later removed from pandas -- confirm the supported
        # pandas version before relying on this code path.
        if self.multi_file_day:
            # restrict the current data to the requested day
            # NOTE(review): nanoseconds=99999999 has eight digits (about
            # 0.1 s), so the window ends at 23:59:59.0999...; confirm
            # whether nine digits were intended.
            self.data = self.data.ix[self.date:self.date + pds.DateOffset(
                hours=23, minutes=59, seconds=59, nanoseconds=99999999)]

        # pad data based upon passed parameter
        if (not self._prev_data.empty) & (not self.data.empty):
            if self.multi_file_day and self._load_by_date:
                padLeft = self._prev_data.ix[(
                    self.date):self._curr_data.index[0]]
            else:
                padLeft = self._prev_data.ix[(
                    self._curr_data.index[0]
                    - self.pad):self._curr_data.index[0]]
            #self.data = pds.concat([padLeft[0:-1], self.data])
            self.data = pds.concat([padLeft, self.data])

        if (not self._next_data.empty) & (not self.data.empty):
            if self.multi_file_day and self._load_by_date:
                padRight = self._next_data.ix[self.date : (self.date + \
                    pds.DateOffset(hours=23, minutes=59, seconds=59,
                                   nanoseconds=99999999))]
            else:
                padRight = self._next_data.ix[self._curr_data.index[-1]:(
                    self._curr_data.index[-1] + self.pad)]
            #self.data = pds.concat([self.data, padRight[1:]])
            self.data = pds.concat([self.data, padRight])

        # drop any possible duplicate index times
        #self.data.drop_duplicates(inplace=True)
        self.data = self.data[~self.data.index.duplicated()]

    # if self.pad is False, load single day
    else:
        self.data, meta = self._load_data(date=self.date, fid=self._fid)
        if not self.data.empty:
            self.meta = meta

    # check if load routine actually returns meta
    if self.meta.data.empty:
        self.meta[self.data.columns] = {
            'long_name': self.data.columns,
            'units': [''] * len(self.data.columns)
        }
    # if loading by file set the yr, doy, and date
    if not self._load_by_date:
        # NOTE(review): self.data.index[0] is read without checking that
        # self.data is non-empty; an empty file load looks like it would
        # raise here -- confirm.
        temp = self.data.index[0]
        temp = pds.datetime(temp.year, temp.month, temp.day)
        self.date = temp
        self.yr, self.doy = utils.getyrdoy(self.date)

    # default, clean and custom routines only run when data was loaded
    if not self.data.empty:
        self._default_rtn(self)
    # clean
    if (not self.data.empty) & (self.clean_level != 'none'):
        self._clean_rtn(self)
    # apply custom functions
    if not self.data.empty:
        self.custom._apply_all(self)
    # remove the excess padding, if any applied
    if (self.pad is not None) & (not self.data.empty) & (not verifyPad):
        self.data = self.data[self._curr_data.index[0]:
                              self._curr_data.index[-1]]

    sys.stdout.flush()
    return
class Instrument(object): """Download, load, manage, modify and analyze science data. Parameters ---------- platform : string name of platform/satellite. name : string name of instrument. tag : string, optional identifies particular subset of instrument data. sat_id : string, optional identity within constellation clean_level : {'clean','dusty','dirty','none'}, optional level of data quality pad : pandas.DateOffset, or dictionary, optional Length of time to pad the begining and end of loaded data for time-series processing. Extra data is removed after applying all custom functions. Dictionary, if supplied, is simply passed to pandas DateOffset. orbit_info : dict Orbit information, {'index':index, 'kind':kind, 'period':period}. See pysat.Orbits for more information. inst_module : module, optional Provide instrument module directly. Takes precedence over platform/name. update_files : boolean, optional If True, immediately query filesystem for instrument files and store. temporary_file_list : boolean, optional If true, the list of Instrument files will not be written to disk. Prevents a race condition when running multiple pysat processes. multi_file_day : boolean, optional Set to True if Instrument data files for a day are spread across multiple files and data for day n could be found in a file with a timestamp of day n-1 or n+1. manual_org : bool if True, then pysat will look directly in pysat data directory for data files and will not use default /platform/name/tag directory_format : str directory naming structure in string format. Variables such as platform, name, and tag will be filled in as needed using python string formatting. The default directory structure would be expressed as '{platform}/{name}/{tag}' file_format : str or NoneType File naming structure in string format. Variables such as year, month, and sat_id will be filled in as needed using python string formatting. The default file format structure is supplied in the instrument list_files routine. 
Attributes ---------- data : pandas.DataFrame loaded science data date : pandas.datetime date for loaded data yr : int year for loaded data bounds : (datetime/filename/None, datetime/filename/None) bounds for loading data, supply array_like for a season with gaps doy : int day of year for loaded data files : pysat.Files interface to instrument files meta : pysat.Meta interface to instrument metadata, similar to netCDF 1.6 orbits : pysat.Orbits interface to extracting data orbit-by-orbit custom : pysat.Custom interface to instrument nano-kernel kwargs : dictionary keyword arguments passed to instrument loading routine Note ---- Pysat attempts to load the module platform_name.py located in the pysat/instruments directory. This module provides the underlying functionality to download, load, and clean instrument data. Alternatively, the module may be supplied directly using keyword inst_module. Examples -------- :: # 1-second mag field data vefi = pysat.Instrument(platform='cnofs', name='vefi', tag='dc_b', clean_level='clean') start = pysat.datetime(2009,1,1) stop = pysat.datetime(2009,1,2) vefi.download(start, stop) vefi.load(date=start) print(vefi['dB_mer']) print(vefi.meta['db_mer']) # 1-second thermal plasma parameters ivm = pysat.Instrument(platform='cnofs', name='ivm', tag='', clean_level='clean') ivm.download(start,stop) ivm.load(2009,1) print(ivm['ionVelmeridional']) # Ionosphere profiles from GPS occultation cosmic = pysat.Instrument('cosmic2013', 'gps', 'ionprf', altitude_bin=3) # bins profile using 3 km step cosmic.download(start, stop, user=user, password=password) cosmic.load(date=start) """ def __init__(self, platform=None, name=None, tag=None, sat_id=None, clean_level='clean', update_files=None, pad=None, orbit_info=None, inst_module=None, multi_file_day=None, manual_org=None, directory_format=None, file_format=None, temporary_file_list=False, *arg, **kwargs): if inst_module is None: # use strings to look up module name if isinstance(platform, str) and 
isinstance(name, str): self.platform = platform.lower() self.name = name.lower() # look to module for instrument functions and defaults self._assign_funcs(by_name=True) elif (platform is None) and (name is None): # creating "empty" Instrument object with this path self.name = '' self.platform = '' self._assign_funcs() else: raise ValueError( 'Inputs platform and name must both be strings, or both None.' ) else: # user has provided a module try: # platform and name are expected to be part of module self.name = inst_module.name.lower() self.platform = inst_module.platform.lower() except AttributeError: raise AttributeError( string.join(( 'A name and platform attribute for the ', 'instrument is required if supplying routine module directly.' ))) # look to module for instrument functions and defaults self._assign_funcs(inst_module=inst_module) # more reasonable defaults for optional parameters self.tag = tag.lower() if tag is not None else '' self.sat_id = sat_id.lower() if sat_id is not None else '' self.clean_level = (clean_level.lower() if clean_level is not None else 'none') # assign_func sets some instrument defaults, direct info rules all if directory_format is not None: self.directory_format = directory_format.lower() # value not provided by user, check if there is a value provided by # instrument module elif self.directory_format is not None: try: # check if it is a function self.directory_format = self.directory_format(tag, sat_id) except TypeError: pass if file_format is not None: self.file_format = file_format # value not provided by user, check if there is a value provided by # instrument module elif self.file_format is not None: # check if it is an iterable string. 
If it isn't formatted # properly, give a warning and set file_format to None if (not isinstance(self.file_format, str) or self.file_format.find("{") < 0 or self.file_format.find("}") < 1): estr = 'file format set to default, supplied string must be ' estr = '{:s}iteratable [{:}]'.format(estr, self.file_format) print(estr) self.file_format = None # set up empty data and metadata self.data = DataFrame(None) self.meta = _meta.Meta() # function processing class, processes data on load self.custom = _custom.Custom() # create arrays to store data around loaded day # enables padding across day breaks with minimal loads self._next_data = DataFrame(None) self._next_data_track = [] self._prev_data = DataFrame(None) self._prev_data_track = [] self._curr_data = DataFrame(None) # multi file day, default set by assign_funcs if multi_file_day is not None: self.multi_file_day = multi_file_day # arguments for padding if isinstance(pad, pds.DateOffset): self.pad = pad elif isinstance(pad, dict): self.pad = pds.DateOffset(**pad) elif pad is None: self.pad = None else: estr = 'pad must be a dictionary or a pandas.DateOffset instance.' 
raise ValueError(estr) # instantiate Files class manual_org = False if manual_org is None else manual_org temporary_file_list = not temporary_file_list self.files = _files.Files(self, manual_org=manual_org, directory_format=self.directory_format, update_files=update_files, file_format=self.file_format, write_to_disk=temporary_file_list) # set bounds for iteration # self.bounds requires the Files class # setting (None,None) loads default bounds self.bounds = (None, None) self.date = None self._fid = None self.yr = None self.doy = None self._load_by_date = False # initialize orbit support if orbit_info is None: if self.orbit_info is None: # if default info not provided, set None as default orbit_info = {'index': None, 'kind': None, 'period': None} else: # default provided by instrument module orbit_info = self.orbit_info self.orbits = _orbits.Orbits(self, **orbit_info) # store kwargs, passed to load routine self.kwargs = kwargs # run instrument init function, a basic pass function is used # if user doesn't supply the init function self._init_rtn(self) def __getitem__(self, key): """ Convenience notation for accessing data; inst['name'] is inst.data.name Examples -------- :: # By name inst['name'] # By position inst[row_index, 'name'] # Slicing by row inst[row1:row2, 'name'] # By Date inst[datetime, 'name'] # Slicing by date, inclusive inst[datetime1:datetime2, 'name'] # Slicing by name and row/date inst[datetime1:datetime1, 'name1':'name2'] """ if isinstance(key, tuple): # support slicing return self.data.ix[key[0], key[1]] else: return self.data[key] def __setitem__(self, key, new): """Convenience method for adding data to instrument. 
Examples -------- :: # Simple Assignment, default metadata assigned # 'long_name' = 'name' # 'units' = '' inst['name'] = newData # Assignment with Metadata inst['name'] = {'data':new_data, 'long_name':long_name, 'units':units} Note ---- If no metadata provided and if metadata for 'name' not already stored then default meta information is also added, long_name = 'name', and units = ''. """ if isinstance(new, dict): # metadata should be included in dict self.data[key] = new.pop('data') # pass the rest to meta self.meta[key] = new else: if isinstance(key, tuple): self.data.ix[key[0], key[1]] = new self.meta[key[1]] = {} elif isinstance(key, str): self.data[key] = new self.meta[key] = {} elif isinstance(new, DataFrame): self.data[key] = new[key] for ke in key: self.meta[ke] = {} else: raise ValueError("No support for supplied input key") def copy(self): """Deep copy of the entire Instrument object.""" return copy.deepcopy(self) def _pass_func(*args, **kwargs): pass def _assign_funcs(self, by_name=False, inst_module=None): """Assign all external science instrument methods to Instrument object.""" import importlib # set defaults self._list_rtn = self._pass_func self._load_rtn = self._pass_func self._default_rtn = self._pass_func self._clean_rtn = self._pass_func self._init_rtn = self._pass_func self._download_rtn = self._pass_func # default params self.directory_format = None self.file_format = None self.multi_file_day = False self.orbit_info = None if by_name: # look for code with filename name, any errors passed up inst = importlib.import_module(''.join( ('.', self.platform, '_', self.name)), package='pysat.instruments') elif inst_module is not None: # user supplied an object with relevant instrument routines inst = inst_module else: # no module or name info, default pass functions assigned return try: self._load_rtn = inst.load self._list_rtn = inst.list_files self._download_rtn = inst.download except AttributeError: estr = 'A load, file_list, and download routine are 
required for ' raise AttributeError('{:s}every instrument.'.format(estr)) try: self._default_rtn = inst.default except AttributeError: pass try: self._init_rtn = inst.init except AttributeError: pass try: self._clean_rtn = inst.clean except AttributeError: pass # look for instrument default parameters try: self.directory_format = inst.directory_format except AttributeError: pass try: self.multi_file_day = inst.self.multi_file_day except AttributeError: pass try: self.orbit_info = inst.orbit_info except AttributeError: pass return def _load_data(self, date=None, fid=None): """ Load data for an instrument on given date or fid, dependng upon input. """ if fid is not None: # get filename based off of index value fname = self.files[fid:fid + 1] elif date is not None: fname = self.files[date:date + pds.DateOffset(days=1)] else: raise ValueError('Must supply either a date or file id number.') if len(fname) > 0: load_fname = [os.path.join(self.files.data_path, f) for f in fname] data, mdata = self._load_rtn(load_fname, tag=self.tag, sat_id=self.sat_id, **self.kwargs) else: data = DataFrame(None) mdata = _meta.Meta() output_str = '{platform} {name} {tag} {sat_id}' output_str = output_str.format(platform=self.platform, name=self.name, tag=self.tag, sat_id=self.sat_id) if not data.empty: if not isinstance(data, DataFrame): raise TypeError( string.join(('Data returned by instrument load', 'routine must be a pandas.DataFrame'))) if not isinstance(mdata, _meta.Meta): raise TypeError( 'Metadata returned must be a pysat.Meta object') if date is not None: output_str = ' '.join( ('Returning', output_str, 'data for', date.strftime('%D'))) else: if len(fname) == 1: # this check was zero output_str = ' '.join( ('Returning', output_str, 'data from', fname[0])) else: output_str = ' '.join( ('Returning', output_str, 'data from', fname[0], '::', fname[-1])) else: # no data signal output_str = ' '.join( ('No', output_str, 'data for', date.strftime('%D'))) # remove extra spaces, if any 
output_str = " ".join(output_str.split()) print(output_str) return data, mdata def _load_next(self): """Load the next days data (or file) without incrementing the date. Repeated calls will not advance date/file and will produce the same data Uses info stored in object to either increment the date, or the file. Looks for self._load_by_date flag. """ if self._load_by_date: next_date = self.date + pds.DateOffset(days=1) return self._load_data(date=next_date) else: return self._load_data(fid=self._fid + 1) def _load_prev(self): """Load the next days data (or file) without decrementing the date. Repeated calls will not decrement date/file and will produce the same data Uses info stored in object to either decrement the date, or the file. Looks for self._load_by_date flag. """ if self._load_by_date: prev_date = self.date - pds.DateOffset(days=1) return self._load_data(date=prev_date) else: return self._load_data(fid=self._fid - 1) def load(self, yr=None, doy=None, date=None, fname=None, fid=None, verifyPad=False): """Load instrument data into Instrument object .data. Parameters ---------- yr : integer year for desired data doy : integer day of year date : datetime object date to load fname : 'string' filename to be loaded verifyPad : boolean if True, padding data not removed (debug purposes) Returns -------- Void. Data is added to self.data Note ---- Loads data for a chosen instrument into .data. Any functions chosen by the user and added to the custom processing queue (.custom.add) are automatically applied to the data before it is available to user in .data. 
""" if date is not None: # date supplied getyrdoy checks if it is datetime year, doy = utils.getyrdoy(date) self.yr = year self.doy = doy self.date = date self._fid = None self._load_by_date = True inc = pds.DateOffset(days=1) curr = date elif (yr is not None) & (doy is not None): # if date not defined but both yr and doy are self.date = pds.datetime(yr, 1, 1) + pds.DateOffset(days=(doy - 1)) self.yr = yr self.doy = doy self._fid = None self._load_by_date = True inc = pds.DateOffset(days=1) curr = self.date elif fname is not None: # date will have to be set later by looking at the data self.date = None self.yr = None self.doy = None self._load_by_date = False # if no index, called func tries to find file in instrument dir, # throws error if it fails self._fid = self.files.get_index(fname) inc = 1 curr = self._fid.copy() elif fid is not None: self._load_by_date = False self._fid = fid self.date = None self.yr = None self.doy = None inc = 1 curr = fid else: estr = 'Must supply a yr,doy pair, or datetime object, or filename' estr = '{:s} to load data from.'.format(estr) raise TypeError(estr) self.orbits._reset() # if pad is true, need to have a three day/file load if (self.pad is not None) | self.multi_file_day: if self._next_data.empty & self._prev_data.empty: # data has not already been loaded for previous and next days # load data for all three print('Initializing three day/file window') # using current date or fid self._prev_data, self._prev_meta = self._load_prev() self._curr_data, self._curr_meta = \ self._load_data(date=self.date, fid=self._fid) self._next_data, self._next_meta = self._load_next() else: # moving forward in time if self._next_data_track == curr: self._prev_data = self._curr_data self._prev_meta = self._curr_meta self._curr_data = self._next_data self._curr_meta = self._next_meta self._next_data, self._next_meta = self._load_next() # moving backward in time elif self._prev_data_track == curr: self._next_data = self._curr_data self._next_meta = 
self._curr_meta self._curr_data = self._prev_data self._curr_meta = self._prev_meta self._prev_data, self._prev_meta = self._load_prev() # jumped in time/or switched from filebased to date based access else: self._prev_data, self._prev_meta = self._load_prev() self._curr_data, self._curr_meta = \ self._load_data(date=self.date, fid=self._fid) self._next_data, self._next_meta = self._load_next() # make sure datetime indices for all data is monotonic if not self._prev_data.index.is_monotonic_increasing: self._prev_data.sort_index(inplace=True) if not self._curr_data.index.is_monotonic_increasing: self._curr_data.sort_index(inplace=True) if not self._next_data.index.is_monotonic_increasing: self._next_data.sort_index(inplace=True) # make tracking indexes consistent with new loads self._next_data_track = curr + inc self._prev_data_track = curr - inc # attach data to object if not self._curr_data.empty: self.data = self._curr_data.copy() self.meta = self._curr_meta.copy() else: self.data = DataFrame(None) # line below removed as it would delete previous meta, if any # if you end a seasonal analysis with a day with no data, then # no meta: self.meta = _meta.Meta() if self.multi_file_day: self.data = self.data.ix[self.date:self.date + pds.DateOffset( hours=23, minutes=59, seconds=59, nanoseconds=99999999)] # pad data based upon passed parameter if (not self._prev_data.empty) & (not self.data.empty): if self.multi_file_day and self._load_by_date: padLeft = self._prev_data.ix[( self.date):self._curr_data.index[0]] else: padLeft = self._prev_data.ix[( self._curr_data.index[0] - self.pad):self._curr_data.index[0]] #self.data = pds.concat([padLeft[0:-1], self.data]) self.data = pds.concat([padLeft, self.data]) if (not self._next_data.empty) & (not self.data.empty): if self.multi_file_day and self._load_by_date: padRight = self._next_data.ix[self.date : (self.date + \ pds.DateOffset(hours=23, minutes=59, seconds=59, nanoseconds=99999999))] else: padRight = 
self._next_data.ix[self._curr_data.index[-1]:( self._curr_data.index[-1] + self.pad)] #self.data = pds.concat([self.data, padRight[1:]]) self.data = pds.concat([self.data, padRight]) # drop any possible duplicate index times #self.data.drop_duplicates(inplace=True) self.data = self.data[~self.data.index.duplicated()] # if self.pad is False, load single day else: self.data, meta = self._load_data(date=self.date, fid=self._fid) if not self.data.empty: self.meta = meta # check if load routine actually returns meta if self.meta.data.empty: self.meta[self.data.columns] = { 'long_name': self.data.columns, 'units': [''] * len(self.data.columns) } # if loading by file set the yr, doy, and date if not self._load_by_date: temp = self.data.index[0] temp = pds.datetime(temp.year, temp.month, temp.day) self.date = temp self.yr, self.doy = utils.getyrdoy(self.date) if not self.data.empty: self._default_rtn(self) # clean if (not self.data.empty) & (self.clean_level != 'none'): self._clean_rtn(self) # apply custom functions if not self.data.empty: self.custom._apply_all(self) # remove the excess padding, if any applied if (self.pad is not None) & (not self.data.empty) & (not verifyPad): self.data = self.data[self._curr_data.index[0]:self._curr_data. index[-1]] sys.stdout.flush() return def download(self, start, stop, freq='D', user=None, password=None): """Download data for given Instrument object from start to stop. Parameters ---------- start : pandas.datetime start date to download data stop : pandas.datetime stop date to download data freq : string Stepsize between dates for season, 'D' for daily, 'M' monthly (see pandas) user : string username, if required by instrument data archive password : string password, if required by instrument data archive Note ---- Data will be downloaded to pysat_data_dir/patform/name/tag If Instrument bounds are set to defaults they are updated after files are downloaded. 
""" import errno # make sure directories are there, otherwise create them try: os.makedirs(self.files.data_path) except OSError as e: if e.errno != errno.EEXIST: raise print('Downloading data to: ', self.files.data_path) date_array = utils.season_date_range(start, stop, freq=freq) if user is None: self._download_rtn(date_array, tag=self.tag, sat_id=self.sat_id, data_path=self.files.data_path) else: self._download_rtn(date_array, tag=self.tag, sat_id=self.sat_id, data_path=self.files.data_path, user=user, password=password) # get current file date range first_date = self.files.start_date last_date = self.files.stop_date print('Updating pysat file list') self.files.refresh() # if instrument object has default bounds, update them if len(self.bounds[0]) == 1: if (self.bounds[0][0] == first_date and self.bounds[1][0] == last_date): print('Updating instrument object bounds.') self.bounds = None @property def bounds(self): """Boundaries for iterating over instrument object by date or file. Parameters ---------- start : datetime object, filename, or None (default) start of iteration, if None uses first data date. list-like collection also accepted end : datetime object, filename, or None (default) end of iteration, inclusive. If None uses last data date. 
list-like collection also accepted Note ---- Both start and stop must be the same type (date, or filename) or None Examples -------- :: inst = pysat.Instrument(platform=platform, name=name, tag=tag) start = pysat.datetime(2009,1,1) stop = pysat.datetime(2009,1,31) inst.bounds = (start,stop) start2 = pysat.datetetime(2010,1,1) stop2 = pysat.datetime(2010,2,14) inst.bounds = ([start, start2], [stop, stop2]) """ return self._iter_start, self._iter_stop @bounds.setter def bounds(self, value=None): if value is None: value = (None, None) if len(value) < 2: raise ValueError('Must supply both a start and end date/file' + 'Supply None if you want the first/last possible') start = value[0] end = value[1] # get the frequency, or step size, of season if len(value) == 3: step = value[2] else: # default do daily step = 'D' if (start is None) and (end is None): # set default self._iter_start = [self.files.start_date] self._iter_stop = [self.files.stop_date] self._iter_type = 'date' if self._iter_start[0] is not None: # check here in case Instrument is initialized with no input self._iter_list = utils.season_date_range(self._iter_start, self._iter_stop, freq=step) elif (hasattr(start, '__iter__') and not isinstance(start, str)) and ( hasattr(end, '__iter__') and not isinstance(end, str)): base = type(start[0]) for s, t in zip(start, end): if (type(s) != type(t)) or (type(s) != base): raise ValueError( 'Start and end items must all be of the same type') if isinstance(start[0], str): self._iter_type = 'file' self._iter_list = self.files.get_file_array(start, end) elif isinstance(start[0], pds.datetime): self._iter_type = 'date' self._iter_list = utils.season_date_range(start, end, freq=step) else: raise ValueError( 'Input is not a known type, string or datetime') self._iter_start = start self._iter_stop = end elif (hasattr(start, '__iter__') and not isinstance(start, str)) or ( hasattr(end, '__iter__') and not isinstance(end, str)): raise ValueError( 'Both start and end must be 
iterable if one bound is iterable') elif isinstance(start, str) or isinstance(end, str): if isinstance(start, pds.datetime) or isinstance( end, pds.datetime): raise ValueError('Not allowed to mix file and date bounds') if start is None: start = self.files[0] if end is None: end = self.files.files[-1] self._iter_start = [start] self._iter_stop = [end] self._iter_list = self.files.get_file_array( self._iter_start, self._iter_stop) self._iter_type = 'file' elif isinstance(start, pds.datetime) or isinstance(end, pds.datetime): if start is None: start = self.files.start_date if end is None: end = self.files.stop_date self._iter_start = [start] self._iter_stop = [end] self._iter_list = utils.season_date_range(start, end, freq=step) self._iter_type = 'date' else: raise ValueError( 'Provided an invalid combination of bounds. ' + 'if specifying by file, both bounds must be by file. Other ' + 'combinations of datetime objects and None are allowed.') def __iter__(self): """Iterates instrument object by loading subsequent days or files. Note ---- Limits of iteration, and iteration type (date/file) set by `bounds` attribute. Default bounds are the first and last dates from files on local system. Examples -------- :: inst = pysat.Instrument(platform=platform, name=name, tag=tag) start = pysat.datetime(2009,1,1) stop = pysat.datetime(2009,1,31) inst.bounds = (start,stop) for inst in inst: print('Another day loaded', inst.date) """ if self._iter_type == 'file': for fname in self._iter_list: self.load(fname=fname) yield self elif self._iter_type == 'date': for date in self._iter_list: self.load(date=date) yield self def next(self): """Manually iterate through the data loaded in Instrument object. Bounds of iteration and iteration type (day/file) are set by `bounds` attribute. Note ---- If there were no previous calls to load then the first day(default)/file will be loaded. 
""" if self._iter_type == 'date': if self.date is not None: idx, = np.where(self._iter_list == self.date) if (len(idx) == 0) | (idx + 1 >= len(self._iter_list)): raise StopIteration('Outside the set date boundaries.') else: idx += 1 self.load(date=self._iter_list[idx[0]]) else: self.load(date=self._iter_list[0]) elif self._iter_type == 'file': if self._fid is not None: first = self.files.get_index(self._iter_list[0]) last = self.files.get_index(self._iter_list[-1]) if (self._fid < first) | (self._fid + 1 > last): raise StopIteration('Outside the set file boundaries.') else: self.load(fname=self._iter_list[self._fid + 1 - first]) else: self.load(fname=self._iter_list[0]) def prev(self): """Manually iterate backwards through the data in Instrument object. Bounds of iteration and iteration type (day/file) are set by `bounds` attribute. Note ---- If there were no previous calls to load then the first day(default)/file will be loaded. """ if self._iter_type == 'date': if self.date is not None: idx, = np.where(self._iter_list == self.date) if (len(idx) == 0) | (idx - 1 < 0): raise StopIteration('Outside the set date boundaries.') else: idx -= 1 self.load(date=self._iter_list[idx[0]]) else: self.load(date=self._iter_list[-1]) elif self._iter_type == 'file': if self._fid is not None: first = self.files.get_index(self._iter_list[0]) last = self.files.get_index(self._iter_list[-1]) if (self._fid - 1 < first) | (self._fid > last): raise StopIteration('Outside the set file boundaries.') else: self.load(fname=self._iter_list[self._fid - 1 - first]) else: self.load(fname=self._iter_list[-1]) def to_netcdf4(self, fname=None, format=None): """Stores loaded data into a netCDF3/4 file. Parameters ---------- fname : string full path to save instrument object to format : string format keyword passed to netCDF4 routine NETCDF3_CLASSIC, NETCDF3_64BIT, NETCDF4_CLASSIC, and NETCDF4 Note ---- Stores 1-D data along dimension 'time' - the date time index. Stores object data (e.g. 
dataframes within series) separately - The name of the series is used to prepend extra variable dimensions within netCDF, key_2, key_3; first dimension time - The index organizing the data stored as key_sample_index - from_netcdf3 uses this naming scheme to reconstruct data structure The datetime index is stored as 'UNIX time'. netCDF-3 doesn't support 64-bit integers so it is stored as a 64-bit float. This results in a loss of datetime precision when converted back to datetime index up to hundreds of nanoseconds. Use netCDF4 if this is a problem. All attributes attached to instrument meta are written to netCDF attrs. """ import netCDF4 if format is None: format = 'NETCDF3_64BIT' else: format = format.upper() with netCDF4.Dataset(fname, mode='w', format=format) as out_data: num = len(self.data.index) out_data.createDimension('time', num) # write out the datetime index cdfkey = out_data.createVariable( 'time', 'f8', dimensions=('time'), ) cdfkey.units = 'seconds since 1970-1-1 0:0:0' cdfkey.long_name = 'UNIX time' cdfkey.calendar = 'standard' cdfkey[:] = (self.data.index.astype(int) * 1.E-3).astype(int) * 1.E-6 # store all of the data in dataframe columns for key in self.data.columns: if self[key].dtype != np.dtype('O'): # not an object, simple column of data, write it out if ((self[key].dtype == np.int64) & (format[:7] == 'NETCDF3')): self[key] = self[key].astype(np.int32) cdfkey = out_data.createVariable( key, self[key].dtype, dimensions=('time'), ) cdfkey.units = self.meta[key].units cdfkey.long_name = self.meta[key].long_name cdfkey[:] = self[key].values else: # we are dealing with a more complicated object # presuming a series with a dataframe in each location dims = np.shape(self[key].iloc[0]) obj_dim_names = [] # don't need to recreate last dimension, # it covers number of columns for i, dim in enumerate(dims[:-1]): obj_dim_names.append(key + '_dim_%i' % (i + 1)) out_data.createDimension(obj_dim_names[-1], dim) var_dim = tuple(['time'] + obj_dim_names) #print 
(key, var_dim) # iterate over columns and store try: iterable = self[key].iloc[0].columns is_frame = True except AttributeError: # looking at a series, which doesn't have columns iterable = self[key].iloc[0].name is_frame = False for col in iterable: if is_frame: coltype = self[key].iloc[0][col].dtype else: coltype = self[key].iloc[0].dtype if ((coltype == np.int64) & (format[:7] == 'NETCDF3')): coltype = np.int32 #elif coltype == np.dtype('O'): # if isinstance(self[key].iloc[0][col][0], basestring): # coltype = 'S1' #print (key+'_' +col, var_dim, coltype) cdfkey = out_data.createVariable(key + '_' + col, coltype, dimensions=var_dim) cdfkey.long_name = col cdfkey.units = '' if is_frame: for i in xrange(num): cdfkey[i, :] = self[key].iloc[i][ col].values.astype(coltype) else: #print (self[key]) print(np.shape(cdfkey)) for i in xrange(num): print(i) cdfkey[i, :] = self[key].iloc[i].values.astype( coltype) # store the dataframe index for each time of main dataframe datetime_flag = False coltype = self[key].iloc[0].index.dtype # check for datetime index if coltype == np.dtype('<M8[ns]'): coltype = 'f8' datetime_flag = True if coltype == np.int64: coltype = np.int32 #print (key+'_' + '_ample', var_dim, coltype) cdfkey = out_data.createVariable(key + '_dim_1', coltype, dimensions=var_dim) if datetime_flag: cdfkey.units = 'seconds since 1970-1-1 0:0:0' cdfkey.long_name = 'UNIX time' for i in xrange(num): cdfkey[i, :] = (self[key].iloc[i].index.astype(int) * 1.E-3).astype(int) * 1.E-6 else: cdfkey.units = '' if self[key].iloc[0].index.name is not None: cdfkey.long_name = self[key].iloc[0].index.name else: cdfkey.long_name = key for i in xrange(num): cdfkey[i, :] = self[key].iloc[ i].index.to_native_types() # store any non standard attributes base_attrb = dir(Instrument()) this_attrb = dir(self) adict = {} for key in this_attrb: if key not in base_attrb: if key[0] != '_': adict[key] = self.__getattribute__(key) # store any non-standard attributes attached to meta base_attrb 
= dir(_meta.Meta()) this_attrb = dir(self.meta) for key in this_attrb: if key not in base_attrb: if key[0] != '_': adict[key] = self.meta.__getattribute__(key) adict['pysat_version'] = 1.0 adict['Conventions'] = 'CF-1.6' # check for binary types for key in adict.keys(): if isinstance(adict[key], bool): adict[key] = int(adict[key]) out_data.setncatts(adict) return
def __init__(self, platform=None, name=None, tag=None, sat_id=None,
             clean_level='clean', update_files=None, pad=None,
             orbit_info=None, inst_module=None, multi_file_day=None,
             manual_org=None, directory_format=None, file_format=None,
             temporary_file_list=False, *arg, **kwargs):
    """Set up an Instrument from a platform/name pair or a supplied module.

    Parameters
    ----------
    platform : str or None
        Platform name, stored lower-cased. Used together with `name` to
        look up the instrument routines when `inst_module` is None.
    name : str or None
        Instrument name, stored lower-cased. Must be a string when
        `platform` is a string, and None when `platform` is None.
    tag : str or None
        Stored lower-cased on ``self.tag``; None is stored as ''.
    sat_id : str or None
        Stored lower-cased on ``self.sat_id``; None is stored as ''.
    clean_level : str or None
        Stored lower-cased on ``self.clean_level``; None is stored as
        'none'.
    update_files : object
        Forwarded unchanged to the Files class.
    pad : pandas.DateOffset, dict, or None
        A DateOffset is stored as is, a dict is expanded into
        ``pds.DateOffset(**pad)``, None disables padding.
    orbit_info : dict or None
        Keyword arguments for the Orbits class. If None, the value of
        ``self.orbit_info`` is used, or a dict of None entries for
        'index', 'kind' and 'period' when that is None as well.
    inst_module : object or None
        Object carrying `name` and `platform` string attributes; when
        given it is handed to ``_assign_funcs`` instead of a lookup by
        name.
    multi_file_day : bool or None
        When not None, overrides ``self.multi_file_day``.
    manual_org : bool or None
        Forwarded to the Files class; None is treated as False.
    directory_format : str or None
        When given it is stored lower-cased. Otherwise, if
        ``self.directory_format`` is already set and callable, it is
        replaced by the result of calling it with ``(tag, sat_id)``.
    file_format : str or None
        When given it is stored as is. Otherwise an existing
        ``self.file_format`` is reset to None (with a printed notice)
        unless it is a string containing '{' and '}'.
    temporary_file_list : bool
        The Files class is created with
        ``write_to_disk = not temporary_file_list``.
    *arg
        Accepted for compatibility; not used in this method.
    **kwargs
        Stored on ``self.kwargs`` for the instrument load routine.

    Raises
    ------
    ValueError
        If `platform` and `name` are not both strings or both None, or if
        `pad` is not a dict, a pandas.DateOffset, or None.
    AttributeError
        If `inst_module` lacks a `name` or `platform` attribute.
    """
    if inst_module is None:
        # use strings to look up module name
        if isinstance(platform, str) and isinstance(name, str):
            self.platform = platform.lower()
            self.name = name.lower()
            # look to module for instrument functions and defaults
            self._assign_funcs(by_name=True)
        elif (platform is None) and (name is None):
            # creating "empty" Instrument object with this path
            self.name = ''
            self.platform = ''
            self._assign_funcs()
        else:
            raise ValueError(
                'Inputs platform and name must both be strings, or both None.')
    else:
        # user has provided a module
        try:
            # platform and name are expected to be part of module
            self.name = inst_module.name.lower()
            self.platform = inst_module.platform.lower()
        except AttributeError:
            # str.join replaces the Python 2 only string.join function;
            # the resulting message text is unchanged
            raise AttributeError(' '.join((
                'A name and platform attribute for the ',
                'instrument is required if supplying routine module directly.'
            )))
        # look to module for instrument functions and defaults
        self._assign_funcs(inst_module=inst_module)

    # more reasonable defaults for optional parameters
    self.tag = tag.lower() if tag is not None else ''
    self.sat_id = sat_id.lower() if sat_id is not None else ''
    self.clean_level = (clean_level.lower() if clean_level is not None
                        else 'none')

    # assign_func sets some instrument defaults, direct info rules all
    if directory_format is not None:
        self.directory_format = directory_format.lower()
    elif self.directory_format is not None:
        # value not provided by user, use the one from the instrument
        # module; it may be a function of (tag, sat_id)
        try:
            self.directory_format = self.directory_format(tag, sat_id)
        except TypeError:
            # not callable (or wrong signature): keep the value as is
            pass

    if file_format is not None:
        self.file_format = file_format
    elif self.file_format is not None:
        # value not provided by user, check the one from the instrument
        # module. It must be a string with format fields; otherwise give
        # a warning and fall back to None
        if (not isinstance(self.file_format, str)
                or self.file_format.find("{") < 0
                or self.file_format.find("}") < 1):
            estr = 'file format set to default, supplied string must be '
            estr = '{:s}iteratable [{:}]'.format(estr, self.file_format)
            print(estr)
            self.file_format = None

    # set up empty data and metadata
    self.data = DataFrame(None)
    self.meta = _meta.Meta()

    # function processing class, processes data on load
    self.custom = _custom.Custom()

    # create arrays to store data around loaded day
    # enables padding across day breaks with minimal loads
    self._next_data = DataFrame(None)
    self._next_data_track = []
    self._prev_data = DataFrame(None)
    self._prev_data_track = []
    self._curr_data = DataFrame(None)

    # multi file day, default set by assign_funcs
    if multi_file_day is not None:
        self.multi_file_day = multi_file_day

    # arguments for padding
    if isinstance(pad, pds.DateOffset):
        self.pad = pad
    elif isinstance(pad, dict):
        self.pad = pds.DateOffset(**pad)
    elif pad is None:
        self.pad = None
    else:
        estr = 'pad must be a dictionary or a pandas.DateOffset instance.'
        raise ValueError(estr)

    # instantiate Files class
    manual_org = False if manual_org is None else manual_org
    # a temporary file list is one that is not written to disk
    write_to_disk = not temporary_file_list
    self.files = _files.Files(self, manual_org=manual_org,
                              directory_format=self.directory_format,
                              update_files=update_files,
                              file_format=self.file_format,
                              write_to_disk=write_to_disk)

    # set bounds for iteration
    # self.bounds requires the Files class
    # setting (None,None) loads default bounds
    self.bounds = (None, None)
    self.date = None
    self._fid = None
    self.yr = None
    self.doy = None
    self._load_by_date = False

    # initialize orbit support
    if orbit_info is None:
        if self.orbit_info is None:
            # if default info not provided, set None as default
            orbit_info = {'index': None, 'kind': None, 'period': None}
        else:
            # default provided by instrument module
            orbit_info = self.orbit_info
    self.orbits = _orbits.Orbits(self, **orbit_info)

    # store kwargs, passed to load routine
    self.kwargs = kwargs

    # run instrument init function, a basic pass function is used
    # if user doesn't supply the init function
    self._init_rtn(self)
def __setitem__(self, name, value):
    """Convenience method for adding metadata.

    Parameters
    ----------
    name : str or list-like of str
        Variable name, or names, the metadata is assigned to.
    value : dict, Series, or Meta
        dict
            Maps metadata labels to values. For a single string `name`
            each entry is one value; for several names each entry holds
            one value per name. Missing units default to '' and a missing
            long name defaults to the variable name, unless the variable
            already exists, in which case the stored value is reused. An
            optional 'meta' entry holds objects that are assigned to the
            matching names via ``self[item] = obj``; names handled that
            way are left out of the remaining processing.
        Series
            Written directly to ``self.data.loc[name]``.
        Meta
            Stored under ``self.ho_data[name]`` (higher order data).

    Raises
    ------
    ValueError
        If the number of names differs from the length of any dict entry.

    Note
    ----
    The supplied dictionary, its lists, and a supplied list of names are
    not modified; the method works on copies.

    Examples
    --------
    ::

        meta = pysat.Meta()
        meta['name'] = {'long_name':string, 'units':string}
        # update 'units' to new value
        meta['name'] = {'units':string}
        # update 'long_name' to new value
        meta['name'] = {'long_name':string}
        # attach new info with partial information, 'long_name' set to 'name2'
        meta['name2'] = {'units':string}
        # units are set to '' by default
        meta['name3'] = {'long_name':string}

    """
    if isinstance(value, dict):
        # everything below edits `value`; use a shallow copy so the
        # dictionary owned by the caller is left untouched
        value = value.copy()

        # an emptiness test that works for Python 2 lists and Python 3
        # key views alike (``value.keys() == []`` is never True on 3)
        if len(value) == 0:
            # null input, everything should be set to default
            if isinstance(name, basestring):
                if name in self:
                    # variable already exists and we don't have anything
                    # new to add, just leave
                    return
                # otherwise, continue on and set defaults
            else:
                name = [n for n in name if n not in self]
                if len(name) == 0:
                    # all variables exist, can leave
                    return
                # create default input for all remaining names
                value = {}
                value[self._units_label] = [''] * len(name)
                value[self._name_label] = list(name)

        # if not passed an iterable of names, make it one
        if isinstance(name, basestring):
            name = [name]
            for key in list(value.keys()):
                value[key] = [value[key]]

        for key in value.keys():
            if len(name) != len(value[key]):
                raise ValueError('Length of names and inputs must be equal.')

        if 'meta' in value:
            # process higher order stuff first
            # could be part of multiple assignment
            # so assign the Meta objects, then remove all trace
            # of names with Meta
            meta_values = value.pop('meta')
            assigned = set()
            for j, (item, val) in enumerate(zip(name, meta_values)):
                if val is not None:
                    # assign meta data, recursive call....
                    self[item] = val
                    assigned.add(j)

            if len(value) == 0:
                # 'meta' was the only entry supplied, nothing left to add
                name = []
            elif assigned:
                # drop names that had a Meta object assigned, along with
                # their place holder entries in the other inputs; build
                # new lists rather than popping from the caller's lists
                keep = [j for j in range(len(name)) if j not in assigned]
                name = [name[j] for j in keep]
                for key in list(value.keys()):
                    value[key] = [value[key][j] for j in keep]

        lower_keys = [k.lower() for k in value.keys()]
        if 'units' not in lower_keys:
            # provide default value, or copy existing
            value[self._units_label] = [
                self[item_name, 'units'] if item_name in self else ''
                for item_name in name]
        if 'long_name' not in lower_keys:
            # provide default value, or copy existing
            value[self._name_label] = [
                self[item_name, 'long_name'] if item_name in self
                else item_name
                for item_name in name]

        if len(name) > 0:
            # make sure there is still something to add
            new = DataFrame(value, index=name)
            for item_name, item in new.iterrows():
                if item_name not in self:
                    self.data = self.data.append(item)
                else:
                    # info already exists, update with new info
                    for item_key in item.keys():
                        self.data.loc[item_name, item_key] = item[item_key]

    elif isinstance(value, Series):
        self.data.loc[name] = value

    elif isinstance(value, Meta):
        # dealing with higher order data set
        self.ho_data[name] = value
def load_netcdf4(fnames=None, strict_meta=False, format=None):
    """Load netCDF-3/4 file produced by pysat.

    Parameters
    ----------
    fnames : string or array_like of strings
        filenames to load
    strict_meta : boolean
        check if metadata across fnames is the same
    format : string
        format keyword passed to netCDF4 routine
        NETCDF3_CLASSIC, NETCDF3_64BIT, NETCDF4_CLASSIC, and NETCDF4.
        Defaults to NETCDF3_64BIT; any supplied value is upper-cased.

    Returns
    -------
    out : pandas.DataFrame
        Variables of the first file in `fnames`, indexed by the datetime
        index built from its 'time' variable. Variables with two
        dimensions are regrouped into one column holding a list of
        DataFrames, one per time.
    mdata : pysat.Meta
        Metadata built from the last file read.

    Raises
    ------
    ValueError
        If no filename is supplied, or if `strict_meta` is True and the
        metadata differs between files.

    Note
    ----
    Every file is read (and checked when `strict_meta` is set), but only
    the data of the first file is combined into the returned DataFrame.

    """
    import netCDF4
    import pysat

    if fnames is None:
        raise ValueError("Must supply a filename/list of filenames")
    if isinstance(fnames, basestring):
        fnames = [fnames]

    if format is None:
        format = 'NETCDF3_64BIT'
    else:
        format = format.upper()

    saved_mdata = None
    running_store = []
    for fname in fnames:
        # bookkeeping for 2D variables is per file; carrying it over would
        # make later files look up variables that only exist in earlier ones
        two_d_keys = []
        two_d_dims = []
        with netCDF4.Dataset(fname, mode='r', format=format) as data:
            # build up dictionary with all ncattrs
            # and add those attributes to a pysat meta object
            mdata = pysat.Meta()
            for d in data.ncattrs():
                if hasattr(mdata, d):
                    # avoid clobbering an existing Meta attribute
                    setattr(mdata, d + '_', data.getncattr(d))
                else:
                    setattr(mdata, d, data.getncattr(d))

            # loadup all of the variables in the netCDF
            loadedVars = {}
            for key in data.variables.keys():
                variable = data.variables[key]
                num_dims = len(variable.dimensions)
                if num_dims == 1:
                    # assuming basic time dimension
                    loadedVars[key] = variable[:]
                    if key != 'time':
                        mdata[key] = {'long_name': variable.long_name,
                                      'units': variable.units}
                if num_dims == 2:
                    # part of dataframe within dataframe
                    two_d_keys.append(key)
                    two_d_dims.append(variable.dimensions)

            # we now have a list of keys that need to go into a dataframe,
            # could be more than one, collect unique dimensions for 2D keys
            for dim in set(two_d_dims):
                # dimension naming follows name_dim_number; locate the
                # last '_' and step back over '_dim' to recover the name
                # (str.find replaces the Python 2 only string.find)
                reversed_dim = dim[1][::-1]
                obj_key_name = dim[1][:-reversed_dim.find('_') - 5]
                # collect variable names associated with object
                obj_var_keys = [tkey for tkey, tdim
                                in zip(two_d_keys, two_d_dims)
                                if tdim == dim]

                # build one DataFrame per entry along the first dimension
                num_frames = data.variables[obj_var_keys[0]].shape[0]
                loop_list = []
                for i in range(num_frames):
                    loop_frame = DataFrame(None)
                    for key in obj_var_keys:
                        # strip the leading 'name_' from the column label
                        loop_frame[key[len(obj_key_name) + 1:]] = \
                            data.variables[key][i, :]

                    index_var = data.variables[obj_key_name + '_dim_1']
                    if index_var.long_name == 'UNIX time':
                        # nanosecond resolution from datetime64 can't be
                        # stored in netcdf3, no 64-bit integers.
                        # it is stored as a float, need to undo processing
                        # due to precision loss, resolution limited to
                        # the microsecond
                        loop_frame.index = pds.to_datetime(
                            (1E6 * loop_frame['dim_1']).astype(int) * 1000)
                        loop_frame.index.name = 'time'
                    else:
                        loop_frame.index = loop_frame['dim_1']
                        loop_frame.index.name = index_var.long_name
                    del loop_frame['dim_1']
                    loop_list.append(loop_frame)

                # add object data to loaded data dictionary
                loadedVars[obj_key_name] = loop_list

            # prepare dataframe index for this netcdf file
            loadedVars['time'] = pds.to_datetime(
                (loadedVars.pop('time') * 1E6).astype(int) * 1000)
            running_store.append(loadedVars)

            if strict_meta:
                if saved_mdata is None:
                    saved_mdata = copy.deepcopy(mdata)
                elif (mdata != saved_mdata):
                    raise ValueError(
                        'Metadata across filenames is not the same.')

    if len(running_store) == 0:
        # an empty collection of filenames leaves nothing to return
        raise ValueError("Must supply a filename/list of filenames")

    # combine all of the data loaded across files together
    # currently doesn't work if list of dicts of lists is provided
    # in other words, only one file at a time
    out = DataFrame.from_records(running_store[0], index='time')

    return out, mdata
class Meta(object):
    """Stores metadata for an Instrument instance, similar to the CF-1.6
    netCDF data standard.

    Parameters
    ----------
    metadata : pandas.DataFrame
        DataFrame should be indexed by variable name that contains at minimum
        the standard_name (name), units, and long_name for the data stored in
        the associated pysat Instrument object.
    units_label : str
        String used to label units in storage. Defaults to 'units'.
    name_label : str
        String used to label long_name in storage. Defaults to 'long_name'.
    notes_label : str
        String used to label 'notes' in storage. Defaults to 'notes'
    desc_label : str
        String used to label variable descriptions in storage.
        Defaults to 'desc'
    plot_label : str
        String used to label variables in plots. Defaults to 'label'
    axis_label : str
        Label used for axis on a plot. Defaults to 'axis'
    scale_label : str
        String used to label plot scaling type in storage.
        Defaults to 'scale'
    min_label : str
        String used to label typical variable value min limit in storage.
        Defaults to 'value_min'
    max_label : str
        String used to label typical variable value max limit in storage.
        Defaults to 'value_max'
    fill_label : str
        String used to label fill value in storage. Defaults to 'fill' per
        netCDF4 standard
    export_nan : list-like or None
        Labels that should be exported even if their value is nan. The fill
        label is always included. Defaults to None (no additional labels).

    Attributes
    ----------
    data : pandas.DataFrame
        index is variable standard name, 'units', 'long_name', and other
        defaults are also stored along with additional user provided labels.
    ho_data : dict
        Meta objects for higher order variables, keyed by variable name.
    mutable : bool
        If False, normal (non-property) attributes can not be set.
    units_label : str
        String used to label units in storage. Defaults to 'units'.
    name_label : str
        String used to label long_name in storage. Defaults to 'long_name'.
    notes_label : str
        String used to label 'notes' in storage. Defaults to 'notes'
    desc_label : str
        String used to label variable descriptions in storage.
        Defaults to 'desc'
    plot_label : str
        String used to label variables in plots. Defaults to 'label'
    axis_label : str
        Label used for axis on a plot. Defaults to 'axis'
    scale_label : str
        String used to label plot scaling type in storage.
        Defaults to 'scale'
    min_label : str
        String used to label typical variable value min limit in storage.
        Defaults to 'value_min'
    max_label : str
        String used to label typical variable value max limit in storage.
        Defaults to 'value_max'
    fill_label : str
        String used to label fill value in storage. Defaults to 'fill' per
        netCDF4 standard

    Notes
    -----
    Meta object preserves the case of variables and attributes as it first
    receives the data. Subsequent calls to set new metadata with the same
    variable or attribute will use case of first call. Accessing or setting
    data thereafter is case insensitive. In practice, use is case insensitive
    but the original case is preserved. Case preservation is built in to
    support writing files with a desired case to meet standards.

    Metadata for higher order data objects, those that have multiple products
    under a single variable name in a pysat.Instrument object, are stored by
    providing a Meta object under the single name.

    Supports any custom metadata values in addition to the expected metadata
    attributes (units, name, notes, desc, plot_label, axis, scale, value_min,
    value_max, and fill). These base attributes may be used to
    programmatically access and set types of metadata regardless of the
    string values used for the attribute. String values for attributes may
    need to be changed depending upon the standards of code or files
    interacting with pysat.

    Meta objects returned as part of pysat loading routines are automatically
    updated to use the same values of plot_label, units_label, etc. as found
    on the pysat.Instrument object.

    Examples
    --------
    ::

        # instantiate Meta object, default values for attribute labels
        # are used
        meta = pysat.Meta()
        # set a couple base units
        # note that other base parameters not set below will
        # be assigned a default value
        meta['name'] = {'long_name':string, 'units':string}
        # update 'units' to new value
        meta['name'] = {'units':string}
        # update 'long_name' to new value
        meta['name'] = {'long_name':string}
        # attach new info with partial information, 'long_name' set to 'name2'
        meta['name2'] = {'units':string}
        # units are set to '' by default
        meta['name3'] = {'long_name':string}

        # assigning custom meta parameters
        meta['name4'] = {'units':string, 'long_name':string,
                         'custom1':string, 'custom2':value}
        meta['name5'] = {'custom1':string, 'custom3':value}

        # assign multiple variables at once
        meta[['name1', 'name2']] = {'long_name':[string1, string2],
                                    'units':[string1, string2],
                                    'custom10':[string1, string2]}

        # assigning metadata for n-Dimensional variables
        meta2 = pysat.Meta()
        meta2['name41'] = {'long_name':string, 'units':string}
        meta2['name42'] = {'long_name':string, 'units':string}
        meta['name4'] = {'meta':meta2}
        # or
        meta['name4'] = meta2
        meta['name4'].children['name41']

        # mixture of 1D and higher dimensional data
        meta = pysat.Meta()
        meta['dm'] = {'units':'hey', 'long_name':'boo'}
        meta['rpa'] = {'units':'crazy', 'long_name':'boo_whoo'}
        meta2 = pysat.Meta()
        meta2[['higher', 'lower']] = {'meta':[meta, None],
                                      'units':[None, 'boo'],
                                      'long_name':[None, 'boohoo']}

        # assign from another Meta object
        meta[key1] = meta2[key2]

        # access fill info for a variable, presuming default label
        meta[key1, 'fill']
        # access same info, even if 'fill' not used to label fill values
        meta[key1, meta.fill_label]

        # change a label used by Meta object
        # note that all instances of fill_label
        # within the meta object are updated
        meta.fill_label = '_FillValue'
        meta.plot_label = 'Special Plot Variable'
        # this feature is useful when converting metadata within pysat
        # so that it is consistent with externally imposed file standards

    """

    def __init__(self, metadata=None, units_label='units',
                 name_label='long_name', notes_label='notes',
                 desc_label='desc', plot_label='label', axis_label='axis',
                 scale_label='scale', min_label='value_min',
                 max_label='value_max', fill_label='fill', export_nan=None):
        """Set up label bookkeeping and the underlying metadata storage.

        Raises
        ------
        ValueError
            If metadata is supplied but is not a pandas DataFrame.

        """
        # set mutability of Meta attributes
        self.mutable = True

        # set the hidden label attributes directly; the public label
        # properties would try to rename columns in data that does not
        # exist yet
        self._units_label = units_label
        self._name_label = name_label
        self._notes_label = notes_label
        self._desc_label = desc_label
        self._plot_label = plot_label
        self._axis_label = axis_label
        self._scale_label = scale_label
        self._min_label = min_label
        self._max_label = max_label
        self._fill_label = fill_label

        # by default metadata with a value of nan will not be exported
        # unless the name is in the _export_nan list. Initialize the list
        # with the fill label, since it is reasonable to assume that a fill
        # value of nan would be intended to be exported
        if export_nan is None:
            export_nan = []
        self._export_nan = [fill_label] + list(export_nan)

        # init higher order (nD) data structure container, a dict
        self._ho_data = {}

        # use any user provided data to instantiate object with data
        if metadata is not None:
            if not isinstance(metadata, DataFrame):
                raise ValueError(''.join(('Input must be a pandas DataFrame ',
                                          'type. See other constructors for',
                                          ' alternate inputs.')))
            self._data = metadata
            # running the label setters against self fills in any of the
            # required default columns missing from the supplied frame
            self.accept_default_labels(self)
        else:
            self._data = DataFrame(None, columns=[self._units_label,
                                                  self._name_label,
                                                  self._desc_label,
                                                  self._plot_label,
                                                  self._axis_label,
                                                  self._scale_label,
                                                  self._notes_label,
                                                  self._min_label,
                                                  self._max_label,
                                                  self._fill_label])

        # establish attributes intrinsic to object, before user can
        # add any
        self._base_attr = dir(self)

    @property
    def ho_data(self):
        """dict of Meta objects for higher order variables"""
        return self._ho_data

    @ho_data.setter
    def ho_data(self, new_dict):
        self._ho_data = new_dict

    @property
    def data(self):
        """pandas.DataFrame holding the 1D metadata"""
        return self._data

    @data.setter
    def data(self, new_frame):
        self._data = new_frame

    @property
    def empty(self):
        """Return boolean True if there is no metadata"""
        # only need to check on lower data since lower data
        # is set when higher metadata assigned
        return self.data.empty

    def merge(self, other):
        """Adds metadata variables to self that are in other but not in self.

        Parameters
        ----------
        other : pysat.Meta

        """
        for key in other.keys():
            if key not in self:
                # copies over both lower and higher dimensional data
                self[key] = other[key]

    def drop(self, names):
        """Drops variables (names) from metadata.

        Parameters
        ----------
        names : str or list-like
            variable name(s) to remove from both the 1D and the higher
            order metadata

        """
        # a bare string would otherwise be iterated character by character
        # when cleaning up the higher order data below
        if isinstance(names, basestring):
            names = [names]

        # drop lower dimension data
        self.data = self._data.drop(names, axis=0)

        # drop higher dimension data
        for name in names:
            self._ho_data.pop(name, None)

    def keep(self, keep_names):
        """Keeps variables (keep_names) while dropping other parameters

        Parameters
        ----------
        keep_names : list-like
            variables to keep

        """
        keep_names = [self.var_case_name(name) for name in keep_names]
        drop_names = [name for name in self._data.index
                      if name not in keep_names]
        self.drop(drop_names)

    def apply_default_labels(self, other):
        """Applies labels for default meta labels from self onto other.

        Parameters
        ----------
        other : Meta
            Meta object to have default labels applied

        Returns
        -------
        Meta
            Copy of other using the labels of self; other is not modified

        """
        other_updated = other.copy()
        other_updated.units_label = self.units_label
        other_updated.name_label = self.name_label
        other_updated.notes_label = self.notes_label
        other_updated.desc_label = self.desc_label
        other_updated.plot_label = self.plot_label
        other_updated.axis_label = self.axis_label
        other_updated.scale_label = self.scale_label
        other_updated.min_label = self.min_label
        other_updated.max_label = self.max_label
        other_updated.fill_label = self.fill_label
        return other_updated

    def accept_default_labels(self, other):
        """Applies labels for default meta labels from other onto self.

        Parameters
        ----------
        other : Meta
            Meta object to take default labels from

        Returns
        -------
        None
            self is modified in place

        """
        self.units_label = other.units_label
        self.name_label = other.name_label
        self.notes_label = other.notes_label
        self.desc_label = other.desc_label
        self.plot_label = other.plot_label
        self.axis_label = other.axis_label
        self.scale_label = other.scale_label
        self.min_label = other.min_label
        self.max_label = other.max_label
        self.fill_label = other.fill_label
        return

    def __contains__(self, other):
        """case insensitive check for variable name"""
        lower_name = other.lower()
        if lower_name in [i.lower() for i in self.keys()]:
            return True
        if lower_name in [i.lower() for i in self.keys_nD()]:
            return True
        return False

    def __repr__(self):
        return 'pysat.MetaData'

    def __str__(self, recurse=True):
        """String describing Meta instance, variables, and attributes"""
        # cover 1D parameters
        parts = ['Metadata for 1D variables\n' if recurse else '']
        parts.extend(ind.ljust(30) for ind in self.keys())
        parts.append('\n\n')
        parts.append('Tracking the following:\n')
        parts.extend(col.ljust(30) for col in self.attrs())
        parts.append('\n')

        # higher order variables are described by their own Meta objects
        if recurse:
            for item_name in self.keys_nD():
                parts.append('\n\n')
                parts.append('Metadata for ' + item_name + '\n')
                parts.append(self.ho_data[item_name].__str__(False))

        return ''.join(parts)

    def _insert_default_values(self, input_name):
        """Add a row of default metadata values for variable input_name."""
        default_str = ''
        default_nan = np.nan
        labels = [self.units_label, self.name_label, self.notes_label,
                  self.desc_label, self.plot_label, self.axis_label,
                  self.scale_label, self.min_label, self.max_label,
                  self.fill_label]
        # same order as labels above
        defaults = [default_str, input_name, default_str, default_str,
                    input_name, input_name, 'linear', default_nan,
                    default_nan, default_nan]
        self._data.loc[input_name, labels] = defaults

    def __setattr__(self, name, value):
        """Conditionally sets attributes based on self.mutable flag

        @properties are assumed to be mutable.

        We avoid recursively setting properties using method from
        https://stackoverflow.com/a/15751135

        Raises
        ------
        AttributeError
            If name is a property without a setter, or if name is a normal
            attribute and self.mutable is False.

        """
        # mutable handled explicitly to avoid recursion
        if name == 'mutable':
            super(Meta, self).__setattr__(name, value)
            return

        # check if this attribute is a property
        propobj = getattr(self.__class__, name, None)
        if isinstance(propobj, property):
            # check if the property is settable
            if propobj.fset is None:
                raise AttributeError(''.join(("can't set attribute - ",
                                              "property has no fset")))

            # make mutable in case fset needs it to be
            mutable_tmp = self.mutable
            self.mutable = True
            try:
                # set the property
                propobj.fset(self, value)
            finally:
                # restore mutability flag, even if the setter failed
                self.mutable = mutable_tmp
        else:
            # a normal attribute
            if self.mutable:
                # use Object to avoid recursion
                super(Meta, self).__setattr__(name, value)
            else:
                raise AttributeError(''.join(
                    ("cannot set attribute - ",
                     "object's attributes are immutable")))

    def __setitem__(self, names, input_data):
        """Convenience method for adding metadata.

        Parameters
        ----------
        names : str, list-like, or slice
            variable name(s) the metadata applies to
        input_data : dict, pandas.Series, or Meta
            dict maps attribute names to values (one value per name when
            several names are given); a Series is treated as the output of
            Meta.__getitem__; a Meta is stored as higher order metadata
            under the single name given

        Raises
        ------
        ValueError
            If the number of names and the number of values supplied for an
            attribute differ.

        """
        if isinstance(input_data, dict):
            # work on a shallow copy so the caller's dict is left untouched
            # by the wrapping and key renaming done below
            input_data = dict(input_data)

            # if not passed an iterable, make it one
            if isinstance(names, basestring):
                names = [names]
                for key in input_data:
                    input_data[key] = [input_data[key]]
            elif isinstance(names, slice) and (names.step is None):
                # Check for instrument[indx,:] or instrument[idx] usage
                # NOTE(review): DataFrame.keys() returns the column labels,
                # not the variable names in the index - confirm intent
                names = list(self.data.keys())

            # make sure the variable names are in good shape
            # Meta object is case insensitive but case preserving
            # convert given names into ones Meta has already seen
            # if new, then input names become the standard
            names = [self.var_case_name(name) for name in names]
            for name in names:
                if name not in self:
                    self._insert_default_values(name)

            # check if input dict empty
            if not input_data:
                # meta wasn't actually assigned by user, empty call
                # we can head out - we've assigned defaults if first data
                return

            # perform some checks on the data
            # make sure number of inputs matches number of metadata inputs
            for key in input_data:
                if len(names) != len(input_data[key]):
                    raise ValueError(''.join(('Length of names and inputs',
                                              ' must be equal.')))

            # make sure the attribute names are in good shape
            # check name of attributes against existing attribute names
            # if attribute name exists somewhere, then case of existing
            # attribute will be enforced upon new data by default for
            # consistency
            for name in list(input_data):
                new_name = self.attr_case_name(name)
                if new_name != name:
                    input_data[new_name] = input_data.pop(name)

            # time to actually add the metadata
            for key in input_data:
                if key not in ['children', 'meta']:
                    for i, name in enumerate(names):
                        to_be_set = input_data[key][i]
                        if hasattr(to_be_set, '__iter__') and \
                                not isinstance(to_be_set, basestring):
                            # we have some list-like object
                            # can only store a single element
                            if len(to_be_set) == 0:
                                # empty list, ensure there is something
                                to_be_set = ['']
                            if isinstance(to_be_set[0], basestring):
                                self._data.loc[name, key] = \
                                    '\n\n'.join(to_be_set)
                            else:
                                warnings.warn(' '.join(
                                    ('Array elements are',
                                     'not allowed in meta.',
                                     'Dropping input :', key)))
                        else:
                            self._data.loc[name, key] = to_be_set
                else:
                    # key is 'meta' or 'children'
                    # process higher order stuff. Meta inputs could be part
                    # of larger multiple parameter assignment
                    # so not all names may actually have 'meta' to add
                    for item, val in zip(names, input_data[key]):
                        if val is not None:
                            # assign meta data, recursive call....
                            # heads to if Meta instance call
                            self[item] = val

        elif isinstance(input_data, Series):
            # outputs from Meta object are a Series.
            # thus this takes in input from a Meta object
            # set data using standard assignment via a dict
            in_dict = input_data.to_dict()
            if 'children' in in_dict:
                child = in_dict.pop('children')
                if child is not None:
                    self.ho_data[names] = child
            # remaining items are simply assigned
            self[names] = in_dict

        elif isinstance(input_data, Meta):
            # dealing with higher order data set
            # names is only a single name here (by choice for support)
            if (names in self._ho_data) and (input_data.empty):
                # no actual metadata provided and there is already some
                # higher order metadata in self
                return

            # get Meta approved variable names
            new_item_name = self.var_case_name(names)

            # ensure that Meta labels of object to be assigned
            # are consistent with self
            # input_data accepts self's labels
            input_data.accept_default_labels(self)

            # go through and ensure Meta object to be added has variable and
            # attribute names consistent with other variables and attributes
            # this covers custom attributes not handled by default routine
            # above
            input_data.data.columns = [self.attr_case_name(name)
                                       for name in input_data.attrs()]

            # same thing for variables
            input_data.data.index = [self.var_case_name(name)
                                     for name in input_data.data.index]

            # assign Meta object now that things are consistent with Meta
            # object settings
            # but first, make sure there are lower dimension metadata
            # parameters, passing in an empty dict fills in defaults
            # if there is no existing metadata info
            self[new_item_name] = {}

            # now add to higher order data
            self._ho_data[new_item_name] = input_data

    def __getitem__(self, key):
        """Convenience method for obtaining metadata.

        Maps to pandas DataFrame.loc method.

        Raises
        ------
        KeyError
            If a string key is not a known variable name.
        NotImplementedError
            If key is not a tuple, list, or string.

        Examples
        --------
        ::

            meta['name']

            meta[ 'name1', 'units' ]

            meta[[ 'name1', 'name2'], 'units']

            meta[:, 'units']

            for higher order data

            meta[ 'name1', 'subvar', 'units' ]

            meta[ 'name1', ('units', 'scale') ]

        """
        # if key is a tuple, looking at index, column access pattern

        def match_name(func, name, names):
            """Applies func on name(s) depending on name type"""
            if isinstance(name, basestring):
                return func(name)
            elif isinstance(name, slice):
                return [func(nn) for nn in names[name]]
            else:
                # assume iterable
                return [func(nn) for nn in name]

        if isinstance(key, tuple):
            # if tuple length is 2, index, column
            if len(key) == 2:
                new_index = match_name(self.var_case_name, key[0],
                                       self.data.index)
                new_name = match_name(self.attr_case_name, key[1],
                                      self.data.columns)
                return self.data.loc[new_index, new_name]

            # if tuple length is 3, index, child_index, column
            elif len(key) == 3:
                new_index = self.var_case_name(key[0])
                new_child_index = self.var_case_name(key[1])
                new_name = self.attr_case_name(key[2])
                return self.ho_data[new_index].data.loc[new_child_index,
                                                        new_name]
            # NOTE(review): tuples of any other length fall through and
            # return None - confirm whether an error is preferred

        elif isinstance(key, list):
            return self[key, :]

        elif isinstance(key, basestring):
            # ensure variable is present somewhere
            if key in self:
                # get case preserved string for variable name
                new_key = self.var_case_name(key)
                # don't need to check if in lower, all variables
                # are always in the lower metadata
                # explicit copy so adding 'children' never writes back into
                # (or warns about) the stored DataFrame
                meta_row = self.data.loc[new_key].copy()
                if new_key in self.keys_nD():
                    meta_row.at['children'] = self.ho_data[new_key].copy()
                else:
                    meta_row.at['children'] = None
                return meta_row
            else:
                raise KeyError('Key not found in MetaData')
        else:
            raise NotImplementedError(
                "No way to handle MetaData key {}".format(key.__repr__()))

    def _label_setter(self, new_label, current_label, attr_label,
                      default=np.nan, use_names_default=False):
        """Generalized setter of default meta attributes

        Parameters
        ----------
        new_label : str
            New label to use in the Meta object
        current_label : str
            Label currently used in the metadata for this attribute
        attr_label : str
            Name of the public label attribute being set (e.g.
            'units_label'); the hidden attribute '_' + attr_label is
            updated and the same attribute is set on higher order Meta
            objects
        default :
            Default setting to use for label if there is no attribute
            value
        use_names_default : bool
            if True, MetaData variable names are used as the default
            value for the specified Meta attributes settings

        Examples
        --------
        ::

            @name_label.setter
            def name_label(self, new_label):
                self._label_setter(new_label, self._name_label,
                                   'name_label', use_names_default=True)

        Notes
        -----
        Not intended for end user

        """
        if new_label not in self.attrs():
            # new label not in metadata, including case
            # update existing label, if present
            if current_label in self.attrs():
                # old label exists and has expected case
                self.data.loc[:, new_label] = self.data.loc[:, current_label]
                self.data.drop(current_label, axis=1, inplace=True)
            else:
                if self.has_attr(current_label):
                    # there is something like label, wrong case though
                    current_label = self.attr_case_name(current_label)
                    self.data.loc[:, new_label] = \
                        self.data.loc[:, current_label]
                    self.data.drop(current_label, axis=1, inplace=True)
                else:
                    # there is no existing label
                    # setting for the first time
                    if use_names_default:
                        self.data[new_label] = self.data.index
                    else:
                        self.data[new_label] = default

            # check higher order structures as well
            # recursively change labels here
            for key in self.keys_nD():
                setattr(self.ho_data[key], attr_label, new_label)

        # now update 'hidden' attribute value
        setattr(self, ''.join(('_', attr_label)), new_label)

    @property
    def units_label(self):
        return self._units_label

    @property
    def name_label(self):
        return self._name_label

    @property
    def notes_label(self):
        return self._notes_label

    @property
    def desc_label(self):
        return self._desc_label

    @property
    def plot_label(self):
        return self._plot_label

    @property
    def axis_label(self):
        return self._axis_label

    @property
    def scale_label(self):
        return self._scale_label

    @property
    def min_label(self):
        return self._min_label

    @property
    def max_label(self):
        return self._max_label

    @property
    def fill_label(self):
        return self._fill_label

    @units_label.setter
    def units_label(self, new_label):
        self._label_setter(new_label, self._units_label, 'units_label', '')

    @name_label.setter
    def name_label(self, new_label):
        self._label_setter(new_label, self._name_label, 'name_label',
                           use_names_default=True)

    @notes_label.setter
    def notes_label(self, new_label):
        self._label_setter(new_label, self._notes_label, 'notes_label', '')

    @desc_label.setter
    def desc_label(self, new_label):
        self._label_setter(new_label, self._desc_label, 'desc_label', '')

    @plot_label.setter
    def plot_label(self, new_label):
        self._label_setter(new_label, self._plot_label, 'plot_label',
                           use_names_default=True)

    @axis_label.setter
    def axis_label(self, new_label):
        self._label_setter(new_label, self._axis_label, 'axis_label',
                           use_names_default=True)

    @scale_label.setter
    def scale_label(self, new_label):
        self._label_setter(new_label, self._scale_label, 'scale_label',
                           'linear')

    @min_label.setter
    def min_label(self, new_label):
        self._label_setter(new_label, self._min_label, 'min_label', np.nan)

    @max_label.setter
    def max_label(self, new_label):
        self._label_setter(new_label, self._max_label, 'max_label', np.nan)

    @fill_label.setter
    def fill_label(self, new_label):
        self._label_setter(new_label, self._fill_label, 'fill_label', np.nan)

    def var_case_name(self, name):
        """Provides stored name (case preserved) for case insensitive input

        If name is not found (case-insensitive check) then name is returned,
        as input. This function is intended to be used to help ensure the
        case of a given variable name is the same across the Meta object.

        Parameters
        ----------
        name : str
            variable name in any case

        Returns
        -------
        str
            string with case preserved as in metaobject

        """
        lower_name = name.lower()
        if name in self:
            for i in self.keys():
                if lower_name == i.lower():
                    return i
            for i in self.keys_nD():
                if lower_name == i.lower():
                    return i
        return name

    def keys(self):
        """Yields variable names stored for 1D variables"""
        for i in self.data.index:
            yield i

    def keys_nD(self):
        """Yields keys for higher order metadata"""
        for i in self.ho_data:
            yield i

    def attrs(self):
        """Yields metadata products stored for each variable name"""
        for i in self.data.columns:
            yield i

    def has_attr(self, name):
        """Returns boolean indicating presence of given attribute name

        Case-insensitive check

        Notes
        -----
        Does not check higher order meta objects

        Parameters
        ----------
        name : str
            name of variable to get stored case form

        Returns
        -------
        bool
            True if case-insensitive check for attribute name is True

        """
        return name.lower() in [i.lower() for i in self.data.columns]

    def attr_case_name(self, name):
        """Returns preserved case name for case insensitive value of name.

        Checks first within standard attributes. If not found there, checks
        attributes for higher order data structures. If not found, returns
        supplied name as it is available for use. Intended to be used to help
        ensure that the same case is applied to all repetitions of a given
        variable name.

        Parameters
        ----------
        name : str
            name of variable to get stored case form

        Returns
        -------
        str
            name in proper case

        """
        lower_name = name.lower()
        for i in self.attrs():
            if lower_name == i.lower():
                return i

        # check if attribute present in higher order structures
        # read the stored child Meta directly; going through self[key]
        # would deep copy the child on every lookup
        for key in self.keys_nD():
            for i in self.ho_data[key].attrs():
                if lower_name == i.lower():
                    return i

        # nothing was found if still here
        # pass name back, free to be whatever
        return name

    def concat(self, other, strict=False):
        """Concats two metadata objects together.

        Parameters
        ----------
        other : Meta
            Meta object to be concatenated
        strict : bool
            if True, ensure there are no duplicate variable names

        Notes
        -----
        Uses units and name label of self if other is different

        Returns
        -------
        Meta
            Concatenated object

        Raises
        ------
        RuntimeError
            If strict is True and a variable name appears in both objects.

        """
        mdata = self.copy()

        # checks
        if strict:
            for key in other.keys():
                if key in mdata:
                    raise RuntimeError(''.join(('Duplicated keys (variable ',
                                                'names) across Meta ',
                                                'objects in keys().')))
            for key in other.keys_nD():
                if key in mdata:
                    raise RuntimeError(''.join(('Duplicated keys (variable ',
                                                'names) across Meta '
                                                'objects in keys_nD().')))

        # make sure labels between the two objects are the same
        other_updated = self.apply_default_labels(other)

        # concat 1D metadata in data frames to copy of current metadata
        # values are taken from the relabeled copy so that column labels
        # line up with those used by self
        for key in other_updated.keys():
            mdata.data.loc[key] = other_updated.data.loc[key]

        # add together higher order data
        for key in other_updated.keys_nD():
            mdata.ho_data[key] = other_updated.ho_data[key]

        return mdata

    def copy(self):
        """Deep copy of the meta object."""
        from copy import deepcopy
        return deepcopy(self)

    def pop(self, name):
        """Remove and return metadata about variable

        Parameters
        ----------
        name : str
            variable name

        Returns
        -------
        pandas.Series
            Series of metadata for variable

        Raises
        ------
        KeyError
            If name is not a known variable (case-insensitive check).

        """
        # check if present
        if name not in self:
            raise KeyError('Key not present in metadata variables')

        # get case preserved name for variable
        new_name = self.var_case_name(name)

        # check if 1D or nD
        if new_name in self.keys():
            # output already carries a copy of any higher order metadata
            # under 'children'
            output = self[new_name]
            self.data.drop(new_name, inplace=True, axis=0)
            # remove the higher order entry as well, otherwise the name
            # would still be reported as present after the pop
            self.ho_data.pop(new_name, None)
        else:
            output = self.ho_data.pop(new_name)

        return output

    def transfer_attributes_to_instrument(self, inst, strict_names=False):
        """Transfer non-standard attributes in Meta to Instrument object.

        Pysat's load_netCDF and similar routines are only able to attach
        netCDF4 attributes to a Meta object. This routine identifies these
        attributes and copies them onto the pysat.Instrument object. The
        attributes are not removed from the Meta object by this method.

        Will not transfer names that conflict with pysat default attributes,
        nor names with a leading underscore.

        Parameters
        ----------
        inst : pysat.Instrument
            Instrument object to transfer attributes to
        strict_names : boolean (False)
            If True, produces an error if the Instrument object already
            has an attribute with the same name to be copied.

        Returns
        -------
        None
            pysat.Instrument object modified in place with new attributes

        Raises
        ------
        RuntimeError
            If strict_names is True and inst already has an attribute with
            the name of one to be transferred.

        """
        # base Instrument attributes
        banned = inst._base_attr
        # get base attribute set, and attributes attached to instance
        base_attrb = self._base_attr

        # collect user attached, public attributes into a dict
        adict = {}
        for key in dir(self):
            if key in banned or key in base_attrb:
                continue
            # don't store _ leading attributes
            if key[0] != '_':
                adict[key] = getattr(self, key)

        # store any non-standard attributes in Instrument
        # get list of instrument objects attributes first
        # to check if a duplicate
        inst_attr = dir(inst)
        for key, value in adict.items():
            if strict_names and key in inst_attr:
                raise RuntimeError(''.join(('Attribute ', key,
                                            ' attached to Meta object',
                                            ' can not be transferred',
                                            ' as it already exists in',
                                            ' the Instrument object.')))
            setattr(inst, key, value)

    @staticmethod
    def _values_equal(val1, val2):
        """True if two metadata values are equal, treating NaN == NaN."""
        if val1 == val2:
            return True
        # np.nan is not equal to anything
        # if both values are NaN, ok in my book
        try:
            return bool(np.isnan(val1) and np.isnan(val2))
        except TypeError:
            # isnan gets unhappy with string inputs
            return False

    def __eq__(self, other):
        """Check equality between Meta instances.

        Good for testing.

        Checks if variable names, attribute names, and metadata values
        are all equal between two Meta objects. Note that this comparison
        treats np.nan == np.nan as True.

        Name comparison is case-sensitive.

        """
        if not isinstance(other, Meta):
            # wasn't even the correct class
            return False

        # check first if variables and attributes are the same
        # equal lengths plus every name of the first being present in the
        # second means no more or less items in the second
        keys1 = list(self.keys())
        keys2 = list(other.keys())
        if len(keys1) != len(keys2):
            return False
        for key in keys1:
            if key not in keys2:
                return False

        # do same checks on attributes
        attrs1 = list(self.attrs())
        attrs2 = list(other.attrs())
        if len(attrs1) != len(attrs2):
            return False
        for attr in attrs1:
            if attr not in attrs2:
                return False

        # now check the values of all elements now that we know all
        # variable and attribute names are the same
        for key in keys1:
            for attr in attrs1:
                if not self._values_equal(self[key, attr],
                                          other[key, attr]):
                    return False

        # check through higher order products
        # in the same manner as code above
        keys1 = list(self.keys_nD())
        keys2 = list(other.keys_nD())
        if len(keys1) != len(keys2):
            return False
        for key in keys1:
            if key not in keys2:
                return False

        # do same check on all sub variables within each nD key
        for key in keys1:
            # compare the stored child Meta objects directly, avoiding a
            # deep copy of the child for every element compared
            child1 = self.ho_data[key]
            child2 = other.ho_data[key]

            sub_keys1 = list(child1.keys())
            sub_keys2 = list(child2.keys())
            if len(sub_keys1) != len(sub_keys2):
                return False
            for key_check in sub_keys1:
                if key_check not in sub_keys2:
                    return False

            # check if attributes are the same
            sub_attrs1 = list(child1.attrs())
            sub_attrs2 = list(child2.attrs())
            if len(sub_attrs1) != len(sub_attrs2):
                return False
            for attr in sub_attrs1:
                if attr not in sub_attrs2:
                    return False

            # now time to check if all elements are individually equal
            for key2 in sub_keys1:
                for attr in sub_attrs1:
                    if not self._values_equal(child1[key2, attr],
                                              child2[key2, attr]):
                        return False

        # if we made it this far, things are good
        return True

    @classmethod
    def from_csv(cls, name=None, col_names=None, sep=None, **kwargs):
        """Create instrument metadata object from csv.

        Parameters
        ----------
        name : string
            absolute filename for csv file or name of file
            stored in pandas instruments location
        col_names : list-like collection of strings
            column names in csv and resultant meta object
        sep : string
            column separator for supplied csv filename
        **kwargs
            passed along to pandas.read_csv

        Raises
        ------
        ValueError
            If col_names lacks a required name, name is missing or not a
            string, no file can be located, or the file yields no data.

        Note
        ----
        column names must include at least ['name', 'long_name', 'units'],
        assumed if col_names is None.

        """
        req_names = ['name', 'long_name', 'units']
        if col_names is None:
            col_names = req_names
        elif not all([i in col_names for i in req_names]):
            raise ValueError('col_names must include name, long_name, units.')

        if sep is None:
            sep = ','

        if name is None:
            raise ValueError('Must supply an instrument name or file path.')
        elif not isinstance(name, str):
            raise ValueError('keyword name must be related to a string')
        elif not os.path.isfile(name):
            # Not a real file, assume input is a pysat instrument name
            # and look in the standard pysat location.
            import pysat
            test = os.path.join(pysat.__path__[0], 'instruments', name)
            if os.path.isfile(test):
                name = test
            else:
                # trying to form an absolute path for success
                test = os.path.abspath(name)
                if not os.path.isfile(test):
                    raise ValueError("Unable to create valid file path.")
                # success
                name = test

        mdata = pds.read_csv(name, names=col_names, sep=sep, **kwargs)

        if mdata.empty:
            raise ValueError('Unable to retrieve information from ' + name)

        # make sure the data name is the index
        mdata.index = mdata['name']
        del mdata['name']
        return cls(metadata=mdata)
class Meta(object):
    """Stores metadata for an Instrument instance.

    Similar in spirit to the CF-1.6 netCDF data standard.

    Parameters
    ----------
    metadata : pandas.DataFrame
        DataFrame should be indexed by variable name that contains at minimum
        the standard_name (name), units, and long_name for the data stored in
        the associated pysat Instrument object.
    units_label : str
        String used to label units in storage. Defaults to 'units'.
    name_label : str
        String used to label long_name in storage. Defaults to 'long_name'.
    notes_label : str
        String used to label 'notes' in storage. Defaults to 'notes'
    desc_label : str
        String used to label variable descriptions in storage.
        Defaults to 'desc'
    plot_label : str
        String used to label variables in plots. Defaults to 'label'
    axis_label : str
        Label used for axis on a plot. Defaults to 'axis'
    scale_label : str
        String used to label plot scaling type in storage.
        Defaults to 'scale'
    min_label : str
        String used to label typical variable value min limit in storage.
        Defaults to 'value_min'
    max_label : str
        String used to label typical variable value max limit in storage.
        Defaults to 'value_max'
    fill_label : str
        String used to label fill value in storage. Defaults to 'fill' per
        netCDF4 standard

    Attributes
    ----------
    data : pandas.DataFrame
        index is variable standard name, 'units', 'long_name', and other
        defaults are also stored along with additional user provided labels.
    ho_data : dict
        Meta objects for higher order (nD) variables, keyed by variable name.
    units_label : str
        String used to label units in storage. Defaults to 'units'.
    name_label : str
        String used to label long_name in storage. Defaults to 'long_name'.
    notes_label : str
        String used to label 'notes' in storage. Defaults to 'notes'
    desc_label : str
        String used to label variable descriptions in storage.
        Defaults to 'desc'
    plot_label : str
        String used to label variables in plots. Defaults to 'label'
    axis_label : str
        Label used for axis on a plot. Defaults to 'axis'
    scale_label : str
        String used to label plot scaling type in storage.
        Defaults to 'scale'
    min_label : str
        String used to label typical variable value min limit in storage.
        Defaults to 'value_min'
    max_label : str
        String used to label typical variable value max limit in storage.
        Defaults to 'value_max'
    fill_label : str
        String used to label fill value in storage. Defaults to 'fill' per
        netCDF4 standard

    Notes
    -----
    Meta object preserves the case of variables and attributes as it first
    receives the data. Subsequent calls to set new metadata with the same
    variable or attribute will use case of first call. Accessing or setting
    data thereafter is case insensitive. In practice, use is case insensitive
    but the original case is preserved. Case preservation is built in to
    support writing files with a desired case to meet standards.

    Metadata for higher order data objects, those that have
    multiple products under a single variable name in a pysat.Instrument
    object, are stored by providing a Meta object under the single name.

    Supports any custom metadata values in addition to the expected metadata
    attributes (units, name, notes, desc, plot_label, axis, scale, value_min,
    value_max, and fill). These base attributes may be used to
    programmatically access and set types of metadata regardless of the
    string values used for the attribute. String values for attributes may
    need to be changed depending upon the standards of code or files
    interacting with pysat.

    Meta objects returned as part of pysat loading routines are automatically
    updated to use the same values of plot_label, units_label, etc. as found
    on the pysat.Instrument object.

    Examples
    --------
    ::

        # instantiate Meta object, default values for attribute labels
        # are used
        meta = pysat.Meta()
        # set a couple base units
        # note that other base parameters not set below will
        # be assigned a default value
        meta['name'] = {'long_name':string, 'units':string}
        # update 'units' to new value
        meta['name'] = {'units':string}
        # update 'long_name' to new value
        meta['name'] = {'long_name':string}
        # attach new info with partial information, 'long_name' set to 'name2'
        meta['name2'] = {'units':string}
        # units are set to '' by default
        meta['name3'] = {'long_name':string}

        # assigning custom meta parameters
        meta['name4'] = {'units':string, 'long_name':string,
                         'custom1':string, 'custom2':value}
        meta['name5'] = {'custom1':string, 'custom3':value}

        # assign multiple variables at once
        meta[['name1', 'name2']] = {'long_name':[string1, string2],
                                    'units':[string1, string2],
                                    'custom10':[string1, string2]}

        # assigning metadata for n-Dimensional variables
        meta2 = pysat.Meta()
        meta2['name41'] = {'long_name':string, 'units':string}
        meta2['name42'] = {'long_name':string, 'units':string}
        meta['name4'] = {'meta':meta2}
        # or
        meta['name4'] = meta2
        meta['name4'].children['name41']

        # mixture of 1D and higher dimensional data
        meta = pysat.Meta()
        meta['dm'] = {'units':'hey', 'long_name':'boo'}
        meta['rpa'] = {'units':'crazy', 'long_name':'boo_whoo'}
        meta2 = pysat.Meta()
        meta2[['higher', 'lower']] = {'meta':[meta, None],
                                      'units':[None, 'boo'],
                                      'long_name':[None, 'boohoo']}

        # assign from another Meta object
        meta[key1] = meta2[key2]

        # access fill info for a variable, presuming default label
        meta[key1, 'fill']
        # access same info, even if 'fill' not used to label fill values
        meta[key1, meta.fill_label]

        # change a label used by Meta object
        # note that all instances of fill_label
        # within the meta object are updated
        meta.fill_label = '_FillValue'
        meta.plot_label = 'Special Plot Variable'
        # this feature is useful when converting metadata within pysat
        # so that it is consistent with externally imposed file standards

    """

    def __init__(self, metadata=None, units_label='units',
                 name_label='long_name', notes_label='notes',
                 desc_label='desc', plot_label='label', axis_label='axis',
                 scale_label='scale', min_label='value_min',
                 max_label='value_max', fill_label='fill'):
        # store the labels on the hidden attributes directly; the public
        # setters would try to rename columns of a frame that does not
        # exist yet
        self._units_label = units_label
        self._name_label = name_label
        self._notes_label = notes_label
        self._desc_label = desc_label
        self._plot_label = plot_label
        self._axis_label = axis_label
        self._scale_label = scale_label
        self._min_label = min_label
        self._max_label = max_label
        self._fill_label = fill_label
        # container for higher order (nD) metadata, keyed by variable name
        self._ho_data = {}

        if metadata is not None:
            if isinstance(metadata, DataFrame):
                self._data = metadata
                # re-applying our own labels through the setters fills in
                # a default column for every label missing from the frame
                self.accept_default_labels(self)
            else:
                raise ValueError("Input must be a pandas DataFrame type. " +
                                 "See other constructors for alternate inputs.")
        else:
            self._data = DataFrame(None, columns=[self._units_label,
                                                  self._name_label,
                                                  self._desc_label,
                                                  self._plot_label,
                                                  self._axis_label,
                                                  self._scale_label,
                                                  self.notes_label,
                                                  self._min_label,
                                                  self._max_label,
                                                  self._fill_label])

        # snapshot of the attributes intrinsic to the object, taken before
        # the user can attach any; used by
        # transfer_attributes_to_instrument to spot custom attributes
        self._base_attr = dir(self)

    @property
    def ho_data(self):
        """dict of Meta objects for higher order variables."""
        return self._ho_data

    @property
    def data(self):
        """pandas.DataFrame of 1D metadata, indexed by variable name."""
        return self._data

    @data.setter
    def data(self, new_frame):
        self._data = new_frame

    @ho_data.setter
    def ho_data(self, new_dict):
        self._ho_data = new_dict

    @property
    def empty(self):
        """Return boolean True if there is no metadata"""
        # only need to check the lower dimension data since a row is
        # always added there when higher order metadata is assigned
        return self.data.empty

    def merge(self, other):
        """Adds metadata variables to self that are in other but not in self.

        Parameters
        ----------
        other : pysat.Meta
            Source of the additional variables

        """
        for key in other.keys():
            if key not in self:
                # copies over both lower and higher dimensional data
                self[key] = other[key]

    def drop(self, names):
        """Drops variables (names) from metadata.

        Parameters
        ----------
        names : str or list-like of str
            Variable name(s) to remove from both the 1D metadata and any
            higher order metadata.

        """
        # a bare string would otherwise be walked character by character
        # below, leaving its higher order metadata behind
        if isinstance(names, basestring):
            names = [names]
        # drop lower dimension data
        self._data = self._data.drop(names, axis=0)
        # drop higher dimension data
        for name in names:
            if name in self._ho_data:
                self._ho_data.pop(name)

    def keep(self, keep_names):
        """Keeps variables (keep_names) while dropping other parameters

        Parameters
        ----------
        keep_names : str or list-like of str
            Variable name(s) to retain; matched case-insensitively against
            the names already stored.

        """
        if isinstance(keep_names, basestring):
            keep_names = [keep_names]
        keep_names = [self.var_case_name(name) for name in keep_names]
        # variables live on the index; the columns hold attribute labels
        drop_names = [name for name in self._data.index
                      if name not in keep_names]
        self.drop(drop_names)

    def apply_default_labels(self, other):
        """Applies labels for default meta labels from self onto other.

        Parameters
        ----------
        other : Meta
            Meta object to have default labels applied

        Returns
        -------
        Meta
            Copy of other that uses the labels of self; other itself is
            left untouched.

        """
        other_updated = other.copy()
        other_updated.units_label = self.units_label
        other_updated.name_label = self.name_label
        other_updated.notes_label = self.notes_label
        other_updated.desc_label = self.desc_label
        other_updated.plot_label = self.plot_label
        other_updated.axis_label = self.axis_label
        other_updated.scale_label = self.scale_label
        other_updated.min_label = self.min_label
        other_updated.max_label = self.max_label
        other_updated.fill_label = self.fill_label
        return other_updated

    def accept_default_labels(self, other):
        """Applies labels for default meta labels from other onto self.

        Parameters
        ----------
        other : Meta
            Meta object to take default labels from

        Returns
        -------
        None
            self is modified in place

        """
        self.units_label = other.units_label
        self.name_label = other.name_label
        self.notes_label = other.notes_label
        self.desc_label = other.desc_label
        self.plot_label = other.plot_label
        self.axis_label = other.axis_label
        self.scale_label = other.scale_label
        self.min_label = other.min_label
        self.max_label = other.max_label
        self.fill_label = other.fill_label

    def __contains__(self, other):
        """case insensitive check for variable name"""
        lower_name = other.lower()
        if lower_name in [i.lower() for i in self.keys()]:
            return True
        if lower_name in [i.lower() for i in self.keys_nD()]:
            return True
        return False

    def __repr__(self):
        return 'pysat.MetaData'

    def __str__(self, recurse=True):
        """String describing Meta instance, variables, and attributes

        Parameters
        ----------
        recurse : bool
            If True, include a heading and a section for every higher
            order variable; child sections are produced with recurse=False.

        """
        pieces = ['Metadata for 1D variables\n' if recurse else '']
        # cover 1D parameters
        pieces.extend(ind.ljust(30) for ind in self.keys())
        pieces.append('\n\n')
        pieces.append('Tracking the following:\n')
        pieces.extend(col.ljust(30) for col in self.attrs())
        pieces.append('\n')
        if recurse:
            for item_name in self.keys_nD():
                pieces.append('\n\n')
                pieces.append('Metadata for ' + item_name + '\n')
                pieces.append(self.ho_data[item_name].__str__(False))
        return ''.join(pieces)

    def _insert_default_values(self, input_name):
        """Add a row for input_name populated with the default metadata."""
        default_str = ''
        default_nan = np.nan
        labels = [self.units_label, self.name_label, self.notes_label,
                  self.desc_label, self.plot_label, self.axis_label,
                  self.scale_label, self.min_label, self.max_label,
                  self.fill_label]
        # same order as labels above
        defaults = [default_str, input_name, default_str, default_str,
                    input_name, input_name, 'linear', default_nan,
                    default_nan, default_nan]
        self._data.loc[input_name, labels] = defaults

    def __setitem__(self, names, input_data):
        """Convenience method for adding metadata.

        Parameters
        ----------
        names : str or list-like of str
            Variable name(s) the metadata applies to. Only a single name
            is supported when input_data is a Series or Meta.
        input_data : dict, pandas.Series, or Meta
            dict maps attribute label to value (or to a list of values, one
            per name); Series is a row previously obtained from a Meta
            object; Meta supplies metadata for a higher order variable.

        Raises
        ------
        ValueError
            If the number of values for an attribute differs from the
            number of names.

        """
        if isinstance(input_data, dict):
            # work on a private copy so the caller's dict is never modified
            if isinstance(names, basestring):
                # if not passed an iterable, make it one
                names = [names]
                input_data = dict((key, [input_data[key]])
                                  for key in input_data)
            else:
                input_data = dict(input_data)

            # Meta object is case insensitive but case preserving:
            # convert given names into ones Meta has already seen,
            # if new, then input names become the standard
            names = [self.var_case_name(name) for name in names]
            for name in names:
                if name not in self:
                    self._insert_default_values(name)

            if not input_data:
                # meta wasn't actually assigned by user, empty call;
                # defaults have been assigned above if this is first data
                return

            # make sure number of inputs matches number of metadata inputs
            for key in input_data:
                if len(names) != len(input_data[key]):
                    raise ValueError('Length of names and inputs must be equal.')

            # if an attribute name already exists somewhere, the case of the
            # existing attribute is enforced upon the new data
            for attr in list(input_data):
                stored_attr = self.attr_case_name(attr)
                if stored_attr != attr:
                    input_data[stored_attr] = input_data.pop(attr)

            # time to actually add the metadata
            for key in input_data:
                if key not in ['children', 'meta']:
                    for i, name in enumerate(names):
                        to_be_set = input_data[key][i]
                        if (hasattr(to_be_set, '__iter__')
                                and not isinstance(to_be_set, basestring)):
                            # collections of strings are collapsed into one
                            # string; other collections are not stored
                            if isinstance(to_be_set[0], basestring):
                                self._data.loc[name, key] = \
                                    '\n\n'.join(to_be_set)
                            else:
                                warnings.warn(' '.join(('Array elements are disallowed in meta.',
                                                        'Dropping input :', key)))
                        else:
                            self._data.loc[name, key] = to_be_set
                else:
                    # key is 'meta' or 'children'
                    # Meta inputs could be part of a larger multiple
                    # parameter assignment, so not all names may actually
                    # have a Meta object to add
                    for item, val in zip(names, input_data[key]):
                        if val is not None:
                            # recursive call, handled by the Meta branch
                            self[item] = val

        elif isinstance(input_data, Series):
            # outputs from Meta object are a Series, thus this takes in
            # input from a Meta object
            in_dict = input_data.to_dict()
            if 'children' in in_dict:
                child = in_dict.pop('children')
                if child is not None:
                    self.ho_data[names] = child
            # remaining items are assigned via the dict branch
            self[names] = in_dict

        elif isinstance(input_data, Meta):
            # dealing with higher order data set
            # names is only a single name here (by choice for support)
            if (names in self._ho_data) and (input_data.empty):
                # no actual metadata provided and there is already some
                # higher order metadata in self
                return

            # get Meta approved variable name
            new_item_name = self.var_case_name(names)
            # ensure that Meta labels of object to be assigned
            # are consistent with self
            input_data.accept_default_labels(self)

            # ensure Meta object to be added has attribute names consistent
            # with self; this covers custom attributes not handled by the
            # default label routine above
            input_data.data.columns = [self.attr_case_name(name)
                                       for name in input_data.attrs()]
            # same thing for variables
            input_data.data.index = [self.var_case_name(name)
                                     for name in input_data.data.index]

            # make sure there are lower dimension metadata parameters;
            # passing in an empty dict fills in defaults if there is no
            # existing metadata info
            self[new_item_name] = {}
            # now add to higher order data
            self._ho_data[new_item_name] = input_data

    def __getitem__(self, key):
        """Convenience method for obtaining metadata.

        Maps to pandas DataFrame.loc method.

        Raises
        ------
        KeyError
            If a single (non-tuple) key is not a known variable name.

        Examples
        --------
        ::

            meta['name']

            meta[ 'name1', 'units' ]

            # for higher order data
            meta[ 'name1', 'subvar', 'units' ]

        """
        # if key is a tuple, looking at index, column access pattern
        if isinstance(key, tuple):
            if len(key) == 2:
                # index, column
                new_index = self.var_case_name(key[0])
                new_name = self.attr_case_name(key[1])
                return self.data.loc[new_index, new_name]
            elif len(key) == 3:
                # index, child_index, column
                new_index = self.var_case_name(key[0])
                new_child_index = self.var_case_name(key[1])
                new_name = self.attr_case_name(key[2])
                return self.ho_data[new_index].data.loc[new_child_index,
                                                        new_name]
            # NOTE: tuples of any other length fall through and yield None
        else:
            # ensure variable is present somewhere
            if key in self:
                # get case preserved string for variable name
                new_key = self.var_case_name(key)
                # all variables are always in the lower metadata;
                # copy the row so adding 'children' cannot touch the frame
                meta_row = self.data.loc[new_key].copy()
                if new_key in self.keys_nD():
                    meta_row.at['children'] = self.ho_data[new_key].copy()
                else:
                    meta_row.at['children'] = None
                return meta_row
            else:
                raise KeyError('Key not found in MetaData')

    def _label_setter(self, new_label, current_label, attr_label,
                      default=np.nan, use_names_default=False):
        """Generalized setter of default meta attributes

        Parameters
        ----------
        new_label : str
            New label to use in the Meta object
        current_label : str
            Label currently used for this attribute in the stored metadata
        attr_label : str
            Name of the public label property being set (e.g.
            'units_label'); the hidden attribute '_' + attr_label is
            updated and the same property is set on child Meta objects.
        default :
            Default setting to use for label if there is no attribute
            value
        use_names_default : bool
            if True, MetaData variable names are used as the default
            value for the specified Meta attributes settings

        Examples
        --------
        ::

            @name_label.setter
            def name_label(self, new_label):
                self._label_setter(new_label, self._name_label,
                                   'name_label', use_names_default=True)

        Notes
        -----
        Not intended for end user

        """
        if new_label not in self.attrs():
            # new label not in metadata, including case
            if current_label in self.attrs():
                # old label exists and has expected case
                self.data.loc[:, new_label] = self.data.loc[:, current_label]
                self.data.drop(current_label, axis=1, inplace=True)
            elif self.has_attr(current_label):
                # there is something like label, wrong case though
                current_label = self.attr_case_name(current_label)
                self.data.loc[:, new_label] = self.data.loc[:, current_label]
                self.data.drop(current_label, axis=1, inplace=True)
            else:
                # there is no existing label, setting for the first time
                if use_names_default:
                    self.data[new_label] = self.data.index
                else:
                    self.data[new_label] = default
            # recursively change labels in higher order structures as well
            for key in self.keys_nD():
                setattr(self.ho_data[key], attr_label, new_label)

        # now update 'hidden' attribute value
        setattr(self, ''.join(('_', attr_label)), new_label)

    @property
    def units_label(self):
        return self._units_label

    @property
    def name_label(self):
        return self._name_label

    @property
    def notes_label(self):
        return self._notes_label

    @property
    def desc_label(self):
        return self._desc_label

    @property
    def plot_label(self):
        return self._plot_label

    @property
    def axis_label(self):
        return self._axis_label

    @property
    def scale_label(self):
        return self._scale_label

    @property
    def min_label(self):
        return self._min_label

    @property
    def max_label(self):
        return self._max_label

    @property
    def fill_label(self):
        return self._fill_label

    @units_label.setter
    def units_label(self, new_label):
        self._label_setter(new_label, self._units_label, 'units_label', '')

    @name_label.setter
    def name_label(self, new_label):
        self._label_setter(new_label, self._name_label, 'name_label',
                           use_names_default=True)

    @notes_label.setter
    def notes_label(self, new_label):
        self._label_setter(new_label, self._notes_label, 'notes_label', '')

    @desc_label.setter
    def desc_label(self, new_label):
        self._label_setter(new_label, self._desc_label, 'desc_label', '')

    @plot_label.setter
    def plot_label(self, new_label):
        self._label_setter(new_label, self._plot_label, 'plot_label',
                           use_names_default=True)

    @axis_label.setter
    def axis_label(self, new_label):
        self._label_setter(new_label, self._axis_label, 'axis_label',
                           use_names_default=True)

    @scale_label.setter
    def scale_label(self, new_label):
        self._label_setter(new_label, self._scale_label, 'scale_label',
                           'linear')

    @min_label.setter
    def min_label(self, new_label):
        self._label_setter(new_label, self._min_label, 'min_label', np.nan)

    @max_label.setter
    def max_label(self, new_label):
        self._label_setter(new_label, self._max_label, 'max_label', np.nan)

    @fill_label.setter
    def fill_label(self, new_label):
        self._label_setter(new_label, self._fill_label, 'fill_label', np.nan)

    def var_case_name(self, name):
        """Provides stored name (case preserved) for case insensitive input

        If name is not found (case-insensitive check) then name is returned,
        as input. This function is intended to be used to help ensure the
        case of a given variable name is the same across the Meta object.

        Parameters
        ----------
        name : str
            variable name in any case

        Returns
        -------
        str
            string with case preserved as in metaobject

        """
        lower_name = name.lower()
        if name in self:
            for i in self.keys():
                if lower_name == i.lower():
                    return i
            for i in self.keys_nD():
                if lower_name == i.lower():
                    return i
        return name

    def keys(self):
        """Yields variable names stored for 1D variables"""
        for i in self.data.index:
            yield i

    def keys_nD(self):
        """Yields keys for higher order metadata"""
        for i in self.ho_data:
            yield i

    def attrs(self):
        """Yields metadata products stored for each variable name"""
        for i in self.data.columns:
            yield i

    def has_attr(self, name):
        """Returns boolean indicating presence of given attribute name

        Case-insensitive check

        Notes
        -----
        Does not check higher order meta objects

        Parameters
        ----------
        name : str
            name of attribute to look for

        Returns
        -------
        bool
            True if case-insensitive check for attribute name is True

        """
        return name.lower() in [i.lower() for i in self.data.columns]

    def attr_case_name(self, name):
        """Returns preserved case name for case insensitive value of name.

        Checks first within standard attributes. If not found there, checks
        attributes for higher order data structures. If not found, returns
        supplied name as it is available for use. Intended to be used to help
        ensure that the same case is applied to all repetitions of a given
        variable name.

        Parameters
        ----------
        name : str
            name of variable to get stored case form

        Returns
        -------
        str
            name in proper case

        """
        lower_name = name.lower()
        for i in self.attrs():
            if lower_name == i.lower():
                return i
        # check if attribute present in higher order structures; read the
        # children straight from ho_data rather than through self[key],
        # which would deep copy every child just to inspect its columns
        for key in self.keys_nD():
            for i in self.ho_data[key].attrs():
                if lower_name == i.lower():
                    return i
        # nothing was found, pass name back, free to be whatever
        return name

    def concat(self, other, strict=False):
        """Concats two metadata objects together.

        Parameters
        ----------
        other : Meta
            Meta object to be concatenated
        strict : bool
            if True, ensure there are no duplicate variable names

        Notes
        -----
        Uses units and name label of self if other is different

        Returns
        -------
        Meta
            Concatenated object

        Raises
        ------
        RuntimeError
            If strict is True and a variable name appears in both objects.

        """
        mdata = self.copy()
        if strict:
            for key in other.keys():
                if key in mdata:
                    raise RuntimeError('Duplicated keys (variable names) ' +
                                       'across Meta objects in keys().')
            for key in other.keys_nD():
                if key in mdata:
                    raise RuntimeError('Duplicated keys (variable names) across '
                                       'Meta objects in keys_nD().')

        # relabelled copy of other so its columns line up with those of self
        other_updated = self.apply_default_labels(other)
        # concat 1D metadata onto the copy of the current metadata
        for key in other_updated.keys():
            mdata.data.loc[key] = other_updated.data.loc[key]
        # add together higher order data
        for key in other_updated.keys_nD():
            mdata.ho_data[key] = other_updated.ho_data[key]
        return mdata

    def copy(self):
        """Deep copy of the meta object."""
        from copy import deepcopy
        return deepcopy(self)

    def pop(self, name):
        """Remove and return metadata about variable

        Parameters
        ----------
        name : str
            variable name

        Returns
        -------
        pandas.Series
            Series of metadata for variable

        Raises
        ------
        KeyError
            If name is not a stored variable (case-insensitive check).

        """
        if name not in self:
            raise KeyError('Key not present in metadata variables')

        # get case preserved name for variable
        new_name = self.var_case_name(name)
        # check if 1D or nD
        if new_name in self.keys():
            # the returned row carries a copy of any children
            output = self[new_name]
            self.data.drop(new_name, inplace=True, axis=0)
            # remove the higher order entry too, so the variable is
            # really gone from the object afterwards
            self.ho_data.pop(new_name, None)
        else:
            output = self.ho_data.pop(new_name)
        return output

    def transfer_attributes_to_instrument(self, inst, strict_names=False):
        """Transfer non-standard attributes in Meta to Instrument object.

        Pysat's load_netCDF and similar routines are only able to attach
        netCDF4 attributes to a Meta object. This routine identifies the
        attributes attached to this Meta object after construction and
        copies them onto the Instrument object. Names starting with an
        underscore and names that conflict with the Instrument's base
        attributes are not transferred.

        Parameters
        ----------
        inst : pysat.Instrument
            Instrument object to transfer attributes to
        strict_names : boolean (False)
            If True, produces an error if the Instrument object already
            has an attribute with the same name to be copied.

        Returns
        -------
        None
            pysat.Instrument object modified in place with new attributes

        Raises
        ------
        RuntimeError
            If strict_names is True and an attribute to be copied already
            exists on inst. Attributes processed earlier have already been
            set on inst when this happens.

        """
        # base Instrument attributes
        banned = inst._base_attr
        # attributes present right after construction of this Meta object
        base_attrb = self._base_attr

        # collect the attributes attached since then
        adict = {}
        transfer_key = []
        for key in dir(self):
            if key in banned or key in base_attrb:
                continue
            # don't store _ leading attributes
            if key[0] != '_':
                adict[key] = self.__getattribute__(key)
                transfer_key.append(key)

        # names already on the instrument, to check for duplicates
        inst_attr = dir(inst)
        for key in transfer_key:
            if key in inst_attr and strict_names:
                raise RuntimeError('Attribute ' + key +
                                   ' attached to Meta object can not be ' +
                                   'transferred as it already exists' +
                                   ' in the Instrument object.')
            inst.__setattr__(key, adict[key])

    @staticmethod
    def _same_names(first, second):
        """True if both lists have equal length and all of first is in second."""
        return len(first) == len(second) and all(i in second for i in first)

    @staticmethod
    def _same_value(left, right):
        """True if the values are equal, or both are NaN."""
        if left == right:
            return True
        try:
            # np.nan is not equal to anything; two NaN values are
            # treated as matching
            return bool(np.isnan(left) and np.isnan(right))
        except TypeError:
            # isnan gets unhappy with string inputs
            return False

    def __eq__(self, other):
        """Check equality between Meta instances. Good for testing.

        Checks if variable names, attribute names, and metadata values
        are all equal between two Meta objects. Note that this comparison
        treats np.nan == np.nan as True.

        Name comparison is case-sensitive.

        """
        if not isinstance(other, Meta):
            # wasn't even the correct class
            return False

        # variable and attribute names must match
        if not self._same_names(list(self.keys()), list(other.keys())):
            return False
        if not self._same_names(list(self.attrs()), list(other.attrs())):
            return False
        # now check the values of all elements
        for key in self.keys():
            for attr in self.attrs():
                if not self._same_value(self[key, attr], other[key, attr]):
                    return False

        # check through higher order products in the same manner
        if not self._same_names(list(self.keys_nD()), list(other.keys_nD())):
            return False
        for key in self.keys_nD():
            mine = self.ho_data[key]
            theirs = other.ho_data[key]
            if not self._same_names(list(mine.keys()), list(theirs.keys())):
                return False
            if not self._same_names(list(mine.attrs()),
                                    list(theirs.attrs())):
                return False
            for sub_key in mine.keys():
                for attr in mine.attrs():
                    if not self._same_value(mine[sub_key, attr],
                                            theirs[sub_key, attr]):
                        return False
        # if we made it this far, things are good
        return True

    @classmethod
    def from_csv(cls, name=None, col_names=None, sep=None, **kwargs):
        """Create instrument metadata object from csv.

        Parameters
        ----------
        name : string
            absolute filename for csv file or name of file
            stored in pandas instruments location
        col_names : list-like collection of strings
            column names in csv and resultant meta object
        sep : string
            column separator for supplied csv filename
        **kwargs
            passed along to pandas.read_csv

        Returns
        -------
        Meta
            Metadata read from the file, indexed by the 'name' column

        Raises
        ------
        ValueError
            If required column names are missing, name is not supplied or
            is not a string, no file can be located, or the file holds no
            rows.

        Note
        ----
        column names must include at least ['name', 'long_name', 'units'],
        assumed if col_names is None.

        """
        req_names = ['name', 'long_name', 'units']
        if col_names is None:
            col_names = req_names
        elif not all([i in col_names for i in req_names]):
            raise ValueError('col_names must include name, long_name, units.')

        if sep is None:
            sep = ','

        if name is None:
            raise ValueError('Must supply an instrument name or file path.')
        elif not isinstance(name, str):
            raise ValueError('keyword name must be related to a string')
        elif not os.path.isfile(name):
            # Not a real file, assume input is a pysat instrument name
            # and look in the standard pysat location. The package is only
            # imported here, where its install path is actually needed.
            import pysat
            test = os.path.join(pysat.__path__[0], 'instruments', name)
            if os.path.isfile(test):
                name = test
            else:
                # trying to form an absolute path for success
                test = os.path.abspath(name)
                if not os.path.isfile(test):
                    raise ValueError("Unable to create valid file path.")
                name = test

        mdata = pds.read_csv(name, names=col_names, sep=sep, **kwargs)

        if mdata.empty:
            raise ValueError('Unable to retrieve information from ' + name)
        # make sure the data name is the index
        mdata.index = mdata['name']
        del mdata['name']
        return cls(metadata=mdata)