Python DataFrame Exemples, pysat.DataFrame Python Exemples

Exemple #1

0

Afficher le fichier

def computational_form(data):
    """
    Repackage a Series of numbers, Series, or DataFrames for calculation.

    Regardless of input format, mathematical operations may be performed on
    the output via the same pandas mechanisms.

    This method may be particularly useful in analysis methods that aim to be
    instrument independent. pysat.Instrument objects can package data in a
    variety of ways within a DataFrame, depending upon the scientific data
    source. Thus, a variety of data types will be encountered by instrument
    independent methods and computational_form method may reduce the effort
    required to support more generalized processing.

    Parameters
    ----------
    data : pandas.Series
        Series of numbers, Series, DataFrames

    Returns
    -------
    pandas.Series, DataFrame, or Panel
        repacked data, aligned by indices, ready for calculation. An empty
        input is returned unchanged.
    """

    from pysat import DataFrame, Series, Panel

    # An empty input has no first element to inspect; hand it back as-is
    # rather than failing on ``data.iloc[0]``.
    if len(data) == 0:
        return data

    # The type of the first element decides how the whole input is repacked.
    first = data.iloc[0]
    if isinstance(first, DataFrame):
        # NOTE(review): Panel is not available in recent pandas releases;
        # confirm the pandas version this code is expected to run against.
        dslice = Panel.from_dict({i: data.iloc[i] for i in range(len(data))})
    elif isinstance(first, Series):
        # One row per contained Series, keeping the outer index.
        dslice = DataFrame(data.tolist())
        dslice.index = data.index
    else:
        # Plain numbers need no repackaging.
        dslice = data
    return dslice

Exemple #2

0

Afficher le fichier

Fichier : utils.py Projet : rstoneback/pysat

def computational_form(data):
    """
    Repackage a Series of numbers, Series, or DataFrames for calculation.

    Parameters
    ----------
    data : pandas.Series
        Series of numbers, Series, DataFrames

    Returns
    -------
    pandas.Series, DataFrame, or Panel
        repacked data, aligned by indices, ready for calculation. An empty
        input is returned unchanged.
    """

    # An empty input has no first element to inspect; hand it back as-is
    # rather than failing on ``data.iloc[0]``.
    if len(data) == 0:
        return data

    # The type of the first element decides how the whole input is repacked.
    first = data.iloc[0]
    if isinstance(first, DataFrame):
        # NOTE(review): Panel is not available in recent pandas releases;
        # confirm the pandas version this code is expected to run against.
        dslice = Panel.from_dict({i: data.iloc[i] for i in range(len(data))})
    elif isinstance(first, Series):
        # One row per contained Series, keeping the outer index.
        dslice = DataFrame(data.tolist())
        dslice.index = data.index
    else:
        # Plain numbers need no repackaging.
        dslice = data
    return dslice

Exemple #3

0

Afficher le fichier

def computational_form(data):
    """
    Repackage a Series of numbers, Series, or DataFrames for calculation.

    Parameters
    ----------
    data : pandas.Series
        Series of numbers, Series, DataFrames

    Returns
    -------
    pandas.Series, DataFrame, or Panel
        repacked data, aligned by indices, ready for calculation. An empty
        input is returned unchanged.
    """

    # Nothing to inspect or repackage when there are no elements; the
    # ``iloc[0]`` probe below would otherwise raise on an empty input.
    if len(data) == 0:
        return data

    probe = data.iloc[0]
    if isinstance(probe, DataFrame):
        # Key each contained DataFrame by its position in the input.
        # NOTE(review): Panel is not available in recent pandas releases;
        # confirm the pandas version this code is expected to run against.
        frames = {i: data.iloc[i] for i in range(len(data))}
        dslice = Panel.from_dict(frames)
    elif isinstance(probe, Series):
        # Stack the contained Series as rows and restore the outer index.
        dslice = DataFrame(data.tolist())
        dslice.index = data.index
    else:
        # Scalars are already in computational form.
        dslice = data
    return dslice

Exemple #4

0

Afficher le fichier

    def __init__(self,
                 metadata=None,
                 units_label='units',
                 name_label='long_name',
                 notes_label='notes',
                 desc_label='desc',
                 plot_label='label',
                 axis_label='axis',
                 scale_label='scale',
                 min_label='value_min',
                 max_label='value_max',
                 fill_label='fill',
                 export_nan=None):
        """Set up the metadata labels and the underlying metadata container.

        Parameters
        ----------
        metadata : pandas.DataFrame or None
            Existing metadata to store. If None, an empty DataFrame whose
            columns are the configured labels is created.
        units_label, name_label, notes_label, desc_label, plot_label,
        axis_label, scale_label, min_label, max_label, fill_label : str
            Column names used for each kind of metadata.
        export_nan : list-like or None
            Additional metadata names recorded, after `fill_label`, in the
            `_export_nan` list. None (default) adds nothing.

        Raises
        ------
        ValueError
            If `metadata` is supplied but is not a DataFrame.
        """

        # set mutability of Meta attributes
        self.mutable = True

        # set units and name labels directly
        self._units_label = units_label
        self._name_label = name_label
        self._notes_label = notes_label
        self._desc_label = desc_label
        self._plot_label = plot_label
        self._axis_label = axis_label
        self._scale_label = scale_label
        self._min_label = min_label
        self._max_label = max_label
        self._fill_label = fill_label
        # by default metadata with a value of nan will not be exported
        # unless the name is in the _export_nan list. Initialize the list
        # with the fill label, since it is reasonable to assume that a fill
        # value of nan would be intended to be exported.
        # A None default (rather than a shared ``[]``) avoids a mutable
        # default argument; list() also accepts tuples and other iterables.
        extra_nan = [] if export_nan is None else list(export_nan)
        self._export_nan = [fill_label] + extra_nan
        # init higher order (nD) data structure container, a dict
        self._ho_data = {}
        # use any user provided data to instantiate object with data
        # attribute; unit and name labels are called within
        if metadata is not None:
            if isinstance(metadata, DataFrame):
                self._data = metadata
                # make sure defaults are taken care of for required metadata
                self.accept_default_labels(self)
            else:
                # The pieces are joined with a space so the message no
                # longer reads "DataFrametype".
                raise ValueError(' '.join(('Input must be a pandas DataFrame',
                                           'type. See other constructors for',
                                           'alternate inputs.')))
        else:
            # NOTE(review): the notes label is read through
            # `self.notes_label` (no underscore), unlike the other labels;
            # presumably a property defined elsewhere on the class - confirm.
            self._data = DataFrame(None,
                                   columns=[
                                       self._units_label, self._name_label,
                                       self._desc_label, self._plot_label,
                                       self._axis_label, self._scale_label,
                                       self.notes_label, self._min_label,
                                       self._max_label, self._fill_label
                                   ])

        # establish attributes intrinsic to object, before user can
        # add any
        self._base_attr = dir(self)

Exemple #5

0

Afficher le fichier

    def __init__(self,
                 metadata=None,
                 units_label='units',
                 name_label='long_name',
                 notes_label='notes',
                 desc_label='desc',
                 plot_label='label',
                 axis_label='axis',
                 scale_label='scale',
                 min_label='value_min',
                 max_label='value_max',
                 fill_label='fill'):
        """Store the metadata labels and set up the metadata container.

        Parameters
        ----------
        metadata : pandas.DataFrame or None
            Existing metadata to store. If None, an empty DataFrame whose
            columns are the configured labels is created.
        units_label, name_label, notes_label, desc_label, plot_label,
        axis_label, scale_label, min_label, max_label, fill_label : str
            Column names used for each kind of metadata.

        Raises
        ------
        ValueError
            If `metadata` is supplied but is not a DataFrame.
        """
        # Record each label on its private attribute, in signature order.
        label_settings = (
            ('_units_label', units_label),
            ('_name_label', name_label),
            ('_notes_label', notes_label),
            ('_desc_label', desc_label),
            ('_plot_label', plot_label),
            ('_axis_label', axis_label),
            ('_scale_label', scale_label),
            ('_min_label', min_label),
            ('_max_label', max_label),
            ('_fill_label', fill_label),
        )
        for attr_name, label in label_settings:
            setattr(self, attr_name, label)

        # Container for higher order (nD) data structures.
        self._ho_data = {}

        if metadata is None:
            # No user input: start from an empty frame with one column per
            # label.
            # NOTE(review): the notes label is read through
            # `self.notes_label` (no underscore), unlike the other labels;
            # presumably a property defined elsewhere on the class - confirm.
            label_columns = [
                self._units_label, self._name_label,
                self._desc_label, self._plot_label,
                self._axis_label, self._scale_label,
                self.notes_label, self._min_label,
                self._max_label, self._fill_label,
            ]
            self._data = DataFrame(None, columns=label_columns)
        elif isinstance(metadata, DataFrame):
            self._data = metadata
            # Make sure defaults are taken care of for required metadata.
            self.accept_default_labels(self)
        else:
            raise ValueError(
                "Input must be a pandas DataFrame type. " +
                "See other constructors for alternate inputs.")

        # Snapshot the attributes intrinsic to the object before the user
        # can add any.
        self._base_attr = dir(self)

Exemple #6

0

Afficher le fichier

def computational_form(data):
    """
    Repackage a Series of numbers, Series, or DataFrames for calculation.

    .. deprecated:: 2.2.0
      `computational_form` will be removed in pysat 3.0.0, it will
      be added to pysatSeasons

    Regardless of input format, mathematical operations may be performed on the
    output via the same pandas mechanisms.

    This method may be particularly useful in analysis methods that aim to be
    instrument independent. pysat.Instrument objects can package data in a
    variety of ways within a DataFrame, depending upon the scientific data
    source. Thus, a variety of data types will be encountered by instrument
    independent methods and computational_form method may reduce the effort
    required to support more generalized processing.

    Parameters
    ----------
    data : pandas.Series
        Series of numbers, Series, DataFrames

    Returns
    -------
    pandas.Series, DataFrame, or Panel
        repacked data, aligned by indices, ready for calculation. An empty
        input is returned unchanged.
    """

    from pysat import DataFrame, Series, Panel
    import warnings

    # Each fragment is a separate list item so that the join puts a space
    # between "instead:" and the URL.
    warnings.warn(' '.join([
        "This function is deprecated here and will be",
        "removed in pysat 3.0.0. Please use",
        "pysatSeasons instead:",
        "https://github.com/pysat/pysatSeasons",
    ]),
                  DeprecationWarning,
                  stacklevel=2)

    # An empty input has no first element to inspect; hand it back as-is
    # rather than failing on ``data.iloc[0]``.
    if len(data) == 0:
        return data

    # The type of the first element decides how the whole input is repacked.
    first = data.iloc[0]
    if isinstance(first, DataFrame):
        # NOTE(review): Panel is not available in recent pandas releases;
        # confirm the pandas version this code is expected to run against.
        dslice = Panel.from_dict({i: data.iloc[i] for i in range(len(data))})
    elif isinstance(first, Series):
        # One row per contained Series, keeping the outer index.
        dslice = DataFrame(data.tolist())
        dslice.index = data.index
    else:
        # Plain numbers need no repackaging.
        dslice = data
    return dslice

Exemple #7

0

Afficher le fichier

Fichier : _instrument.py Projet : yangjian615/pysat

    def _load_data(self, date=None, fid=None):
        """
        Load data for an instrument on given date or fid, depending upon input.

        Parameters
        ----------
        date : datetime-like or None
            Day to load; files are selected from `date` through `date` plus
            one day. Only used for file selection when `fid` is None.
        fid : int or None
            File index into `self.files`; takes precedence over `date` when
            selecting files.

        Returns
        -------
        data, mdata
            The objects returned by the instrument load routine, or an empty
            DataFrame and a new Meta object when no files were selected.

        Raises
        ------
        ValueError
            If neither `date` nor `fid` is supplied.
        TypeError
            If non-empty loaded data is not a DataFrame, or the metadata
            returned with it is not a pysat Meta object.

        """

        if fid is not None:
            # get filename based off of index value
            fname = self.files[fid:fid + 1]
        elif date is not None:
            fname = self.files[date:date + pds.DateOffset(days=1)]
        else:
            raise ValueError('Must supply either a date or file id number.')

        if len(fname) > 0:
            load_fname = [os.path.join(self.files.data_path, f) for f in fname]
            data, mdata = self._load_rtn(load_fname,
                                         tag=self.tag,
                                         sat_id=self.sat_id,
                                         **self.kwargs)
        else:
            data = DataFrame(None)
            mdata = _meta.Meta()

        # Build the status message that is printed before returning.
        output_str = '{platform} {name} {tag} {sat_id}'
        output_str = output_str.format(platform=self.platform,
                                       name=self.name,
                                       tag=self.tag,
                                       sat_id=self.sat_id)
        if not data.empty:
            if not isinstance(data, DataFrame):
                # str.join replaces the Python 2 only ``string.join``; the
                # resulting message text is unchanged.
                raise TypeError(
                    ' '.join(('Data returned by instrument load',
                              'routine must be a pandas.DataFrame')))
            if not isinstance(mdata, _meta.Meta):
                raise TypeError(
                    'Metadata returned must be a pysat.Meta object')
            if date is not None:
                output_str = ' '.join(
                    ('Returning', output_str, 'data for', date.strftime('%D')))
            elif len(fname) == 1:
                # this check was zero
                output_str = ' '.join(
                    ('Returning', output_str, 'data from', fname[0]))
            else:
                output_str = ' '.join(
                    ('Returning', output_str, 'data from', fname[0], '::',
                     fname[-1]))
        elif date is not None:
            # no data signal
            output_str = ' '.join(
                ('No', output_str, 'data for', date.strftime('%D')))
        else:
            # no data signal for a load by file id: there is no date to
            # format, so report the file id instead of failing on None
            output_str = ' '.join(
                ('No', output_str, 'data for file id', str(fid)))
        # remove extra spaces, if any
        output_str = " ".join(output_str.split())
        print(output_str)
        return data, mdata

Exemple #8

0

Afficher le fichier

Fichier : _meta.py Projet : rstoneback/pysat

    def __init__(self, metadata=None, units_label='units', name_label='long_name',
                 notes_label='notes', desc_label='desc', plot_label='label',
                 axis_label='axis', scale_label='scale', min_label='value_min',
                 max_label='value_max', fill_label='fill'):
        """Store the metadata labels and set up the metadata container.

        Parameters
        ----------
        metadata : pandas.DataFrame or None
            Existing metadata to store. If None, an empty DataFrame whose
            columns are the configured labels is created.
        units_label, name_label, notes_label, desc_label, plot_label,
        axis_label, scale_label, min_label, max_label, fill_label : str
            Column names used for each kind of metadata.

        Raises
        ------
        ValueError
            If `metadata` is supplied but is not a DataFrame.
        """
        # Keep the labels on private attributes.
        self._units_label = units_label
        self._name_label = name_label
        self._notes_label = notes_label
        self._desc_label = desc_label
        self._plot_label = plot_label
        self._axis_label = axis_label
        self._scale_label = scale_label
        self._min_label = min_label
        self._max_label = max_label
        self._fill_label = fill_label

        # Container for higher order (nD) data structures.
        self._ho_data = {}

        # Reject unsupported input up front; the labels above are already
        # set by the time this is raised.
        if metadata is not None and not isinstance(metadata, DataFrame):
            raise ValueError("Input must be a pandas DataFrame type. "+
                             "See other constructors for alternate inputs.")

        if metadata is None:
            # No user input: empty frame with one column per label.
            # NOTE(review): the notes label is read through
            # `self.notes_label` (no underscore), unlike the other labels;
            # presumably a property defined elsewhere on the class - confirm.
            empty_columns = [self._units_label, self._name_label,
                             self._desc_label,
                             self._plot_label, self._axis_label,
                             self._scale_label, self.notes_label,
                             self._min_label, self._max_label,
                             self._fill_label]
            self._data = DataFrame(None, columns=empty_columns)
        else:
            self._data = metadata
            # Make sure defaults are taken care of for required metadata.
            self.accept_default_labels(self)

        # Snapshot the attributes intrinsic to the object before the user
        # can add any.
        self._base_attr = dir(self)

Exemple #9

0

Afficher le fichier

Fichier : _meta.py Projet : jcspence/pysat-1

 def replace(self, metadata=None):
     """Replace stored metadata with input data.

     Parameters
     ----------
     metadata : pandas.DataFrame or None
         DataFrame should be indexed by variable name that contains at minimum the
         standard_name (name), units, and long_name for the data stored in the associated
         pysat Instrument object. If None, the stored metadata is reset to an
         empty DataFrame holding only the name and units columns.

     Raises
     ------
     ValueError
         If `metadata` is supplied but is not a DataFrame.

     Note
     ----
     The input DataFrame is stored directly; missing name or units columns
     are added to it.

     """
     if metadata is None:
         self.data = DataFrame(None, columns=[self._name_label, self._units_label])
         return

     if not isinstance(metadata, DataFrame):
         raise ValueError("Input must be a pandas DataFrame type. "+
                     "See other constructors for alternate inputs.")

     self.data = metadata
     # str() keeps the case-insensitive comparison from failing on
     # non-string column names.
     lower_columns = [str(name).lower() for name in self.data.columns]
     # Compare against the configured labels (not hard-coded defaults) so
     # that a column already present under a custom label is not overwritten.
     if self._name_label.lower() not in lower_columns:
         self.data[self._name_label] = self.data.index
     if self._units_label.lower() not in lower_columns:
         self.data[self._units_label] = ''

Exemple #10

0

Afficher le fichier

    def load(self, orbit=None):
        """Load a particular orbit into .data for loaded day.

        Parameters
        ----------
        orbit : int
            orbit number, 1 indexed. Negative values count back from the
            last orbit of the day (-1 is the last orbit, -2 the second to
            last, etc.).

        Raises
        ------
        Exception
            If `orbit` is None, or if the requested orbit is past the total
            number of orbits for the day (in which case the instrument data
            is first replaced with an empty DataFrame).

        Note
        ----
        A day of data must be loaded before this routine functions properly;
        if no data is loaded, a message is printed and nothing else happens.
        If the last orbit of the day is requested, it will automatically be
        padded with data from the next day. The orbit counter will be
        reset to 1.

        """
        if len(self.sat.data) > 0:  # ensure data exists
            # set up orbit metadata
            self._calcOrbits()
            # ensure user supplied an orbit
            if orbit is not None:
                # pull out requested orbit
                if orbit < 0:
                    # negative indexing consistent with numpy, -1 last, -2 second
                    # to last, etc.
                    orbit = self.num + 1 + orbit

                if orbit == 1:
                    # change from orig copied from _core, didn't look correct.
                    # self._getBasicOrbit(orbit=2)
                    try:
                        # remember the requested day before stepping back
                        true_date = self.sat.date  # .copy()

                        # step to the previous day and load its last orbit
                        self.sat.prev()
                        # if and else added because of CINDI turn off 6/5/2013,
                        # turn on 10/22/2014
                        # crashed when starting on 10/22/2014
                        # prev returned empty data
                        if len(self.sat.data) > 0:
                            self.load(orbit=-1)
                        else:
                            # previous day is empty: return to the requested
                            # day and take its first orbit as-is
                            self.sat.next()
                            self._getBasicOrbit(orbit=1)
                        # check that this orbit should end on the current day
                        delta = pds.to_timedelta(true_date -
                                                 self.sat.data.index[0])
                        if delta >= self.orbit_period:
                            # the orbit loaded isn't close enough to date
                            # to be the first orbit of the day, move forward
                            self.next()
                    except StopIteration:
                        # stepping failed with StopIteration: fall back to
                        # the basic first orbit
                        self._getBasicOrbit(orbit=1)
                        # includes hack to appear to be zero indexed
                        print('Loaded Orbit:%i' % (self.current - 1))
                        # check if the first orbit is also the last orbit

                elif orbit == self.num:
                    # we get here if user asks for last orbit
                    # make sure that orbit data goes across daybreak as needed
                    # load previous orbit
                    if self.num != 1:
                        self._getBasicOrbit(self.num - 1)
                        self.next()
                    else:
                        # only one orbit in the day: load it directly
                        self._getBasicOrbit(orbit=-1)

                elif orbit < self.num:
                    # load orbit data into data
                    self._getBasicOrbit(orbit)
                    # includes hack to appear to be zero indexed
                    print('Loaded Orbit:%i' % (self.current - 1))

                else:
                    # gone too far
                    self.sat.data = DataFrame()
                    raise Exception(
                        'Requested an orbit past total orbits for day')
            else:
                raise Exception('Must set an orbit')
        else:
            print('No data loaded in instrument object to determine orbits.')

Exemple #11

0

Afficher le fichier

Fichier : _instrument.py Projet : yangjian615/pysat

    def load(self,
             yr=None,
             doy=None,
             date=None,
             fname=None,
             fid=None,
             verifyPad=False):
        """Load instrument data into Instrument object .data.

        Parameters
        ----------
        yr : integer
            year for desired data
        doy : integer
            day of year
        date : datetime object
            date to load
        fname : 'string'
            filename to be loaded
        fid : integer
            file index to be loaded
        verifyPad : boolean
            if True, padding data not removed (debug purposes)

        Returns
        --------
        Void.  Data is added to self.data

        Raises
        ------
        TypeError
            If none of `date`, a `yr`/`doy` pair, `fname`, or `fid` is
            supplied.

        Note
        ----
        Loads data for a chosen instrument into .data. Any functions chosen
        by the user and added to the custom processing queue (.custom.add)
        are automatically applied to the data before it is available to
        user in .data.

        Inputs are checked in the order `date`, `yr`/`doy`, `fname`, `fid`;
        the first one supplied is used.

        """

        # Work out what to load and how to step to the neighbouring
        # day/file (`inc`) from the current position (`curr`).
        if date is not None:
            # date supplied getyrdoy checks if it is datetime
            year, doy = utils.getyrdoy(date)
            self.yr = year
            self.doy = doy
            self.date = date
            self._fid = None
            self._load_by_date = True
            inc = pds.DateOffset(days=1)
            curr = date
        elif (yr is not None) & (doy is not None):
            # if date not defined but both yr and doy are
            # NOTE(review): `pds.datetime` is not available in recent pandas
            # releases - confirm the supported pandas version
            self.date = pds.datetime(yr, 1, 1) + pds.DateOffset(days=(doy - 1))
            self.yr = yr
            self.doy = doy
            self._fid = None
            self._load_by_date = True
            inc = pds.DateOffset(days=1)
            curr = self.date
        elif fname is not None:
            # date will have to be set later by looking at the data
            self.date = None
            self.yr = None
            self.doy = None
            self._load_by_date = False
            # if no index, called func tries to find file in instrument dir,
            # throws error if it fails
            self._fid = self.files.get_index(fname)
            inc = 1
            curr = self._fid.copy()
        elif fid is not None:
            self._load_by_date = False
            self._fid = fid
            self.date = None
            self.yr = None
            self.doy = None
            inc = 1
            curr = fid
        else:
            estr = 'Must supply a yr,doy pair, or datetime object, or filename'
            estr = '{:s} to load data from.'.format(estr)
            raise TypeError(estr)

        self.orbits._reset()
        # if a pad is set (or data is spread across files), need to have a
        # three day/file load
        if (self.pad is not None) | self.multi_file_day:
            if self._next_data.empty & self._prev_data.empty:
                # data has not already been loaded for previous and next days
                # load data for all three
                print('Initializing three day/file window')
                # using current date or fid
                self._prev_data, self._prev_meta = self._load_prev()
                self._curr_data, self._curr_meta = \
                    self._load_data(date=self.date, fid=self._fid)
                self._next_data, self._next_meta = self._load_next()
            else:
                # moving forward in time: shift the window by one and only
                # load the new next day/file
                if self._next_data_track == curr:
                    self._prev_data = self._curr_data
                    self._prev_meta = self._curr_meta
                    self._curr_data = self._next_data
                    self._curr_meta = self._next_meta
                    self._next_data, self._next_meta = self._load_next()
                # moving backward in time: shift the window the other way
                # and only load the new previous day/file
                elif self._prev_data_track == curr:
                    self._next_data = self._curr_data
                    self._next_meta = self._curr_meta
                    self._curr_data = self._prev_data
                    self._curr_meta = self._prev_meta
                    self._prev_data, self._prev_meta = self._load_prev()
                # jumped in time/or switched from filebased to date based access
                else:
                    self._prev_data, self._prev_meta = self._load_prev()
                    self._curr_data, self._curr_meta = \
                                self._load_data(date=self.date, fid=self._fid)
                    self._next_data, self._next_meta = self._load_next()

            # make sure datetime indices for all data is monotonic
            if not self._prev_data.index.is_monotonic_increasing:
                self._prev_data.sort_index(inplace=True)
            if not self._curr_data.index.is_monotonic_increasing:
                self._curr_data.sort_index(inplace=True)
            if not self._next_data.index.is_monotonic_increasing:
                self._next_data.sort_index(inplace=True)

            # make tracking indexes consistent with new loads
            self._next_data_track = curr + inc
            self._prev_data_track = curr - inc
            # attach data to object
            if not self._curr_data.empty:
                self.data = self._curr_data.copy()
                self.meta = self._curr_meta.copy()
            else:
                self.data = DataFrame(None)
                # line below removed as it would delete previous meta, if any
                # if you end a seasonal analysis with a day with no data, then
                # no meta: self.meta = _meta.Meta()

            # NOTE(review): the `.ix` indexer used below is not available in
            # recent pandas releases - confirm the supported pandas version
            if self.multi_file_day:
                # keep only the samples that fall within the requested day
                self.data = self.data.ix[self.date:self.date + pds.DateOffset(
                    hours=23, minutes=59, seconds=59, nanoseconds=99999999)]

            # pad data based upon passed parameter
            if (not self._prev_data.empty) & (not self.data.empty):
                if self.multi_file_day and self._load_by_date:
                    padLeft = self._prev_data.ix[(
                        self.date):self._curr_data.index[0]]
                else:
                    padLeft = self._prev_data.ix[(
                        self._curr_data.index[0] -
                        self.pad):self._curr_data.index[0]]
                #self.data = pds.concat([padLeft[0:-1], self.data])
                self.data = pds.concat([padLeft, self.data])

            if (not self._next_data.empty) & (not self.data.empty):

                if self.multi_file_day and self._load_by_date:
                    padRight = self._next_data.ix[self.date : (self.date + \
        pds.DateOffset(hours=23, minutes=59, seconds=59, nanoseconds=99999999))]
                else:
                    padRight = self._next_data.ix[self._curr_data.index[-1]:(
                        self._curr_data.index[-1] + self.pad)]
                #self.data = pds.concat([self.data, padRight[1:]])
                self.data = pds.concat([self.data, padRight])

            # drop any possible duplicate index times
            #self.data.drop_duplicates(inplace=True)
            self.data = self.data[~self.data.index.duplicated()]

        # no pad set and not multi_file_day: load a single day/file
        else:
            self.data, meta = self._load_data(date=self.date, fid=self._fid)
            # keep the previous meta when nothing was loaded
            if not self.data.empty:
                self.meta = meta

        # check if load routine actually returns meta
        if self.meta.data.empty:
            # fall back to column names as long names, with blank units
            self.meta[self.data.columns] = {
                'long_name': self.data.columns,
                'units': [''] * len(self.data.columns)
            }
        # if loading by file set the yr, doy, and date
        if not self._load_by_date:
            # date is taken from the first sample, truncated to the day
            temp = self.data.index[0]
            temp = pds.datetime(temp.year, temp.month, temp.day)
            self.date = temp
            self.yr, self.doy = utils.getyrdoy(self.date)

        # processing order: default routine, then cleaning, then custom
        # functions; all are skipped when no data was loaded
        if not self.data.empty:
            self._default_rtn(self)
        # clean
        if (not self.data.empty) & (self.clean_level != 'none'):
            self._clean_rtn(self)
        # apply custom functions
        if not self.data.empty:
            self.custom._apply_all(self)
        # remove the excess padding, if any applied
        if (self.pad is not None) & (not self.data.empty) & (not verifyPad):
            self.data = self.data[self._curr_data.index[0]:self._curr_data.
                                  index[-1]]

        sys.stdout.flush()
        return

Exemple #12

0

Afficher le fichier

Fichier : _instrument.py Projet : yangjian615/pysat

class Instrument(object):
    """Download, load, manage, modify and analyze science data.
    
    Parameters
    ----------    
    platform : string
        name of platform/satellite.
    name : string
        name of instrument. 
    tag : string, optional
        identifies particular subset of instrument data.
    sat_id : string, optional
        identity within constellation
    clean_level : {'clean','dusty','dirty','none'}, optional
        level of data quality
    pad : pandas.DateOffset, or dictionary, optional
        Length of time to pad the beginning and end of loaded data for
        time-series processing. Extra data is removed after applying all 
        custom functions. Dictionary, if supplied, is simply passed to 
        pandas DateOffset.
    orbit_info : dict    
        Orbit information, {'index':index, 'kind':kind, 'period':period}.
        See pysat.Orbits for more information.            
    inst_module : module, optional
        Provide instrument module directly. 
        Takes precedence over platform/name.
    update_files : boolean, optional
        If True, immediately query filesystem for instrument files and store.
    temporary_file_list : boolean, optional
        If true, the list of Instrument files will not be written to disk.
        Prevents a race condition when running multiple pysat processes.
    multi_file_day : boolean, optional 
        Set to True if Instrument data files for a day are spread across
        multiple files and data for day n could be found in a file
        with a timestamp of day n-1 or n+1.
    manual_org : bool
        if True, then pysat will look directly in pysat data directory
        for data files and will not use default /platform/name/tag
    directory_format : str
        directory naming structure in string format. Variables such as
        platform, name, and tag will be filled in as needed using python
        string formatting. The default directory structure would be 
        expressed as '{platform}/{name}/{tag}'
    file_format : str or NoneType
        File naming structure in string format.  Variables such as year,
        month, and sat_id will be filled in as needed using python string
        formatting.  The default file format structure is supplied in the
        instrument list_files routine.
               
    Attributes
    ----------
    data : pandas.DataFrame
        loaded science data 
    date : pandas.datetime
        date for loaded data
    yr : int
        year for loaded data
    bounds : (datetime/filename/None, datetime/filename/None)
        bounds for loading data, supply array_like for a season with gaps
    doy : int
        day of year for loaded data
    files : pysat.Files
        interface to instrument files
    meta : pysat.Meta
        interface to instrument metadata, similar to netCDF 1.6
    orbits : pysat.Orbits
        interface to extracting data orbit-by-orbit
    custom : pysat.Custom
        interface to instrument nano-kernel
    kwargs : dictionary
        keyword arguments passed to instrument loading routine
    
    Note
    ----
    Pysat attempts to load the module platform_name.py located in
    the pysat/instruments directory. This module provides the underlying 
    functionality to download, load, and clean instrument data. 
    Alternatively, the module may be supplied directly
    using keyword inst_module.
    
    Examples
    --------     
    :: 
           
        # 1-second mag field data
        vefi = pysat.Instrument(platform='cnofs', 
                                name='vefi', 
                                tag='dc_b', 
                                clean_level='clean')
        start = pysat.datetime(2009,1,1)
        stop = pysat.datetime(2009,1,2)
        vefi.download(start, stop)
        vefi.load(date=start)
        print(vefi['dB_mer'])
        print(vefi.meta['db_mer'])
    
        # 1-second thermal plasma parameters
        ivm = pysat.Instrument(platform='cnofs', 
                                name='ivm', 
                                tag='', 
                                clean_level='clean')
        ivm.download(start,stop)
        ivm.load(2009,1)
        print(ivm['ionVelmeridional'])
        
        # Ionosphere profiles from GPS occultation
        cosmic = pysat.Instrument('cosmic2013', 
                                    'gps', 
                                    'ionprf', 
                                    altitude_bin=3)
        # bins profile using 3 km step
        cosmic.download(start, stop, user=user, password=password)
        cosmic.load(date=start)

    """
    def __init__(self,
                 platform=None,
                 name=None,
                 tag=None,
                 sat_id=None,
                 clean_level='clean',
                 update_files=None,
                 pad=None,
                 orbit_info=None,
                 inst_module=None,
                 multi_file_day=None,
                 manual_org=None,
                 directory_format=None,
                 file_format=None,
                 temporary_file_list=False,
                 *arg,
                 **kwargs):

        if inst_module is None:
            # use strings to look up module name
            if isinstance(platform, str) and isinstance(name, str):
                self.platform = platform.lower()
                self.name = name.lower()
                # look to module for instrument functions and defaults
                self._assign_funcs(by_name=True)
            elif (platform is None) and (name is None):
                # creating "empty" Instrument object with this path
                self.name = ''
                self.platform = ''
                self._assign_funcs()
            else:
                raise ValueError(
                    'Inputs platform and name must both be strings, or both None.'
                )
        else:
            # user has provided a module
            try:
                # platform and name are expected to be part of module
                self.name = inst_module.name.lower()
                self.platform = inst_module.platform.lower()
            except AttributeError:
                raise AttributeError(
                    string.join((
                        'A name and platform attribute for the ',
                        'instrument is required if supplying routine module directly.'
                    )))
            # look to module for instrument functions and defaults
            self._assign_funcs(inst_module=inst_module)

        # more reasonable defaults for optional parameters
        self.tag = tag.lower() if tag is not None else ''
        self.sat_id = sat_id.lower() if sat_id is not None else ''
        self.clean_level = (clean_level.lower()
                            if clean_level is not None else 'none')

        # assign_func sets some instrument defaults, direct info rules all
        if directory_format is not None:
            self.directory_format = directory_format.lower()
        # value not provided by user, check if there is a value provided by
        # instrument module
        elif self.directory_format is not None:
            try:
                # check if it is a function
                self.directory_format = self.directory_format(tag, sat_id)
            except TypeError:
                pass

        if file_format is not None:
            self.file_format = file_format
        # value not provided by user, check if there is a value provided by
        # instrument module
        elif self.file_format is not None:
            # check if it is an iterable string.  If it isn't formatted
            # properly, give a warning and set file_format to None
            if (not isinstance(self.file_format, str)
                    or self.file_format.find("{") < 0
                    or self.file_format.find("}") < 1):
                estr = 'file format set to default, supplied string must be '
                estr = '{:s}iteratable [{:}]'.format(estr, self.file_format)
                print(estr)
                self.file_format = None

        # set up empty data and metadata
        self.data = DataFrame(None)
        self.meta = _meta.Meta()

        # function processing class, processes data on load
        self.custom = _custom.Custom()
        # create arrays to store data around loaded day
        # enables padding across day breaks with minimal loads
        self._next_data = DataFrame(None)
        self._next_data_track = []
        self._prev_data = DataFrame(None)
        self._prev_data_track = []
        self._curr_data = DataFrame(None)

        # multi file day, default set by assign_funcs
        if multi_file_day is not None:
            self.multi_file_day = multi_file_day

        # arguments for padding
        if isinstance(pad, pds.DateOffset):
            self.pad = pad
        elif isinstance(pad, dict):
            self.pad = pds.DateOffset(**pad)
        elif pad is None:
            self.pad = None
        else:
            estr = 'pad must be a dictionary or a pandas.DateOffset instance.'
            raise ValueError(estr)

        # instantiate Files class
        manual_org = False if manual_org is None else manual_org
        temporary_file_list = not temporary_file_list
        self.files = _files.Files(self,
                                  manual_org=manual_org,
                                  directory_format=self.directory_format,
                                  update_files=update_files,
                                  file_format=self.file_format,
                                  write_to_disk=temporary_file_list)

        # set bounds for iteration
        # self.bounds requires the Files class
        # setting (None,None) loads default bounds
        self.bounds = (None, None)
        self.date = None
        self._fid = None
        self.yr = None
        self.doy = None
        self._load_by_date = False

        # initialize orbit support
        if orbit_info is None:
            if self.orbit_info is None:
                # if default info not provided, set None as default
                orbit_info = {'index': None, 'kind': None, 'period': None}
            else:
                # default provided by instrument module
                orbit_info = self.orbit_info
        self.orbits = _orbits.Orbits(self, **orbit_info)

        # store kwargs, passed to load routine
        self.kwargs = kwargs

        # run instrument init function, a basic pass function is used
        # if user doesn't supply the init function
        self._init_rtn(self)

    def __getitem__(self, key):
        """
        Convenience notation for accessing data; inst['name'] is inst.data.name
        
        Examples
        --------
        ::
        
            # By name
            inst['name']
            # By position  
            inst[row_index, 'name']  
            # Slicing by row 
            inst[row1:row2, 'name']            
            # By Date  
            inst[datetime, 'name']
            # Slicing by date, inclusive
            inst[datetime1:datetime2, 'name']
            # Slicing by name and row/date  
            inst[datetime1:datetime1, 'name1':'name2']
            
        """

        if isinstance(key, tuple):
            # support slicing
            return self.data.ix[key[0], key[1]]
        else:
            return self.data[key]

    def __setitem__(self, key, new):
        """Convenience method for adding data to instrument.
        
        Examples
        --------
        ::
        
            # Simple Assignment, default metadata assigned
            # 'long_name' = 'name'
            # 'units' = ''
            inst['name'] = newData
            # Assignment with Metadata 
            inst['name'] = {'data':new_data, 
                            'long_name':long_name, 
                            'units':units}
        
        Note
        ----
        If no metadata provided and if metadata for 'name' not already stored 
        then default meta information is also added, 
        long_name = 'name', and units = ''.
        
        """
        if isinstance(new, dict):
            # metadata should be included in dict
            self.data[key] = new.pop('data')
            # pass the rest to meta
            self.meta[key] = new
        else:
            if isinstance(key, tuple):
                self.data.ix[key[0], key[1]] = new
                self.meta[key[1]] = {}
            elif isinstance(key, str):
                self.data[key] = new
                self.meta[key] = {}
            elif isinstance(new, DataFrame):
                self.data[key] = new[key]
                for ke in key:
                    self.meta[ke] = {}
            else:
                raise ValueError("No support for supplied input key")

    def copy(self):
        """Deep copy of the entire Instrument object."""
        return copy.deepcopy(self)

    def _pass_func(*args, **kwargs):
        pass

    def _assign_funcs(self, by_name=False, inst_module=None):
        """Assign all external science instrument methods to Instrument object."""
        import importlib
        # set defaults
        self._list_rtn = self._pass_func
        self._load_rtn = self._pass_func
        self._default_rtn = self._pass_func
        self._clean_rtn = self._pass_func
        self._init_rtn = self._pass_func
        self._download_rtn = self._pass_func
        # default params
        self.directory_format = None
        self.file_format = None
        self.multi_file_day = False
        self.orbit_info = None

        if by_name:
            # look for code with filename name, any errors passed up
            inst = importlib.import_module(''.join(
                ('.', self.platform, '_', self.name)),
                                           package='pysat.instruments')
        elif inst_module is not None:
            # user supplied an object with relevant instrument routines
            inst = inst_module
        else:
            # no module or name info, default pass functions assigned
            return

        try:
            self._load_rtn = inst.load
            self._list_rtn = inst.list_files
            self._download_rtn = inst.download
        except AttributeError:
            estr = 'A load, file_list, and download routine are required for '
            raise AttributeError('{:s}every instrument.'.format(estr))
        try:
            self._default_rtn = inst.default
        except AttributeError:
            pass
        try:
            self._init_rtn = inst.init
        except AttributeError:
            pass
        try:
            self._clean_rtn = inst.clean
        except AttributeError:
            pass

        # look for instrument default parameters
        try:
            self.directory_format = inst.directory_format
        except AttributeError:
            pass
        try:
            self.multi_file_day = inst.self.multi_file_day
        except AttributeError:
            pass
        try:
            self.orbit_info = inst.orbit_info
        except AttributeError:
            pass

        return

    def _load_data(self, date=None, fid=None):
        """
        Load data for an instrument on given date or fid, dependng upon input.
        
        """

        if fid is not None:
            # get filename based off of index value
            fname = self.files[fid:fid + 1]
        elif date is not None:
            fname = self.files[date:date + pds.DateOffset(days=1)]
        else:
            raise ValueError('Must supply either a date or file id number.')

        if len(fname) > 0:
            load_fname = [os.path.join(self.files.data_path, f) for f in fname]
            data, mdata = self._load_rtn(load_fname,
                                         tag=self.tag,
                                         sat_id=self.sat_id,
                                         **self.kwargs)
        else:
            data = DataFrame(None)
            mdata = _meta.Meta()

        output_str = '{platform} {name} {tag} {sat_id}'
        output_str = output_str.format(platform=self.platform,
                                       name=self.name,
                                       tag=self.tag,
                                       sat_id=self.sat_id)
        if not data.empty:
            if not isinstance(data, DataFrame):
                raise TypeError(
                    string.join(('Data returned by instrument load',
                                 'routine must be a pandas.DataFrame')))
            if not isinstance(mdata, _meta.Meta):
                raise TypeError(
                    'Metadata returned must be a pysat.Meta object')
            if date is not None:
                output_str = ' '.join(
                    ('Returning', output_str, 'data for', date.strftime('%D')))
            else:
                if len(fname) == 1:
                    # this check was zero
                    output_str = ' '.join(
                        ('Returning', output_str, 'data from', fname[0]))
                else:
                    output_str = ' '.join(
                        ('Returning', output_str, 'data from', fname[0], '::',
                         fname[-1]))
        else:
            # no data signal
            output_str = ' '.join(
                ('No', output_str, 'data for', date.strftime('%D')))
        # remove extra spaces, if any
        output_str = " ".join(output_str.split())
        print(output_str)
        return data, mdata

    def _load_next(self):
        """Load the next days data (or file) without incrementing the date.
        Repeated calls will not advance date/file and will produce the same data
        
        Uses info stored in object to either increment the date, 
        or the file. Looks for self._load_by_date flag. 
         
        """
        if self._load_by_date:
            next_date = self.date + pds.DateOffset(days=1)
            return self._load_data(date=next_date)
        else:
            return self._load_data(fid=self._fid + 1)

    def _load_prev(self):
        """Load the next days data (or file) without decrementing the date.
        Repeated calls will not decrement date/file and will produce the same data
        
        Uses info stored in object to either decrement the date, 
        or the file. Looks for self._load_by_date flag.  
        
        """

        if self._load_by_date:
            prev_date = self.date - pds.DateOffset(days=1)
            return self._load_data(date=prev_date)
        else:
            return self._load_data(fid=self._fid - 1)

    def load(self,
             yr=None,
             doy=None,
             date=None,
             fname=None,
             fid=None,
             verifyPad=False):
        """Load instrument data into Instrument object .data.

        Exactly one way of selecting data is used, checked in this order:
        `date`, then the `yr`/`doy` pair, then `fname`, then `fid`.

        Parameters
        ----------
        yr : integer
            year for desired data
        doy : integer
            day of year
        date : datetime object
            date to load
        fname : 'string'
            filename to be loaded
        fid : integer
            position of the file to load within self.files
        verifyPad : boolean
            if True, padding data not removed (debug purposes)

        Returns
        --------
        Void.  Data is added to self.data

        Raises
        ------
        TypeError
            If none of date, yr/doy, fname, or fid is supplied.

        Note
        ----
        Loads data for a chosen instrument into .data. After loading, the
        instrument default routine, the clean routine (unless clean_level
        is 'none'), and the functions in the custom processing queue
        (.custom.add) are applied, in that order, before the data is
        available to the user in .data.

        """

        # Work out what to load. `curr` identifies the requested day/file and
        # `inc` is the step to its neighbours; both feed the tracking values
        # used by the three day/file window below.
        if date is not None:
            # date supplied getyrdoy checks if it is datetime
            year, doy = utils.getyrdoy(date)
            self.yr = year
            self.doy = doy
            self.date = date
            self._fid = None
            self._load_by_date = True
            inc = pds.DateOffset(days=1)
            curr = date
        elif (yr is not None) & (doy is not None):
            # if date not defined but both yr and doy are
            self.date = pds.datetime(yr, 1, 1) + pds.DateOffset(days=(doy - 1))
            self.yr = yr
            self.doy = doy
            self._fid = None
            self._load_by_date = True
            inc = pds.DateOffset(days=1)
            curr = self.date
        elif fname is not None:
            # date will have to be set later by looking at the data
            self.date = None
            self.yr = None
            self.doy = None
            self._load_by_date = False
            # if no index, called func tries to find file in instrument dir,
            # throws error if it fails
            self._fid = self.files.get_index(fname)
            inc = 1
            # presumably get_index returns a numpy integer, which provides
            # .copy() -- TODO confirm against pysat.Files
            curr = self._fid.copy()
        elif fid is not None:
            self._load_by_date = False
            self._fid = fid
            self.date = None
            self.yr = None
            self.doy = None
            inc = 1
            curr = fid
        else:
            estr = 'Must supply a yr,doy pair, or datetime object, or filename'
            estr = '{:s} to load data from.'.format(estr)
            raise TypeError(estr)

        self.orbits._reset()
        # if pad is true, need to have a three day/file load
        # (the same path is taken when multi_file_day is set, even if no pad
        # was supplied)
        if (self.pad is not None) | self.multi_file_day:
            if self._next_data.empty & self._prev_data.empty:
                # data has not already been loaded for previous and next days
                # load data for all three
                print('Initializing three day/file window')
                # using current date or fid
                self._prev_data, self._prev_meta = self._load_prev()
                self._curr_data, self._curr_meta = \
                    self._load_data(date=self.date, fid=self._fid)
                self._next_data, self._next_meta = self._load_next()
            else:
                # moving forward in time
                # (the request matches what was cached as "next", so the
                # window is shifted and only one new load is needed)
                if self._next_data_track == curr:
                    self._prev_data = self._curr_data
                    self._prev_meta = self._curr_meta
                    self._curr_data = self._next_data
                    self._curr_meta = self._next_meta
                    self._next_data, self._next_meta = self._load_next()
                # moving backward in time
                elif self._prev_data_track == curr:
                    self._next_data = self._curr_data
                    self._next_meta = self._curr_meta
                    self._curr_data = self._prev_data
                    self._curr_meta = self._prev_meta
                    self._prev_data, self._prev_meta = self._load_prev()
                # jumped in time/or switched from filebased to date based access
                else:
                    self._prev_data, self._prev_meta = self._load_prev()
                    self._curr_data, self._curr_meta = \
                                self._load_data(date=self.date, fid=self._fid)
                    self._next_data, self._next_meta = self._load_next()

            # make sure datetime indices for all data is monotonic
            if not self._prev_data.index.is_monotonic_increasing:
                self._prev_data.sort_index(inplace=True)
            if not self._curr_data.index.is_monotonic_increasing:
                self._curr_data.sort_index(inplace=True)
            if not self._next_data.index.is_monotonic_increasing:
                self._next_data.sort_index(inplace=True)

            # make tracking indexes consistent with new loads
            self._next_data_track = curr + inc
            self._prev_data_track = curr - inc
            # attach data to object
            # (copies are used so that later padding/processing of self.data
            # does not alter the cached current-day window)
            if not self._curr_data.empty:
                self.data = self._curr_data.copy()
                self.meta = self._curr_meta.copy()
            else:
                self.data = DataFrame(None)
                # line below removed as it would delete previous meta, if any
                # if you end a seasonal analysis with a day with no data, then
                # no meta: self.meta = _meta.Meta()

            # NOTE(review): nanoseconds=99999999 has eight 9s (about 0.1 s),
            # so this window ends at 23:59:59.099999999 rather than at the
            # last nanosecond of the day -- confirm whether nine 9s was meant.
            # The same constant is used for padRight below.
            if self.multi_file_day:
                self.data = self.data.ix[self.date:self.date + pds.DateOffset(
                    hours=23, minutes=59, seconds=59, nanoseconds=99999999)]

            # pad data based upon passed parameter
            # NOTE(review): the else branches below use self.pad, which is
            # None when only multi_file_day triggered this path and loading
            # is by file -- confirm that combination is not expected.
            if (not self._prev_data.empty) & (not self.data.empty):
                if self.multi_file_day and self._load_by_date:
                    # take rows of the previous load from the start of the
                    # requested day up to the first current sample
                    padLeft = self._prev_data.ix[(
                        self.date):self._curr_data.index[0]]
                else:
                    padLeft = self._prev_data.ix[(
                        self._curr_data.index[0] -
                        self.pad):self._curr_data.index[0]]
                #self.data = pds.concat([padLeft[0:-1], self.data])
                self.data = pds.concat([padLeft, self.data])

            if (not self._next_data.empty) & (not self.data.empty):

                if self.multi_file_day and self._load_by_date:
                    # take rows of the next load that fall inside the
                    # requested day window
                    padRight = self._next_data.ix[self.date : (self.date + \
        pds.DateOffset(hours=23, minutes=59, seconds=59, nanoseconds=99999999))]
                else:
                    padRight = self._next_data.ix[self._curr_data.index[-1]:(
                        self._curr_data.index[-1] + self.pad)]
                #self.data = pds.concat([self.data, padRight[1:]])
                self.data = pds.concat([self.data, padRight])

            # drop any possible duplicate index times
            # (the pad slices above include their end points, so boundary
            # samples can appear twice after concatenation)
            #self.data.drop_duplicates(inplace=True)
            self.data = self.data[~self.data.index.duplicated()]

        # if self.pad is False, load single day
        else:
            self.data, meta = self._load_data(date=self.date, fid=self._fid)
            # existing metadata is kept when nothing was loaded
            if not self.data.empty:
                self.meta = meta

        # check if load routine actually returns meta
        # (if not, default entries are created for every data column)
        if self.meta.data.empty:
            self.meta[self.data.columns] = {
                'long_name': self.data.columns,
                'units': [''] * len(self.data.columns)
            }
        # if loading by file set the yr, doy, and date
        # NOTE(review): index[0] is read without checking self.data.empty;
        # presumably this raises when the file yields no data -- confirm.
        if not self._load_by_date:
            temp = self.data.index[0]
            temp = pds.datetime(temp.year, temp.month, temp.day)
            self.date = temp
            self.yr, self.doy = utils.getyrdoy(self.date)

        # instrument default routine runs first, on any non-empty load
        if not self.data.empty:
            self._default_rtn(self)
        # clean
        if (not self.data.empty) & (self.clean_level != 'none'):
            self._clean_rtn(self)
        # apply custom functions
        if not self.data.empty:
            self.custom._apply_all(self)
        # remove the excess padding, if any applied
        # (only when a pad was supplied; trims back to the time span of the
        # current-day window)
        if (self.pad is not None) & (not self.data.empty) & (not verifyPad):
            self.data = self.data[self._curr_data.index[0]:self._curr_data.
                                  index[-1]]

        sys.stdout.flush()
        return

    def download(self, start, stop, freq='D', user=None, password=None):
        """Fetch instrument data covering the span from start to stop.

        Parameters
        ----------
        start : pandas.datetime
            first date to download
        stop : pandas.datetime
            last date to download
        freq : string
            Step between requested dates, 'D' for daily, 'M' monthly
            (see pandas)
        user : string
            username, if required by instrument data archive
        password : string
            password, if required by instrument data archive; only passed
            along when a user is given

        Note
        ----
        Files are written to self.files.data_path, which is created when
        it does not exist yet.

        If Instrument bounds are set to defaults they are updated
        after files are downloaded.

        """
        import errno

        # create the target directory, tolerating one that already exists
        try:
            os.makedirs(self.files.data_path)
        except OSError as err:
            if err.errno != errno.EEXIST:
                raise
        print('Downloading data to: ', self.files.data_path)
        date_array = utils.season_date_range(start, stop, freq=freq)

        # credentials are forwarded only when a user name was supplied
        credentials = {}
        if user is not None:
            credentials = {'user': user, 'password': password}
        self._download_rtn(date_array,
                           tag=self.tag,
                           sat_id=self.sat_id,
                           data_path=self.files.data_path,
                           **credentials)

        # file date range as known before the file list is refreshed
        first_date = self.files.start_date
        last_date = self.files.stop_date

        print('Updating pysat file list')
        self.files.refresh()

        # bounds that still equal the pre-refresh file range are treated as
        # defaults and reset so they follow the refreshed file list
        iter_start, iter_stop = self.bounds
        if (len(iter_start) == 1 and iter_start[0] == first_date
                and iter_stop[0] == last_date):
            print('Updating instrument object bounds.')
            self.bounds = None

    @property
    def bounds(self):
        """Boundaries for iterating over instrument object by date or file.

        Parameters
        ----------
        start : datetime object, filename, or None (default)
            start of iteration, if None uses first data date.
            list-like collection also accepted
        end :  datetime object, filename, or None (default)
                end of iteration, inclusive. If None uses last data date.
                list-like collection also accepted

        Note
        ----
        Both start and stop must be the same type (date, or filename) or None

        Examples
        --------
        ::

            inst = pysat.Instrument(platform=platform,
                                    name=name,
                                    tag=tag)
            start = pysat.datetime(2009,1,1)
            stop = pysat.datetime(2009,1,31)
            inst.bounds = (start,stop)

            start2 = pysat.datetetime(2010,1,1)
            stop2 = pysat.datetime(2010,2,14)
            inst.bounds = ([start, start2], [stop, stop2])

        """
        return self._iter_start, self._iter_stop

    @bounds.setter
    def bounds(self, value=None):
        """Set the bounds used when iterating over the Instrument.

        Parameters
        ----------
        value : tuple, list, or None
            (start, stop) or (start, stop, step). start and stop may each be
            None, a datetime, a filename string, or both may be iterables of
            datetimes or of filename strings (one entry per season).
            step is the frequency handed to utils.season_date_range and
            defaults to 'D'. None is treated as (None, None).

        Raises
        ------
        ValueError
            If fewer than two items are supplied, if only one of start/stop
            is iterable, if iterable items are of mixed or unsupported types,
            or if file and date bounds are mixed.

        Note
        ----
        Sets self._iter_start, self._iter_stop, self._iter_type
        ('date' or 'file') and self._iter_list.

        """
        if value is None:
            value = (None, None)
        if len(value) < 2:
            # the two literals are concatenated, so the separator has to be
            # part of the text
            raise ValueError('Must supply both a start and end date/file. ' +
                             'Supply None if you want the first/last possible')

        start = value[0]
        end = value[1]
        # get the frequency, or step size, of season
        if len(value) == 3:
            step = value[2]
        else:
            # default do daily
            step = 'D'

        # strings expose __iter__ on Python 3 but are treated as scalars here
        start_is_seq = hasattr(start, '__iter__') and not isinstance(start,
                                                                     str)
        end_is_seq = hasattr(end, '__iter__') and not isinstance(end, str)

        if (start is None) and (end is None):
            # set default
            self._iter_start = [self.files.start_date]
            self._iter_stop = [self.files.stop_date]
            self._iter_type = 'date'
            if self._iter_start[0] is not None:
                # check here in case Instrument is initialized with no input
                self._iter_list = utils.season_date_range(self._iter_start,
                                                          self._iter_stop,
                                                          freq=step)

        elif start_is_seq and end_is_seq:
            # multiple seasons; every entry must share the type of the first
            base = type(start[0])
            for s, t in zip(start, end):
                if (type(s) != type(t)) or (type(s) != base):
                    raise ValueError(
                        'Start and end items must all be of the same type')
            if isinstance(start[0], str):
                self._iter_type = 'file'
                self._iter_list = self.files.get_file_array(start, end)
            elif isinstance(start[0], pds.datetime):
                self._iter_type = 'date'
                self._iter_list = utils.season_date_range(start,
                                                          end,
                                                          freq=step)
            else:
                raise ValueError(
                    'Input is not a known type, string or datetime')
            self._iter_start = start
            self._iter_stop = end

        elif start_is_seq or end_is_seq:
            raise ValueError(
                'Both start and end must be iterable if one bound is iterable')

        elif isinstance(start, str) or isinstance(end, str):
            # single pair of file bounds; None means first/last known file
            if isinstance(start, pds.datetime) or isinstance(
                    end, pds.datetime):
                raise ValueError('Not allowed to mix file and date bounds')
            if start is None:
                start = self.files[0]
            if end is None:
                end = self.files.files[-1]
            self._iter_start = [start]
            self._iter_stop = [end]
            self._iter_list = self.files.get_file_array(
                self._iter_start, self._iter_stop)
            self._iter_type = 'file'

        elif isinstance(start, pds.datetime) or isinstance(end, pds.datetime):
            # single pair of date bounds; None means first/last file date
            if start is None:
                start = self.files.start_date
            if end is None:
                end = self.files.stop_date
            self._iter_start = [start]
            self._iter_stop = [end]
            self._iter_list = utils.season_date_range(start, end, freq=step)
            self._iter_type = 'date'
        else:
            raise ValueError(
                'Provided an invalid combination of bounds. ' +
                'if specifying by file, both bounds must be by file. Other ' +
                'combinations of datetime objects and None are allowed.')

    def __iter__(self):
        """Iterates instrument object by loading subsequent days or files.

        Each step calls `load` with the next entry of the iteration list and
        then yields the Instrument object itself.

        Note
        ----
        Limits of iteration, and iteration type (date/file)
        set by `bounds` attribute.

        Default bounds are the first and last dates from files on local system.

        Examples
        --------
        ::

            inst = pysat.Instrument(platform=platform,
                                    name=name,
                                    tag=tag)
            start = pysat.datetime(2009,1,1)
            stop = pysat.datetime(2009,1,31)
            inst.bounds = (start,stop)
            for inst in inst:
                print('Another day loaded', inst.date)

        """

        mode = self._iter_type
        if mode == 'file':
            load_keyword = 'fname'
        elif mode == 'date':
            load_keyword = 'date'
        else:
            # unknown iteration type, nothing to iterate over
            return

        for entry in self._iter_list:
            self.load(**{load_keyword: entry})
            yield self

    def next(self):
        """Manually iterate through the data loaded in Instrument object.

        Bounds of iteration and iteration type (day/file) are set by
        `bounds` attribute.

        Raises
        ------
        StopIteration
            If the currently loaded date/file is not within the iteration
            list, or is already its last entry.

        Note
        ----
        If there were no previous calls to load then the
        first day(default)/file will be loaded.

        """

        if self._iter_type == 'date':
            if self.date is not None:
                idx, = np.where(self._iter_list == self.date)
                # idx is an array: check for "no match" first and compare a
                # scalar afterwards. A bitwise | on the arrays does not
                # short-circuit and gives an empty (ambiguous) result when
                # the current date is not part of the iteration list.
                if len(idx) == 0 or idx[0] + 1 >= len(self._iter_list):
                    raise StopIteration('Outside the set date boundaries.')
                self.load(date=self._iter_list[idx[0] + 1])
            else:
                self.load(date=self._iter_list[0])

        elif self._iter_type == 'file':
            if self._fid is not None:
                # positions of the bounding files within the full file list
                first = self.files.get_index(self._iter_list[0])
                last = self.files.get_index(self._iter_list[-1])
                if (self._fid < first) or (self._fid + 1 > last):
                    raise StopIteration('Outside the set file boundaries.')
                # _iter_list is offset from the full file list by `first`
                self.load(fname=self._iter_list[self._fid + 1 - first])
            else:
                self.load(fname=self._iter_list[0])

    def prev(self):
        """Manually iterate backwards through the data in Instrument object.

        Bounds of iteration and iteration type (day/file)
        are set by `bounds` attribute.

        Raises
        ------
        StopIteration
            If the currently loaded date/file is not within the iteration
            list, or is already its first entry.

        Note
        ----
        If there were no previous calls to load then the
        last day(default)/file will be loaded.

        """

        if self._iter_type == 'date':
            if self.date is not None:
                idx, = np.where(self._iter_list == self.date)
                # idx is an array: check for "no match" first and compare a
                # scalar afterwards. A bitwise | on the arrays does not
                # short-circuit and gives an empty (ambiguous) result when
                # the current date is not part of the iteration list.
                if len(idx) == 0 or idx[0] - 1 < 0:
                    raise StopIteration('Outside the set date boundaries.')
                self.load(date=self._iter_list[idx[0] - 1])
            else:
                self.load(date=self._iter_list[-1])

        elif self._iter_type == 'file':
            if self._fid is not None:
                # positions of the bounding files within the full file list
                first = self.files.get_index(self._iter_list[0])
                last = self.files.get_index(self._iter_list[-1])
                if (self._fid - 1 < first) or (self._fid > last):
                    raise StopIteration('Outside the set file boundaries.')
                # _iter_list is offset from the full file list by `first`
                self.load(fname=self._iter_list[self._fid - 1 - first])
            else:
                self.load(fname=self._iter_list[-1])

    def to_netcdf4(self, fname=None, format=None):
        """Stores loaded data into a netCDF3/4 file.

        Parameters
        ----------
        fname : string
            full path to save instrument object to
        format : string
            format keyword passed to netCDF4 routine
            NETCDF3_CLASSIC, NETCDF3_64BIT, NETCDF4_CLASSIC, and NETCDF4.
            Defaults to NETCDF3_64BIT when None.

        Note
        ----
        Stores 1-D data along dimension 'time' - the date time index.

        Stores object data (e.g. dataframes within series) separately

         - The name of the series is used to prepend extra variable
           dimensions within netCDF, key_dim_1, key_dim_2; first dimension
           is time
         - Each column of the stored objects is written as key_column
         - The index organizing the data is stored as key_dim_1

        The datetime index is stored as 'UNIX time'. netCDF-3 doesn't support
        64-bit integers so it is stored as a 64-bit float. This results in a
        loss of datetime precision when converted back to datetime index
        up to hundreds of nanoseconds. Use netCDF4 if this is a problem.

        For the NETCDF3 formats int64 columns are written as int32. The
        data held by the Instrument object is not modified.

        All attributes attached to instrument meta are written to netCDF attrs.

        """

        import netCDF4

        if format is None:
            format = 'NETCDF3_64BIT'
        else:
            format = format.upper()
        # netCDF-3 has no 64-bit integer type, values are downcast on write
        is_netcdf3 = format[:7] == 'NETCDF3'

        with netCDF4.Dataset(fname, mode='w', format=format) as out_data:

            num = len(self.data.index)
            out_data.createDimension('time', num)

            # write out the datetime index
            cdfkey = out_data.createVariable(
                'time',
                'f8',
                dimensions=('time',),
            )
            cdfkey.units = 'seconds since 1970-1-1 0:0:0'
            cdfkey.long_name = 'UNIX time'
            cdfkey.calendar = 'standard'
            # integer nanoseconds -> whole microseconds -> float seconds
            cdfkey[:] = (self.data.index.astype(int) *
                         1.E-3).astype(int) * 1.E-6
            # store all of the data in dataframe columns
            for key in self.data.columns:
                column = self[key]
                if column.dtype != np.dtype('O'):
                    # not an object, simple column of data, write it out
                    if column.dtype == np.int64 and is_netcdf3:
                        # downcast a local copy only, leave self.data alone
                        column = column.astype(np.int32)
                    cdfkey = out_data.createVariable(
                        key,
                        column.dtype,
                        dimensions=('time',),
                    )
                    cdfkey.units = self.meta[key].units
                    cdfkey.long_name = self.meta[key].long_name
                    cdfkey[:] = column.values
                else:
                    # we are dealing with a more complicated object
                    # presuming a series with a dataframe in each location
                    first_item = column.iloc[0]
                    dims = np.shape(first_item)
                    obj_dim_names = []
                    # don't need to recreate last dimension,
                    # it covers number of columns
                    for i, dim in enumerate(dims[:-1]):
                        obj_dim_names.append(key + '_dim_%i' % (i + 1))
                        out_data.createDimension(obj_dim_names[-1], dim)
                    var_dim = tuple(['time'] + obj_dim_names)
                    # iterate over columns and store
                    try:
                        iterable = first_item.columns
                        is_frame = True
                    except AttributeError:
                        # looking at a series, which doesn't have columns
                        # NOTE(review): iterating over .name walks the
                        # characters of a string name; this Series path
                        # looks unfinished - confirm intended behavior
                        iterable = first_item.name
                        is_frame = False

                    for col in iterable:
                        if is_frame:
                            coltype = first_item[col].dtype
                        else:
                            coltype = first_item.dtype
                        if coltype == np.int64 and is_netcdf3:
                            coltype = np.int32
                        cdfkey = out_data.createVariable(key + '_' + col,
                                                         coltype,
                                                         dimensions=var_dim)
                        cdfkey.long_name = col
                        cdfkey.units = ''
                        if is_frame:
                            for i in range(num):
                                cdfkey[i, :] = column.iloc[i][
                                    col].values.astype(coltype)
                        else:
                            for i in range(num):
                                cdfkey[i, :] = column.iloc[i].values.astype(
                                    coltype)

                    # store the dataframe index for each time of main dataframe
                    datetime_flag = False
                    coltype = first_item.index.dtype
                    # check for datetime index
                    if coltype == np.dtype('<M8[ns]'):
                        coltype = 'f8'
                        datetime_flag = True
                    if coltype == np.int64:
                        coltype = np.int32
                    cdfkey = out_data.createVariable(key + '_dim_1',
                                                     coltype,
                                                     dimensions=var_dim)
                    if datetime_flag:
                        cdfkey.units = 'seconds since 1970-1-1 0:0:0'
                        cdfkey.long_name = 'UNIX time'
                        for i in range(num):
                            cdfkey[i, :] = (column.iloc[i].index.astype(int)
                                            * 1.E-3).astype(int) * 1.E-6
                    else:
                        cdfkey.units = ''
                        if first_item.index.name is not None:
                            cdfkey.long_name = first_item.index.name
                        else:
                            cdfkey.long_name = key
                        for i in range(num):
                            cdfkey[i, :] = column.iloc[
                                i].index.to_native_types()

            # store any non standard attributes, i.e. public attributes
            # that a freshly created Instrument does not have
            base_attrb = dir(Instrument())
            this_attrb = dir(self)

            adict = {}
            for key in this_attrb:
                if key not in base_attrb:
                    if key[0] != '_':
                        adict[key] = self.__getattribute__(key)
            # store any non-standard attributes attached to meta
            base_attrb = dir(_meta.Meta())
            this_attrb = dir(self.meta)
            for key in this_attrb:
                if key not in base_attrb:
                    if key[0] != '_':
                        adict[key] = self.meta.__getattribute__(key)
            adict['pysat_version'] = 1.0
            adict['Conventions'] = 'CF-1.6'

            # booleans are converted to integers before being written
            for key in adict.keys():
                if isinstance(adict[key], bool):
                    adict[key] = int(adict[key])

            out_data.setncatts(adict)

Exemple #13

0

Afficher le fichier

Fichier : _instrument.py Projet : yangjian615/pysat

    def __init__(self,
                 platform=None,
                 name=None,
                 tag=None,
                 sat_id=None,
                 clean_level='clean',
                 update_files=None,
                 pad=None,
                 orbit_info=None,
                 inst_module=None,
                 multi_file_day=None,
                 manual_org=None,
                 directory_format=None,
                 file_format=None,
                 temporary_file_list=False,
                 *arg,
                 **kwargs):
        """Set up the Instrument, its file list, iteration bounds and orbits.

        Parameters
        ----------
        platform, name : string or None
            Used (lowercased) to look up the instrument routines by name.
            Both None creates an "empty" Instrument. Ignored when
            inst_module is supplied.
        tag, sat_id : string or None
            Stored lowercased; None becomes ''.
        clean_level : string or None
            Stored lowercased; None becomes 'none'.
        update_files : passed through to the Files class
        pad : pandas.DateOffset, dict, or None
            A dict is expanded into pandas.DateOffset keywords.
        orbit_info : dict or None
            Keywords for the Orbits class. If None, the default assigned
            by the instrument module is used, when present.
        inst_module : module or None
            Instrument module supplied directly; must provide `name` and
            `platform` attributes.
        multi_file_day : overrides the instrument default when not None
        manual_org : passed through to the Files class, None becomes False
        directory_format : string or None
            Stored lowercased. When None, the value assigned from the
            instrument module is used; if that value is callable it is
            called with (tag, sat_id).
        file_format : string or None
            When None, the value assigned from the instrument module is
            used, provided it is a string containing '{' and '}'.
        temporary_file_list : boolean
            When True the Files class is created with write_to_disk=False.
        **kwargs : stored as self.kwargs, passed to load routine

        Raises
        ------
        ValueError
            If only one of platform/name is a string, or pad is of an
            unsupported type.
        AttributeError
            If inst_module lacks a name or platform attribute.

        """

        if inst_module is None:
            # use strings to look up module name
            if isinstance(platform, str) and isinstance(name, str):
                self.platform = platform.lower()
                self.name = name.lower()
                # look to module for instrument functions and defaults
                self._assign_funcs(by_name=True)
            elif (platform is None) and (name is None):
                # creating "empty" Instrument object with this path
                self.name = ''
                self.platform = ''
                self._assign_funcs()
            else:
                raise ValueError(
                    'Inputs platform and name must both be strings, or both None.'
                )
        else:
            # user has provided a module
            try:
                # platform and name are expected to be part of module
                self.name = inst_module.name.lower()
                self.platform = inst_module.platform.lower()
            except AttributeError:
                # plain literal: string.join is not available on Python 3
                raise AttributeError(
                    'A name and platform attribute for the instrument is '
                    'required if supplying routine module directly.')
            # look to module for instrument functions and defaults
            self._assign_funcs(inst_module=inst_module)

        # more reasonable defaults for optional parameters
        self.tag = tag.lower() if tag is not None else ''
        self.sat_id = sat_id.lower() if sat_id is not None else ''
        self.clean_level = (clean_level.lower()
                            if clean_level is not None else 'none')

        # assign_func sets some instrument defaults, direct info rules all
        if directory_format is not None:
            self.directory_format = directory_format.lower()
        # value not provided by user, check if there is a value provided by
        # instrument module
        elif self.directory_format is not None:
            try:
                # check if it is a function
                self.directory_format = self.directory_format(tag, sat_id)
            except TypeError:
                # not callable, keep the value as is
                pass

        if file_format is not None:
            self.file_format = file_format
        # value not provided by user, check if there is a value provided by
        # instrument module
        elif self.file_format is not None:
            # check if it is an iterable string.  If it isn't formatted
            # properly, give a warning and set file_format to None
            if (not isinstance(self.file_format, str)
                    or self.file_format.find("{") < 0
                    or self.file_format.find("}") < 1):
                estr = 'file format set to default, supplied string must be '
                estr = '{:s}iteratable [{:}]'.format(estr, self.file_format)
                print(estr)
                self.file_format = None

        # set up empty data and metadata
        self.data = DataFrame(None)
        self.meta = _meta.Meta()

        # function processing class, processes data on load
        self.custom = _custom.Custom()
        # create arrays to store data around loaded day
        # enables padding across day breaks with minimal loads
        self._next_data = DataFrame(None)
        self._next_data_track = []
        self._prev_data = DataFrame(None)
        self._prev_data_track = []
        self._curr_data = DataFrame(None)

        # multi file day, default set by assign_funcs
        if multi_file_day is not None:
            self.multi_file_day = multi_file_day

        # arguments for padding
        if isinstance(pad, pds.DateOffset):
            self.pad = pad
        elif isinstance(pad, dict):
            self.pad = pds.DateOffset(**pad)
        elif pad is None:
            self.pad = None
        else:
            estr = 'pad must be a dictionary or a pandas.DateOffset instance.'
            raise ValueError(estr)

        # instantiate Files class
        manual_org = False if manual_org is None else manual_org
        # a temporary file list is one that is not written to disk
        write_to_disk = not temporary_file_list
        self.files = _files.Files(self,
                                  manual_org=manual_org,
                                  directory_format=self.directory_format,
                                  update_files=update_files,
                                  file_format=self.file_format,
                                  write_to_disk=write_to_disk)

        # set bounds for iteration
        # self.bounds requires the Files class
        # setting (None,None) loads default bounds
        self.bounds = (None, None)
        self.date = None
        self._fid = None
        self.yr = None
        self.doy = None
        self._load_by_date = False

        # initialize orbit support
        if orbit_info is None:
            if self.orbit_info is None:
                # if default info not provided, set None as default
                orbit_info = {'index': None, 'kind': None, 'period': None}
            else:
                # default provided by instrument module
                orbit_info = self.orbit_info
        self.orbits = _orbits.Orbits(self, **orbit_info)

        # store kwargs, passed to load routine
        self.kwargs = kwargs

        # run instrument init function, a basic pass function is used
        # if user doesn't supply the init function
        self._init_rtn(self)

Exemple #14

0

Afficher le fichier

Fichier : _meta.py Projet : jcspence/pysat-1

    def __setitem__(self, name, value):
        """Convenience method for adding metadata.

        Parameters
        ----------
        name : string or list of strings
            Variable name(s) the metadata is assigned to.
        value : dict, Series, or Meta
            A dict maps metadata labels to values (lists of values when
            `name` is a list). Missing 'units' and 'long_name' entries are
            filled with defaults for new variables or copied from the
            existing entry. A 'meta' key carries Meta objects for higher
            order data. A Series is assigned as a row of self.data, a Meta
            object is stored in self.ho_data.

        Raises
        ------
        ValueError
            If the number of names and the number of values for any label
            differ.

        Examples
        --------
        ::

            meta = pysat.Meta()
            meta['name'] = {'long_name':string, 'units':string}
            # update 'units' to new value
            meta['name'] = {'units':string}
            # update 'long_name' to new value
            meta['name'] = {'long_name':string}
            # attach new info with partial information, 'long_name' set to 'name2'
            meta['name2'] = {'units':string}
            # units are set to '' by default
            meta['name3'] = {'long_name':string}

        """

        if isinstance(value, dict):
            # work on a shallow copy so that the wrapping and default keys
            # added below do not show up in the caller's dict
            value = value.copy()

            # check if dict empty; comparing keys() against [] is always
            # False on Python 3, truthiness works everywhere
            if not value:
                # null input, everything should be set to default
                if isinstance(name, basestring):
                    if name in self:
                        # variable already exists and we don't have anything
                        # new to add, just leave
                        return
                    # otherwise, continue on and set defaults
                else:
                    new_name = []
                    for n in name:
                        if n not in self:
                            new_name.append(n)
                    name = new_name
                    if len(name) == 0:
                        # all variables exist, can leave
                        return
                    else:
                        # otherwise, continue on and set defaults
                        # create empty input for all remaining names
                        value = {}
                        value[self._units_label] = ['']*len(name)
                        value[self._name_label] = name

            # if not passed an iterable, make it one
            if isinstance(name, basestring):
                name = [name]
                for key in value.keys():
                    value[key] = [value[key]]

            for key in value.keys():
                if len(name) != len(value[key]):
                    raise ValueError('Length of names and inputs must be equal.')

            if 'meta' in value.keys():
                # process higher order stuff first
                # could be part of multiple assignment
                # so assign the Meta objects, then remove all trace
                # of names with Meta
                pop_list = []
                pop_loc = []
                for j, (item, val) in enumerate(zip(name, value['meta'])):
                    if val is not None:
                        # assign meta data, recursive call....
                        self[item] = val
                        pop_list.append(item)
                        pop_loc.append(j)

                # remove 'meta' objects from input
                if len(value.keys()) > 1:
                    _ = value.pop('meta')
                else:
                    value = {}
                    name = []

                for item, loc in zip(pop_list[::-1], pop_loc[::-1]):
                    # remove data names that had a Meta object assigned
                    # they are not part of any future processing
                    if len(name) > 1:
                        _ = name.pop(loc)
                    else:
                        name = []
                    # remove place holder data in other values that used
                    # to have to account for presence of Meta object
                    # going through backwards so I don't mess with location references
                    for key in value.keys():
                        _ = value[key].pop(loc)

            lower_keys = [k.lower() for k in value.keys()]
            if 'units' not in lower_keys:
                # provide default value, or copy existing
                value[self._units_label] = []
                for item_name in name:
                    if item_name not in self:
                        value[self._units_label].append('')
                    else:
                        value[self._units_label].append(self[item_name, 'units'])
            # need to ensure that the units string is consistent with the rest
            # probably, that is

            if 'long_name' not in lower_keys:
                # provide default value, or copy existing
                value[self._name_label] = []
                for item_name in name:
                    if item_name not in self:
                        value[self._name_label].append(item_name)
                    else:
                        value[self._name_label].append(self[item_name,'long_name'])
            if len(name) > 0:
                # make sure there is still something to add
                new = DataFrame(value, index=name)
                for item_name,item in new.iterrows():
                    if item_name not in self:
                        self.data = self.data.append(item)
                    else:
                        # info already exists, update with new info
                        for item_key in item.keys():
                            self.data.loc[item_name, item_key] = item[item_key]

        elif isinstance(value, Series):
            self.data.loc[name] = value

        elif isinstance(value, Meta):
            # dealing with higher order data set
            self.ho_data[name] = value

Exemple #15

0

Afficher le fichier

def load_netcdf4(fnames=None,
                 strict_meta=False,
                 format=None):
    """Load netCDF-3/4 file produced by pysat.

    Parameters
    ----------
    fnames : string or array_like of strings
        filenames to load
    strict_meta : boolean
        check if metadata across fnames is the same
    format : string
        format keyword passed to netCDF4 routine
        NETCDF3_CLASSIC, NETCDF3_64BIT, NETCDF4_CLASSIC, and NETCDF4.
        Defaults to NETCDF3_64BIT when None.

    Returns
    -------
    out : pandas.DataFrame
        Data from the first file, indexed by the 'time' variable converted
        to datetimes.
    mdata : pysat.Meta
        Metadata built from the last file processed.

    Raises
    ------
    ValueError
        If no filename is supplied, or strict_meta is set and the metadata
        differs between files.

    Note
    ----
    Every file is read, but only the data of the first one is returned.

    """

    # validate the input before pulling in the heavier dependencies
    if fnames is None:
        raise ValueError("Must supply a filename/list of filenames")
    if isinstance(fnames, basestring):
        fnames = [fnames]
    if len(fnames) == 0:
        raise ValueError("Must supply a filename/list of filenames")

    import netCDF4
    import pysat

    if format is None:
        format = 'NETCDF3_64BIT'
    else:
        format = format.upper()

    saved_mdata = None
    running_store = []
    for fname in fnames:
        # 2D variables (dataframe within dataframe) found in this file
        two_d_keys = []
        two_d_dims = []
        with netCDF4.Dataset(fname, mode='r', format=format) as data:
            # build up dictionary with all ncattrs
            # and add those attributes to a pysat meta object
            ncattrsList = data.ncattrs()
            mdata = pysat.Meta()
            for d in ncattrsList:
                if hasattr(mdata, d):
                    # avoid clobbering an existing Meta attribute
                    mdata.__setattr__(d + '_', data.getncattr(d))
                else:
                    mdata.__setattr__(d, data.getncattr(d))

            # loadup all of the variables in the netCDF
            loadedVars = {}
            for key in data.variables.keys():
                # load up metadata
                # from here group unique dimensions and act accordingly, 1D, 2D, 3D
                if len(data.variables[key].dimensions) == 1:
                    # assuming basic time dimension
                    loadedVars[key] = data.variables[key][:]
                    if key != 'time':
                        mdata[key] = {
                            'long_name': data.variables[key].long_name,
                            'units': data.variables[key].units
                        }

                if len(data.variables[key].dimensions) == 2:
                    # part of dataframe within dataframe
                    two_d_keys.append(key)
                    two_d_dims.append(data.variables[key].dimensions)

            # we now have a list of keys that need to go into a dataframe,
            # could be more than one, collect unique dimensions for 2D keys
            for dim in set(two_d_dims):

                # get the name of the final data column
                # dimension naming follows name_dim_number,
                # pull out name by finding last _ and tracking back
                # over the digits plus the five characters of '_dim_'
                suffix_len = dim[1][::-1].find('_') + 5
                obj_key_name = dim[1][:-suffix_len]
                # collect variable names associated with object
                obj_var_keys = []
                for tkey, tdim in zip(two_d_keys, two_d_dims):
                    if tdim == dim:
                        obj_var_keys.append(tkey)

                index_var = data.variables[obj_key_name + '_dim_1']
                num_rows = data.variables[obj_var_keys[0]].shape[0]
                # build one dataframe per entry along the first dimension
                loop_list = []
                for i in range(num_rows):
                    loop_frame = DataFrame(None)
                    for key in obj_var_keys:
                        # strip the leading 'name_' to get the column label
                        loop_frame[key[len(obj_key_name) +
                                       1:]] = data.variables[key][i, :]

                    # if the object index uses unix time, process into datetime index
                    if index_var.long_name == 'UNIX time':
                        # nanosecond resolution from datetime64 can't be stored in netcdf3
                        # no 64-bit integers
                        # it is stored as a float, need to undo processing
                        # due to precision loss, resolution limited to the microsecond
                        loop_frame.index = pds.to_datetime(
                            (1E6 * loop_frame['dim_1']).astype(int) * 1000)
                        loop_frame.index.name = 'time'
                    else:
                        loop_frame.index = loop_frame['dim_1']
                        loop_frame.index.name = index_var.long_name

                    del loop_frame['dim_1']
                    loop_list.append(loop_frame)
                # add object data to loaded data dictionary
                loadedVars[obj_key_name] = loop_list

            # prepare dataframe index for this netcdf file
            loadedVars['time'] = pds.to_datetime(
                (loadedVars.pop('time') * 1E6).astype(int) * 1000)
            running_store.append(loadedVars)

            if strict_meta:
                if saved_mdata is None:
                    saved_mdata = copy.deepcopy(mdata)
                elif (mdata != saved_mdata):
                    raise ValueError(
                        'Metadata across filenames is not the same.')

    # combine all of the data loaded across files together
    # currently doesn't work if list of dicts of lists is provided
    # in other words, only one file at a time
    out = DataFrame.from_records(running_store[0], index='time')

    return out, mdata

Exemple #16

0

Afficher le fichier

class Meta(object):
    """ Stores metadata for Instrument instance, similar to CF-1.6 netCDFdata
    standard.

    Parameters
    ----------
    metadata : pandas.DataFrame
        DataFrame should be indexed by variable name that contains at minimum
        the standard_name (name), units, and long_name for the data stored in
        the associated pysat Instrument object.

    units_label : str
        String used to label units in storage. Defaults to 'units'.
    name_label : str
        String used to label long_name in storage. Defaults to 'long_name'.
    notes_label : str
        String used to label 'notes' in storage. Defaults to 'notes'
    desc_label : str
        String used to label variable descriptions in storage.
        Defaults to 'desc'
    plot_label : str
        String used to label variables in plots. Defaults to 'label'
    axis_label : str
        Label used for axis on a plot. Defaults to 'axis'
    scale_label : str
        string used to label plot scaling type in storage. Defaults to 'scale'
    min_label : str
        String used to label typical variable value min limit in storage.
        Defaults to 'value_min'
    max_label : str
        String used to label typical variable value max limit in storage.
        Defaults to 'value_max'
    fill_label : str
        String used to label fill value in storage. Defaults to 'fill' per
        netCDF4 standard


    Attributes
    ----------
    data : pandas.DataFrame
        index is variable standard name, 'units', 'long_name', and other
        defaults are also stored along with additional user provided labels.

    units_label : str
        String used to label units in storage. Defaults to 'units'.
    name_label : str
        String used to label long_name in storage. Defaults to 'long_name'.
    notes_label : str
       String used to label 'notes' in storage. Defaults to 'notes'
    desc_label : str
       String used to label variable descriptions in storage.
       Defaults to 'desc'
    plot_label : str
       String used to label variables in plots. Defaults to 'label'
    axis_label : str
        Label used for axis on a plot. Defaults to 'axis'
    scale_label : str
       string used to label plot scaling type in storage. Defaults to 'scale'
    min_label : str
       String used to label typical variable value min limit in storage.
       Defaults to 'value_min'
    max_label : str
       String used to label typical variable value max limit in storage.
       Defaults to 'value_max'
    fill_label : str
        String used to label fill value in storage. Defaults to 'fill' per
        netCDF4 standard
    export_nan: list
        List of labels that should be exported even if their value is nan.
        By default, metadata with a value of nan will be excluded from export.


    Notes
    -----
    Meta object preserves the case of variables and attributes as it first
    receives the data. Subsequent calls to set new metadata with the same
    variable or attribute will use case of first call. Accessing or setting
    data thereafter is case insensitive. In practice, use is case insensitive
    but the original case is preserved. Case preservation is built in to
    support writing files with a desired case to meet standards.

    Metadata for higher order data objects, those that have
    multiple products under a single variable name in a pysat.Instrument
    object, are stored by providing a Meta object under the single name.

    Supports any custom metadata values in addition to the expected metadata
    attributes (units, name, notes, desc, plot_label, axis, scale, value_min,
    value_max, and fill). These base attributes may be used to programmatically
    access and set types of metadata regardless of the string values used for
    the attribute. String values for attributes may need to be changed
    depending upon the standards of code or files interacting with pysat.

    Meta objects returned as part of pysat loading routines are automatically
    updated to use the same values of plot_label, units_label, etc. as found
    on the pysat.Instrument object.

    Examples
    --------
    ::

        # instantiate Meta object, default values for attribute labels are used
        meta = pysat.Meta()
        # set a couple base units
        # note that other base parameters not set below will
        # be assigned a default value
        meta['name'] = {'long_name':string, 'units':string}
        # update 'units' to new value
        meta['name'] = {'units':string}
        # update 'long_name' to new value
        meta['name'] = {'long_name':string}
        # attach new info with partial information, 'long_name' set to 'name2'
        meta['name2'] = {'units':string}
        # units are set to '' by default
        meta['name3'] = {'long_name':string}

        # assigning custom meta parameters
        meta['name4'] = {'units':string, 'long_name':string,
                         'custom1':string, 'custom2':value}
        meta['name5'] = {'custom1':string, 'custom3':value}

        # assign multiple variables at once
        meta[['name1', 'name2']] = {'long_name':[string1, string2],
                                    'units':[string1, string2],
                                    'custom10':[string1, string2]}

        # assigning metadata for n-Dimensional variables
        meta2 = pysat.Meta()
        meta2['name41'] = {'long_name':string, 'units':string}
        meta2['name42'] = {'long_name':string, 'units':string}
        meta['name4'] = {'meta':meta2}
        # or
        meta['name4'] = meta2
        meta['name4'].children['name41']

        # mixture of 1D and higher dimensional data
        meta = pysat.Meta()
        meta['dm'] = {'units':'hey', 'long_name':'boo'}
        meta['rpa'] = {'units':'crazy', 'long_name':'boo_whoo'}
        meta2 = pysat.Meta()
        meta2[['higher', 'lower']] = {'meta':[meta, None],
                                      'units':[None, 'boo'],
                                      'long_name':[None, 'boohoo']}

        # assign from another Meta object
        meta[key1] = meta2[key2]

        # access fill info for a variable, presuming default label
        meta[key1, 'fill']
        # access same info, even if 'fill' not used to label fill values
        meta[key1, meta.fill_label]


        # change a label used by Meta object
        # note that all instances of fill_label
        # within the meta object are updated
        meta.fill_label = '_FillValue'
        meta.plot_label = 'Special Plot Variable'
        # this feature is useful when converting metadata within pysat
        # so that it is consistent with externally imposed file standards

    """
    def __init__(self,
                 metadata=None,
                 units_label='units',
                 name_label='long_name',
                 notes_label='notes',
                 desc_label='desc',
                 plot_label='label',
                 axis_label='axis',
                 scale_label='scale',
                 min_label='value_min',
                 max_label='value_max',
                 fill_label='fill',
                 export_nan=[]):

        # set mutability of Meta attributes
        self.mutable = True

        # set units and name labels directly
        self._units_label = units_label
        self._name_label = name_label
        self._notes_label = notes_label
        self._desc_label = desc_label
        self._plot_label = plot_label
        self._axis_label = axis_label
        self._scale_label = scale_label
        self._min_label = min_label
        self._max_label = max_label
        self._fill_label = fill_label
        # by default metadata with a value of nan will not be exported
        # unless the name is in the _export_nan list. Initialize the list
        # with the fill label, since it is reasonable to assume that a fill
        # value of nan would be intended to be exported
        self._export_nan = [fill_label] + export_nan
        # init higher order (nD) data structure container, a dict
        self._ho_data = {}
        # use any user provided data to instantiate object with data
        # attirube unit and name labels are called within
        if metadata is not None:
            if isinstance(metadata, DataFrame):
                self._data = metadata
                # make sure defaults are taken care of for required metadata
                self.accept_default_labels(self)
            else:
                raise ValueError(''.join(('Input must be a pandas DataFrame',
                                          'type. See other constructors for',
                                          ' alternate inputs.')))
        else:
            self._data = DataFrame(None,
                                   columns=[
                                       self._units_label, self._name_label,
                                       self._desc_label, self._plot_label,
                                       self._axis_label, self._scale_label,
                                       self.notes_label, self._min_label,
                                       self._max_label, self._fill_label
                                   ])

        # establish attributes intrinsic to object, before user can
        # add any
        self._base_attr = dir(self)

    @property
    def ho_data(self):
        """Higher order (nD) metadata container stored on `_ho_data`."""
        return self._ho_data

    @property
    def data(self):
        """Underlying metadata frame stored on `_data`."""
        return self._data

    @data.setter
    def data(self, new_frame):
        """Replace the underlying metadata frame with `new_frame`."""
        self._data = new_frame
        # self.keys = self._data.columns.lower()

    @ho_data.setter
    def ho_data(self, new_dict):
        """Replace the higher order metadata container with `new_dict`."""
        self._ho_data = new_dict

    @property
    def empty(self):
        """Return boolean True if there is no metadata"""

        # only need to check on lower data since lower data
        # is set when higher metadata assigned
        if self.data.empty:
            return True
        else:
            return False

    def merge(self, other):
        """Copy into self every variable of other that self lacks.

        Variables already present in self (case-insensitive check) are left
        untouched.

        Parameters
        ----------
        other : pysat.Meta
            Source of the metadata to add

        """

        for var_name in other.keys():
            if var_name in self:
                continue
            # item assignment carries both lower and higher dimensional data
            self[var_name] = other[var_name]

    def drop(self, names):
        """Drops variables (names) from metadata."""

        # drop lower dimension data
        self.data = self._data.drop(names, axis=0)
        # drop higher dimension data
        for name in names:
            if name in self._ho_data:
                _ = self._ho_data.pop(name)

    def keep(self, keep_names):
        """Keeps variables (keep_names) while dropping other parameters

        Parameters
        ----------
        keep_names : list-like
            variables to keep
        """
        keep_names = [self.var_case_name(name) for name in keep_names]
        current_names = self._data.index
        drop_names = []
        for name in current_names:
            if name not in keep_names:
                drop_names.append(name)
        self.drop(drop_names)

    def apply_default_labels(self, other):
        """Applies labels for default meta labels from self onto other.

        Parameters
        ----------
        other : Meta
            Meta object to have default labels applied

        Returns
        -------
        Meta

        """
        other_updated = other.copy()
        other_updated.units_label = self.units_label
        other_updated.name_label = self.name_label
        other_updated.notes_label = self.notes_label
        other_updated.desc_label = self.desc_label
        other_updated.plot_label = self.plot_label
        other_updated.axis_label = self.axis_label
        other_updated.scale_label = self.scale_label
        other_updated.min_label = self.min_label
        other_updated.max_label = self.max_label
        other_updated.fill_label = self.fill_label
        return other_updated

    def accept_default_labels(self, other):
        """Applies labels for default meta labels from other onto self.

        Parameters
        ----------
        other : Meta
            Meta object to take default labels from

        Returns
        -------
        Meta

        """

        self.units_label = other.units_label
        self.name_label = other.name_label
        self.notes_label = other.notes_label
        self.desc_label = other.desc_label
        self.plot_label = other.plot_label
        self.axis_label = other.axis_label
        self.scale_label = other.scale_label
        self.min_label = other.min_label
        self.max_label = other.max_label
        self.fill_label = other.fill_label
        return

    def __contains__(self, other):
        """case insensitive check for variable name"""

        if other.lower() in [i.lower() for i in self.keys()]:
            return True
        if other.lower() in [i.lower() for i in self.keys_nD()]:
            return True
        return False

    def __repr__(self):
        """Return a fixed identifying string for the object."""
        return 'pysat.MetaData'

    def __str__(self, recurse=True):
        """String describing Meta instance, variables, and attributes"""

        # cover 1D parameters
        if recurse:
            output_str = 'Metadata for 1D variables\n'
        else:
            output_str = ''

        for ind in self.keys():
            output_str += ind.ljust(30)
        output_str += '\n\n'
        output_str += 'Tracking the following:\n'
        for col in self.attrs():
            output_str += col.ljust(30)

        output_str += '\n'
        if recurse:
            for item_name in self.keys_nD():
                output_str += '\n\n'
                output_str += 'Metadata for ' + item_name + '\n'
                output_str += self.ho_data[item_name].__str__(False)

        return output_str

    def _insert_default_values(self, input_name):

        default_str = ''
        default_nan = np.NaN
        labels = [
            self.units_label, self.name_label, self.notes_label,
            self.desc_label, self.plot_label, self.axis_label,
            self.scale_label, self.min_label, self.max_label, self.fill_label
        ]
        defaults = [
            default_str, input_name, default_str, default_str, input_name,
            input_name, 'linear', default_nan, default_nan, default_nan
        ]
        self._data.loc[input_name, labels] = defaults

    def __setattr__(self, name, value):
        """Conditionally sets attributes based on self.mutable flag
        @properties are assumed to be mutable.
        We avoid recursively setting properties using
        method from https://stackoverflow.com/a/15751135
        """

        # mutable handled explicitly to avoid recursion
        if name != 'mutable':

            # check if this attribute is a property
            propobj = getattr(self.__class__, name, None)
            if isinstance(propobj, property):
                # check if the property is settable
                if propobj.fset is None:
                    raise AttributeError(''.join("can't set attribute - ",
                                                 "property has no fset"))

                # make mutable in case fset needs it to be
                mutable_tmp = self.mutable
                self.mutable = True

                # set the property
                propobj.fset(self, value)

                # restore mutability flag
                self.mutable = mutable_tmp
            else:
                # a normal attribute
                if self.mutable:
                    # use Object to avoid recursion
                    super(Meta, self).__setattr__(name, value)
                else:
                    raise AttributeError(''.join(
                        ("cannot set attribute - ",
                         "object's attributes are immutable")))
        else:
            super(Meta, self).__setattr__(name, value)

    def __setitem__(self, names, input_data):
        """Convenience method for adding metadata.

        Parameters
        ----------
        names : str, list-like of str, or slice
            Variable name(s) receiving metadata. For Series and Meta inputs
            a single name is expected.
        input_data : dict, pandas.Series, or Meta
            dict : attribute label -> value (single name) or list of values
            (one per name). The keys 'meta' and 'children' carry Meta objects
            (or None) for higher order variables.
            Series : a row as returned by item access on a Meta object; any
            'children' entry is stored as higher order metadata.
            Meta : higher order metadata stored under `names`.

        Raises
        ------
        ValueError
            If the number of values supplied for an attribute differs from
            the number of names.

        Notes
        -----
        A dict passed as `input_data` is copied before processing, so the
        caller's dict is left unmodified.

        """

        if isinstance(input_data, dict):
            # work on a shallow copy: the list wrapping and key re-casing
            # below must not alter the dict owned by the caller
            input_data = dict(input_data)
            # if not passed an iterable, make it one
            if isinstance(names, basestring):
                names = [names]
                for key in input_data:
                    input_data[key] = [input_data[key]]
            elif isinstance(names, slice) and (names.step is None):
                # Check for instrument[indx,:] or instrument[idx] usage
                # NOTE(review): DataFrame.keys() returns the column labels
                # (attribute names), not the variable names held in the
                # index - confirm this is the intended set of names
                names = list(self.data.keys())
            # make sure the variable names are in good shape
            # Meta object is case insensitive but case preserving
            # convert given names into ones Meta has already seen
            # if new, then input names become the standard
            names = [self.var_case_name(name) for name in names]
            for name in names:
                if name not in self:
                    self._insert_default_values(name)
            # check if input dict empty
            if not input_data:
                # meta wasn't actually assigned by user, empty call
                # we can head out - we've assigned defaults if first data
                return
            # perform some checks on the data
            # make sure number of inputs matches number of metadata inputs
            for key in input_data:
                if len(names) != len(input_data[key]):
                    raise ValueError(''.join(
                        ('Length of names and inputs', ' must be equal.')))
            # make sure the attribute names are in good shape
            # check name of attributes against existing attribute names
            # if attribute name exists somewhere, then case of existing
            # attribute
            # will be enforced upon new data by default for consistency
            # (iterate over a snapshot of the keys since entries are renamed)
            for name in list(input_data):
                new_name = self.attr_case_name(name)
                if new_name != name:
                    input_data[new_name] = input_data.pop(name)

            # time to actually add the metadata
            for key in input_data:
                if key not in ['children', 'meta']:
                    for i, name in enumerate(names):
                        to_be_set = input_data[key][i]
                        if hasattr(to_be_set, '__iter__') and \
                                not isinstance(to_be_set, basestring):
                            # we have some list-like object
                            # can only store a single element
                            if len(to_be_set) == 0:
                                # empty list, ensure there is something
                                to_be_set = ['']
                            if isinstance(to_be_set[0], basestring):
                                self._data.loc[name, key] = \
                                    '\n\n'.join(to_be_set)
                            else:
                                warnings.warn(' '.join(
                                    ('Array elements are',
                                     'not allowed in meta.',
                                     'Dropping input :', key)))
                        else:
                            self._data.loc[name, key] = to_be_set
                else:
                    # key is 'meta' or 'children'
                    # process higher order stuff. Meta inputs could be part of
                    # larger multiple parameter assignment
                    # so not all names may actually have 'meta' to add
                    # read the values stored under the key being processed,
                    # so a 'children' entry works without a 'meta' entry
                    for item, val in zip(names, input_data[key]):
                        if val is not None:
                            # assign meta data, recursive call....
                            # heads to if Meta instance call
                            self[item] = val

        elif isinstance(input_data, Series):
            # outputs from Meta object are a Series.
            # thus this takes in input from a Meta object
            # set data using standard assignment via a dict
            in_dict = input_data.to_dict()
            if 'children' in in_dict:
                child = in_dict.pop('children')
                if child is not None:
                    # if not child.data.empty:
                    self.ho_data[names] = child
            # remaining items are simply assigned
            self[names] = in_dict

        elif isinstance(input_data, Meta):
            # dealing with higher order data set
            # names is only a single name here (by choice for support)
            if (names in self._ho_data) and (input_data.empty):
                # no actual metadata provided and there is already some
                # higher order metadata in self
                return

            # get Meta approved variable names
            new_item_name = self.var_case_name(names)
            # ensure that Meta labels of object to be assigned
            # are consistent with self
            # input_data accepts self's labels
            input_data.accept_default_labels(self)

            # go through and ensure Meta object to be added has variable and
            # attribute names consistent with other variables and attributes
            # this covers custom attributes not handled by default routine
            # above
            input_data.data.columns = [self.attr_case_name(name)
                                       for name in input_data.attrs()]
            # same thing for variables
            input_data.data.index = [self.var_case_name(name)
                                     for name in input_data.data.index]
            # assign Meta object now that things are consistent with Meta
            # object settings
            # but first, make sure there are lower dimension metadata
            # parameters, passing in an empty dict fills in defaults
            # if there is no existing metadata info
            self[new_item_name] = {}
            # now add to higher order data
            self._ho_data[new_item_name] = input_data

    def __getitem__(self, key):
        """Convenience method for obtaining metadata.

        Maps to pandas DataFrame.loc method. Variable and attribute names
        are converted to their stored case before the lookup.

        Examples
        --------
        ::

            meta['name']

            meta[ 'name1', 'units' ]

            meta[[ 'name1', 'name2'], 'units']

            meta[:, 'units']

            for higher order data

            meta[ 'name1', 'subvar', 'units' ]

            meta[ 'name1', ('units', 'scale') ]

        """

        def _stored_case(case_func, selector, axis_labels):
            """Map case_func over selector: a string, a slice, or iterable"""
            if isinstance(selector, basestring):
                return case_func(selector)
            if isinstance(selector, slice):
                return [case_func(label) for label in axis_labels[selector]]
            # assume iterable
            return [case_func(label) for label in selector]

        # a tuple is the (index, column) style access pattern
        if isinstance(key, tuple):
            n_parts = len(key)
            if n_parts == 2:
                # index, column
                rows = _stored_case(self.var_case_name, key[0],
                                    self.data.index)
                cols = _stored_case(self.attr_case_name, key[1],
                                    self.data.columns)
                return self.data.loc[rows, cols]
            if n_parts == 3:
                # index, child_index, column
                parent_name = self.var_case_name(key[0])
                child_name = self.var_case_name(key[1])
                attr_name = self.attr_case_name(key[2])
                child_frame = self.ho_data[parent_name].data
                return child_frame.loc[child_name, attr_name]
            # any other tuple length yields None
            return None

        if isinstance(key, list):
            return self[key, :]

        if not isinstance(key, basestring):
            raise NotImplementedError(
                "No way to handle MetaData key {}".format(key.__repr__()))

        # ensure variable is present somewhere
        if key not in self:
            raise KeyError('Key not found in MetaData')

        # case preserved name; every variable has a row in the lower
        # dimension metadata
        stored_key = self.var_case_name(key)
        meta_row = self.data.loc[stored_key]
        if stored_key in self.keys_nD():
            children = self.ho_data[stored_key].copy()
        else:
            children = None
        # assigning None here issues a pandas SettingWithCopyWarning
        meta_row.at['children'] = children
        return meta_row

    def _label_setter(self,
                      new_label,
                      current_label,
                      attr_label,
                      default=np.NaN,
                      use_names_default=False):
        """Generalized setter of default meta attributes

        Parameters
        ----------
        new_label : str
            New label to use in the Meta object
        current_label : str
            The hidden attribute to be updated that actually stores metadata
        default :
            Deafult setting to use for label if there is no attribute
            value
        use_names_default : bool
            if True, MetaData variable names are used as the default
            value for the specified Meta attributes settings

        Examples
        --------
        :
                @name_label.setter
                def name_label(self, new_label):
                    self._label_setter(new_label, self._name_label,
                                        use_names_default=True)

        Notes
        -----
        Not intended for end user

        """

        if new_label not in self.attrs():
            # new label not in metadata, including case
            # update existing label, if present
            if current_label in self.attrs():
                # old label exists and has expected case
                self.data.loc[:, new_label] = self.data.loc[:, current_label]
                self.data.drop(current_label, axis=1, inplace=True)
            else:
                if self.has_attr(current_label):
                    # there is something like label, wrong case though
                    current_label = self.attr_case_name(current_label)
                    self.data.loc[:, new_label] = \
                        self.data.loc[:, current_label]
                    self.data.drop(current_label, axis=1, inplace=True)
                else:
                    # there is no existing label
                    # setting for the first time
                    if use_names_default:
                        self.data[new_label] = self.data.index
                    else:
                        self.data[new_label] = default
            # check higher order structures as well
            # recursively change labels here
            for key in self.keys_nD():
                setattr(self.ho_data[key], attr_label, new_label)

        # now update 'hidden' attribute value
        # current_label = new_label
        setattr(self, ''.join(('_', attr_label)), new_label)

    @property
    def units_label(self):
        """Label currently used for units."""
        return self._units_label

    @property
    def name_label(self):
        """Label currently used for the long name."""
        return self._name_label

    @property
    def notes_label(self):
        """Label currently used for notes."""
        return self._notes_label

    @property
    def desc_label(self):
        """Label currently used for variable descriptions."""
        return self._desc_label

    @property
    def plot_label(self):
        """Label currently used for plot labels."""
        return self._plot_label

    @property
    def axis_label(self):
        """Label currently used for plot axis labels."""
        return self._axis_label

    @property
    def scale_label(self):
        """Label currently used for the plot scaling type."""
        return self._scale_label

    @property
    def min_label(self):
        """Label currently used for the typical minimum value."""
        return self._min_label

    @property
    def max_label(self):
        """Label currently used for the typical maximum value."""
        return self._max_label

    @property
    def fill_label(self):
        """Label currently used for the fill value."""
        return self._fill_label

    @units_label.setter
    def units_label(self, new_label):
        """Change the units label; a newly created column defaults to ''."""
        self._label_setter(new_label, self._units_label, 'units_label', '')

    @name_label.setter
    def name_label(self, new_label):
        """Change the name label; a new column defaults to variable names."""
        self._label_setter(new_label,
                           self._name_label,
                           'name_label',
                           use_names_default=True)

    @notes_label.setter
    def notes_label(self, new_label):
        """Change the notes label; a newly created column defaults to ''."""
        self._label_setter(new_label, self._notes_label, 'notes_label', '')

    @desc_label.setter
    def desc_label(self, new_label):
        """Change the desc label; a newly created column defaults to ''."""
        self._label_setter(new_label, self._desc_label, 'desc_label', '')

    @plot_label.setter
    def plot_label(self, new_label):
        """Change the plot label; a new column defaults to variable names."""
        self._label_setter(new_label,
                           self._plot_label,
                           'plot_label',
                           use_names_default=True)

    @axis_label.setter
    def axis_label(self, new_label):
        """Change the axis label; a new column defaults to variable names."""
        self._label_setter(new_label,
                           self._axis_label,
                           'axis_label',
                           use_names_default=True)

    @scale_label.setter
    def scale_label(self, new_label):
        """Change the scale label; a new column defaults to 'linear'."""
        self._label_setter(new_label, self._scale_label, 'scale_label',
                           'linear')

    @min_label.setter
    def min_label(self, new_label):
        self._label_setter(new_label, self._min_label, 'min_label', np.NaN)

    @max_label.setter
    def max_label(self, new_label):
        self._label_setter(new_label, self._max_label, 'max_label', np.NaN)

    @fill_label.setter
    def fill_label(self, new_label):
        self._label_setter(new_label, self._fill_label, 'fill_label', np.NaN)

    def var_case_name(self, name):
        """Provides stored name (case preserved) for case insensitive input

        If name is not found (case-insensitive check) then name is returned,
        as input. This function is intended to be used to help ensure the
        case of a given variable name is the same across the Meta object.

        Parameters
        ----------
        name : str
            variable name in any case

        Returns
        -------
        str
            string with case preserved as in metaobject

        """

        lower_name = name.lower()
        if name in self:
            for i in self.keys():
                if lower_name == i.lower():
                    return i
            for i in self.keys_nD():
                if lower_name == i.lower():
                    return i
        return name

    def keys(self):
        """Yields variable names stored for 1D variables

        The names are the index labels of the metadata frame.
        """

        for i in self.data.index:
            yield i

    def keys_nD(self):
        """Yields keys for higher order metadata

        The keys are those of the higher order container `ho_data`.
        """

        for i in self.ho_data:
            yield i

    def attrs(self):
        """Yields metadata products stored for each variable name

        The products are the column labels of the metadata frame.
        """

        for i in self.data.columns:
            yield i

    def has_attr(self, name):
        """Report whether a metadata attribute name is present.

        The comparison is case-insensitive.

        Notes
        -----
        Only the columns of `data` are inspected; higher order meta objects
        are not checked.

        Parameters
        ----------
        name : str
            attribute name to look for, in any case

        Returns
        -------
        bool
            True if an attribute matches `name` when case is ignored

        """

        target = name.lower()
        known_attrs = [column.lower() for column in self.data.columns]
        return target in known_attrs

    def attr_case_name(self, name):
        """Return the stored, case preserved, form of an attribute name.

        The standard attributes are searched first, then the attributes of
        the children attached to each higher order variable. The comparison
        is case-insensitive. When nothing matches, the supplied name is
        returned unchanged and is free to be used as given.

        Parameters
        ----------
        name : str
            attribute name in any case

        Returns
        -------
        str
            attribute name in the case already in use, or `name` itself

        """

        target = name.lower()

        # standard attributes take precedence
        for stored in self.attrs():
            if stored.lower() == target:
                return stored

        # then the attributes of every higher order structure
        for ho_name in self.keys_nD():
            child_meta = self[ho_name].children
            for stored in child_meta.attrs():
                if stored.lower() == target:
                    return stored

        # no match anywhere, the caller's spelling becomes the standard
        return name

    def concat(self, other, strict=False):
        """Concats two metadata objects together.

        Parameters
        ----------
        other : Meta
            Meta object to be concatenated
        strict : bool
            if True, ensure there are no duplicate variable names

        Notes
        -----
        Uses units and name label of self if other is different

        Returns
        -------
        Meta
            Concatenated object; self and other are left untouched

        Raises
        ------
        RuntimeError
            If `strict` is True and a variable name of `other` is already
            present in self
        """

        mdata = self.copy()
        # checks
        if strict:
            for key in other.keys():
                if key in mdata:
                    raise RuntimeError(''.join(
                        ('Duplicated keys (variable ', 'names) across Meta ',
                         'objects in keys().')))
            for key in other.keys_nD():
                if key in mdata:
                    raise RuntimeError(''.join(
                        ('Duplicated keys (variable ', 'names) across Meta ',
                         'objects in keys_nD().')))

        # make sure labels between the two objects are the same
        other_updated = self.apply_default_labels(other)
        # concat 1D metadata in data frames to copy of current metadata.
        # Rows are taken from the relabelled copy: reading them from `other`
        # would discard the relabelling performed on the line above.
        for key in other_updated.keys():
            mdata.data.loc[key] = other_updated.data.loc[key]
        # add together higher order data, again from the relabelled copy
        for key in other_updated.keys_nD():
            mdata.ho_data[key] = other_updated.ho_data[key]

        return mdata

    def copy(self):
        """Deep copy of the meta object.

        Returns
        -------
        Meta
            New object that shares no mutable state with self
        """
        # the docstring must be the first statement of the function, placed
        # after the import it was a discarded expression and __doc__ was None
        from copy import deepcopy

        return deepcopy(self)

    def pop(self, name):
        """Remove a variable from the metadata and return what was stored.

        Parameters
        ----------
        name : str
            variable name

        Returns
        -------
        object
            For a 1D variable, the result of indexing self with the stored
            name; for a higher order variable, the entry removed from
            `ho_data`

        Raises
        ------
        KeyError
            If `name` is not a known variable
        """
        # refuse unknown variables up front
        if name not in self:
            raise KeyError('Key not present in metadata variables')

        # work with the case preserved spelling from here on
        stored_name = self.var_case_name(name)

        if stored_name in self.keys():
            # 1D variable: read the metadata first, then drop its row
            removed = self[stored_name]
            self.data.drop(stored_name, inplace=True, axis=0)
            return removed

        # otherwise the variable lives in the higher order container
        return self.ho_data.pop(stored_name)

    def transfer_attributes_to_instrument(self, inst, strict_names=False):
        """Copy non-standard attributes of this Meta onto an Instrument.

        Every public attribute attached to self after construction (that is,
        any name reported by ``dir(self)`` that is not listed in
        ``self._base_attr`` and does not start with an underscore) is set on
        `inst`. Names listed in ``inst._base_attr`` are never transferred.

        Parameters
        ----------
        inst : pysat.Instrument
            Instrument object to transfer attributes to
        strict_names : boolean (False)
            If True, produces an error if the Instrument object already
            has an attribute with the same name to be copied. If False, the
            existing attribute is overwritten.

        Returns
        -------
        None
            pysat.Instrument object modified in place with new attributes

        Raises
        ------
        RuntimeError
            If `strict_names` is True and an attribute already exists on
            `inst`. Attributes handled before the conflict remain set.
        """

        # names reserved by the Instrument and names intrinsic to this Meta
        banned = inst._base_attr
        base_attrb = self._base_attr

        # gather every value before touching the Instrument
        pending = []
        for key in dir(self):
            if key in banned or key in base_attrb or key[0] == '_':
                continue
            pending.append((key, self.__getattribute__(key)))

        # attribute names on the Instrument before any transfer takes place
        inst_attr = dir(inst)

        for key, value in pending:
            if strict_names and key in inst_attr:
                raise RuntimeError(''.join(
                    ('Attribute ', key, ' attached to Meta object',
                     ' can not be transferred',
                     ' as it already exists in',
                     ' the Instrument object.')))
            setattr(inst, key, value)

    def __eq__(self, other):
        """
        Check equality between Meta instances. Good for testing.

        Two Meta objects are equal when they hold the same variable names,
        the same attribute names and the same metadata values, both for the
        1D variables and for the children of every higher order variable.
        Two values that are both NaN are treated as equal.

        Name comparison is case-sensitive.

        """

        if not isinstance(other, Meta):
            # wasn't even the correct class
            return False

        def names_match(first, second):
            # same number of names, and every name of the first collection
            # is present in the second one
            first = list(first)
            second = list(second)
            if len(first) != len(second):
                return False
            return all(item in second for item in first)

        def values_match(left, right):
            # element by element comparison of two objects that are already
            # known to share variable and attribute names
            for var in left.keys():
                for label in left.attrs():
                    if left[var, label] == right[var, label]:
                        continue
                    # np.nan is not equal to anything, so a pair of NaN
                    # values is accepted explicitly
                    try:
                        if not (np.isnan(left[var, label])
                                and np.isnan(right[var, label])):
                            # one or both are not NaN and they aren't equal
                            return False
                    except TypeError:
                        # isnan gets unhappy with string inputs
                        return False
            return True

        # 1D variables: names, attributes, then values
        if not names_match(self.keys(), other.keys()):
            return False
        if not names_match(self.attrs(), other.attrs()):
            return False
        if not values_match(self, other):
            return False

        # higher order products, checked in the same manner
        if not names_match(self.keys_nD(), other.keys_nD()):
            return False
        for key in self.keys_nD():
            own_children = self[key].children
            other_children = other[key].children
            if not names_match(own_children.keys(), other_children.keys()):
                return False
            if not names_match(own_children.attrs(), other_children.attrs()):
                return False
            if not values_match(own_children, other_children):
                return False

        # if we made it this far, things are good
        return True

    @classmethod
    def from_csv(cls, name=None, col_names=None, sep=None, **kwargs):
        """Create instrument metadata object from csv.

        Parameters
        ----------
        name : string
            path of an existing csv file, or the name of a file stored in
            the 'instruments' directory of the installed pysat package
        col_names : list-like collection of strings
            column names in csv and resultant meta object
        sep : string
            column separator for supplied csv filename, ',' when None
        **kwargs
            passed through to the csv reader

        Raises
        ------
        ValueError
            If required column names are missing, `name` is None or not a
            string, no file can be located, or the file holds no data

        Note
        ----
        column names must include at least ['name', 'long_name', 'units'],
        assumed if col_names is None.
        """
        import pysat

        required = ['name', 'long_name', 'units']
        if col_names is None:
            col_names = required
        else:
            absent = [label for label in required if label not in col_names]
            if absent:
                raise ValueError(
                    'col_names must include name, long_name, units.')

        sep = ',' if sep is None else sep

        if name is None:
            raise ValueError('Must supply an instrument name or file path.')
        if not isinstance(name, str):
            raise ValueError('keyword name must be related to a string')

        if not os.path.isfile(name):
            # Not a real file as given, so try the standard pysat location
            # first and an absolute version of the input second.
            packaged = os.path.join(pysat.__path__[0], 'instruments', name)
            if os.path.isfile(packaged):
                name = packaged
            else:
                absolute = os.path.abspath(name)
                if not os.path.isfile(absolute):
                    raise ValueError("Unable to create valid file path.")
                name = absolute

        mdata = pds.read_csv(name, names=col_names, sep=sep, **kwargs)

        if mdata.empty:
            raise ValueError('Unable to retrieve information from ' + name)

        # the variable name becomes the index and stops being a column
        mdata.index = mdata['name']
        del mdata['name']
        return cls(metadata=mdata)

Exemple #17

0

Afficher le fichier

Fichier : _meta.py Projet : rstoneback/pysat

class Meta(object):
    """ Stores metadata for Instrument instance, similar to CF-1.6 netCDFdata
    standard.
    
    Parameters
    ----------
    metadata : pandas.DataFrame 
        DataFrame should be indexed by variable name that contains at minimum
        the standard_name (name), units, and long_name for the data stored in
        the associated pysat Instrument object.

    units_label : str
        String used to label units in storage. Defaults to 'units'.
    name_label : str
        String used to label long_name in storage. Defaults to 'long_name'.
    notes_label : str
        String used to label 'notes' in storage. Defaults to 'notes'
    desc_label : str
        String used to label variable descriptions in storage. Defaults to 'desc'
    plot_label : str
        String used to label variables in plots. Defaults to 'label'
    axis_label : str
        Label used for axis on a plot. Defaults to 'axis'
    scale_label : str
        string used to label plot scaling type in storage. Defaults to 'scale'
    min_label : str
        String used to label typical variable value min limit in storage.
        Defaults to 'value_min'
    max_label : str
        String used to label typical variable value max limit in storage.
        Defaults to 'value_max'
    fill_label : str
        String used to label fill value in storage. Defaults to 'fill' per
        netCDF4 standard


    Attributes
    ----------
    data : pandas.DataFrame
        index is variable standard name, 'units', 'long_name', and other
        defaults are also stored along with additional user provided labels.

    units_label : str
        String used to label units in storage. Defaults to 'units'.
    name_label : str
        String used to label long_name in storage. Defaults to 'long_name'.
    notes_label : str
       String used to label 'notes' in storage. Defaults to 'notes'
    desc_label : str
       String used to label variable descriptions in storage. Defaults to 'desc'
    plot_label : str
       String used to label variables in plots. Defaults to 'label'
    axis_label : str
        Label used for axis on a plot. Defaults to 'axis'
    scale_label : str
       string used to label plot scaling type in storage. Defaults to 'scale'
    min_label : str
       String used to label typical variable value min limit in storage.
       Defaults to 'value_min'
    max_label : str
       String used to label typical variable value max limit in storage.
       Defaults to 'value_max'
    fill_label : str
        String used to label fill value in storage. Defaults to 'fill' per
        netCDF4 standard


    Notes
    -----
    Meta object preserves the case of variables and attributes as it first 
    receives the data. Subsequent calls to set new metadata with the same
    variable or attribute will use case of first call. Accessing or setting
    data thereafter is case insensitive. In practice, use is case insensitive
    but the original case is preserved. Case preseveration is built in to
    support writing files with a desired case to meet standards.

    Metadata for higher order data objects, those that have
    multiple products under a single variable name in a pysat.Instrument
    object, are stored by providing a Meta object under the single name.

    Supports any custom metadata values in addition to the expected metadata
    attributes (units, name, notes, desc, plot_label, axis, scale, 
    value_min, value_max, and fill). These base attributes may be used to 
    programatically access and set types of metadata regardless of the string 
    values used for the attribute. String values for attributes may need to be 
    changed depending upon the standards of code or files interacting with pysat.
    
    Meta objects returned as part of pysat loading routines are automatically
    updated to use the same values of plot_label, units_label, etc. as found
    on the pysat.Instrument object.
    
    Examples
    --------
    ::
        # instantiate Meta object, default values for attribute labels are used
        meta = pysat.Meta()
        # set a couple base units
        # note that other base parameters not set below will
        # be assigned a default value
        meta['name'] = {'long_name':string, 'units':string}
        # update 'units' to new value
        meta['name'] = {'units':string}
        # update 'long_name' to new value
        meta['name'] = {'long_name':string}
        # attach new info with partial information, 'long_name' set to 'name2'
        meta['name2'] = {'units':string}
        # units are set to '' by default
        meta['name3'] = {'long_name':string}

        # assigning custom meta parameters
        meta['name4'] = {'units':string, 'long_name':string
                         'custom1':string, 'custom2':value}
        meta['name5'] = {'custom1':string, 'custom3':value}

        # assign multiple variables at once
        meta[['name1', 'name2']] = {'long_name':[string1, string2],
                                    'units':[string1, string2],
                                    'custom10':[string1, string2]}

        # assiging metadata for n-Dimensional variables
        meta2 = pysat.Meta()
        meta2['name41'] = {'long_name':string, 'units':string}
        meta2['name42'] = {'long_name':string, 'units':string}
        meta['name4'] = {'meta':meta2}
        # or
        meta['name4'] = meta2
        meta['name4'].children['name41']

        # mixture of 1D and higher dimensional data
        meta = pysat.Meta()
        meta['dm'] = {'units':'hey', 'long_name':'boo'}
        meta['rpa'] = {'units':'crazy', 'long_name':'boo_whoo'}
        meta2 = pysat.Meta()
        meta2[['higher', 'lower']] = {'meta':[meta, None],
                                      'units':[None, 'boo'],
                                      'long_name':[None, 'boohoo']}

        # assign from another Meta object
        meta[key1] = meta2[key2]

        # access fill info for a variable, presuming default label
        meta[key1, 'fill']
        # access same info, even if 'fill' not used to label fill values
        meta[key1, meta.fill_label]


        # change a label used by Meta object
        # note that all instances of fill_label
        # within the meta object are updated
        meta.fill_label = '_FillValue'
        meta.plot_label = 'Special Plot Variable'
        # this feature is useful when converting metadata within pysat
        # so that it is consistent with externally imposed file standards

    """

    def __init__(self, metadata=None, units_label='units', name_label='long_name',
                 notes_label='notes', desc_label='desc', plot_label='label',
                 axis_label='axis', scale_label='scale', min_label='value_min',
                 max_label='value_max', fill_label='fill'):
        # set units and name labels directly
        self._units_label = units_label
        self._name_label = name_label
        self._notes_label = notes_label
        self._desc_label = desc_label
        self._plot_label = plot_label
        self._axis_label = axis_label
        self._scale_label = scale_label
        self._min_label = min_label
        self._max_label = max_label
        self._fill_label = fill_label
        # init higher order (nD) data structure container, a dict
        self._ho_data = {}
        # use any user provided data to instantiate object with data
        # attirube unit and name labels are called within
        if metadata is not None:
            if isinstance(metadata, DataFrame):
                self._data = metadata
                # make sure defaults are taken care of for required metadata
                self.accept_default_labels(self)
            else:
                raise ValueError("Input must be a pandas DataFrame type. "+
                            "See other constructors for alternate inputs.")
        else:
            self._data = DataFrame(None, columns=[self._units_label, self._name_label,
                                                 self._desc_label,
                                                 self._plot_label, self._axis_label,
                                                 self._scale_label, self.notes_label,
                                                 self._min_label, self._max_label,
                                                 self._fill_label])

        # establish attributes intrinsic to object, before user can
        # add any
        self._base_attr = dir(self)

    @property
    def ho_data(self):
        """Container holding the metadata of higher order (nD) variables."""
        return self._ho_data

    @property
    def data(self):
        """Object stored in `_data`, the metadata of the 1D variables."""
        return self._data

    @data.setter
    def data(self, new_frame):
        """Replace the object stored in `_data`; no validation is done."""
        self._data = new_frame

    @ho_data.setter
    def ho_data(self, new_dict):
        """Replace the higher order container; no validation is done."""
        self._ho_data = new_dict
    
    @property    
    def empty(self):
        """Return boolean True if there is no metadata"""
        
        # only need to check on lower data since lower data
        # is set when higher metadata assigned
        if self.data.empty:
            return True
        else:
            return False

    def merge(self, other):
        """Adds metadata variables to self that are in other but not in self.

        Variables already present in self are left untouched.

        Parameters
        ----------
        other : pysat.Meta
            object supplying the additional variables

        """

        # the generator is consumed lazily, so each membership test sees
        # the variables added by the previous assignments
        absent = (key for key in other.keys() if key not in self)
        for key in absent:
            # a single item assignment covers whatever other[key] carries
            self[key] = other[key]

    def drop(self, names):
        """Drops variables (names) from metadata."""
        
        # drop lower dimension data
        self._data = self._data.drop(names, axis=0)
        # drop higher dimension data
        for name in names:
            if name in self._ho_data:
                _ = self._ho_data.pop(name)

    def keep(self, keep_names):
        """Keeps variables (keep_names) while dropping other parameters"""
        
        current_names = self._data.columns
        drop_names = []
        for name in current_names:
            if name not in keep_names:
                drop_names.append(name)
        self.drop(drop_names)
        
#     def default_labels_and_values(self, name):
#         """Returns dictionary of default meta labels and values for name variable.
# 
#         Metadata is automatically tracked for various properties, name,
#         long_name, units, description, etc. Each of these values (labels)
#         corresponds to a given string (values).
# 
#         Parameters
#         ----------
#         name : list_like of str
#             variable names to get default metadata parameters for
# 
#         Returns
#         -------
#         dict
#             keys are metadata labels used within Meta object, values are the default
#             values assigned if data is never specified by user
# 
#         """
#         num = len(name)
#         default_str = [''] * num
#         default_nan = [np.NaN] * num
#         return {self.units_label: default_str,
#                 self.name_label: name,
#                 self.notes_label: default_str,
#                 self.desc_label: default_str,
#                 self.plot_label: name,
#                 self.axis_label: name,
#                 self.scale_label: ['linear'] * num,
#                 self.min_label: default_nan,
#                 self.max_label: default_nan,
#                 self.fill_label: default_nan}

    def apply_default_labels(self, other):
        """Applies labels for default meta labels from self onto other.
        
        Parameters
        ----------
        other : Meta
            Meta object to have default labels applied
        
        Returns
        -------
        Meta
        
        """
        other_updated = other.copy()
        other_updated.units_label = self.units_label
        other_updated.name_label = self.name_label
        other_updated.notes_label = self.notes_label
        other_updated.desc_label = self.desc_label
        other_updated.plot_label = self.plot_label
        other_updated.axis_label = self.axis_label
        other_updated.scale_label = self.scale_label
        other_updated.min_label = self.min_label
        other_updated.max_label = self.max_label
        other_updated.fill_label = self.fill_label
        return other

    def accept_default_labels(self, other):
        """Applies labels for default meta labels from other onto self.
        
        Parameters
        ----------
        other : Meta
            Meta object to take default labels from
        
        Returns
        -------
        Meta
        
        """

        self.units_label = other.units_label
        self.name_label = other.name_label
        self.notes_label = other.notes_label
        self.desc_label = other.desc_label
        self.plot_label = other.plot_label
        self.axis_label = other.axis_label
        self.scale_label = other.scale_label
        self.min_label = other.min_label
        self.max_label = other.max_label
        self.fill_label = other.fill_label
        return 

    
    def __contains__(self, other):
        """case insensitive check for variable name"""
        
        if other.lower() in [i.lower() for i in self.keys()]:
            return True
        if other.lower() in [i.lower() for i in self.keys_nD()]:
            return True
        return False

    def __repr__(self):
        """Return a fixed label; it does not depend on the stored metadata."""
        return 'pysat.MetaData'

    def __str__(self, recurse=True):
        """String describing Meta instance, variables, and attributes"""

        # cover 1D parameters
        if recurse:
            output_str = 'Metadata for 1D variables\n'
        else:
            output_str = ''

        for ind in self.keys():
            output_str += ind.ljust(30)
        output_str += '\n\n'
        output_str += 'Tracking the following:\n'
        for col in self.attrs():
            output_str += col.ljust(30)

        output_str += '\n'
        if recurse:
            for item_name in self.keys_nD():
                output_str += '\n\n'
                output_str += 'Metadata for '+item_name+'\n'
                output_str += self.ho_data[item_name].__str__(False)

        return output_str

    def _insert_default_values(self, input_name):
                        
        default_str = ''
        default_nan = np.NaN
        labels = [self.units_label, self.name_label, self.notes_label,
                  self.desc_label, self.plot_label, self.axis_label, 
                  self.scale_label, self.min_label, self.max_label, 
                  self.fill_label]
        defaults = [default_str, input_name, default_str, default_str,
                    input_name, input_name, 'linear', default_nan, 
                    default_nan, default_nan]
        self._data.loc[input_name, labels] = defaults
        
    def __setitem__(self, names, input_data):
        """Convenience method for adding metadata.

        Parameters
        ----------
        names : str or list-like of str
            Variable name(s) the metadata belongs to. A Meta or Series
            input is assigned to a single name.
        input_data : dict, Series, or Meta
            dict : keys are attribute names, values the settings (a list
            with one entry per name when several names are given). The keys
            'meta' and 'children' carry Meta objects for higher order
            variables. The dict passed in is not modified.
            Series : handled as the dict obtained from ``to_dict``.
            Meta : stored as the higher order metadata of `names`.

        Raises
        ------
        ValueError
            If the number of values supplied for an attribute differs from
            the number of names

        """

        if isinstance(input_data, dict):
            # work on a shallow copy: values are wrapped and keys renamed
            # below, which must not leak back into the caller's dict
            input_data = dict(input_data)
            # if not passed an iterable, make it one
            if isinstance(names, basestring):
                names = [names]
                for key in input_data:
                    input_data[key] = [input_data[key]]
            # make sure the variable names are in good shape
            # Meta object is case insensitive but case preserving
            # convert given names into ones Meta has already seen
            # if new, then input names become the standard
            names = [self.var_case_name(name) for name in names]
            for name in names:
                if name not in self:
                    self._insert_default_values(name)
            # check if input dict empty. Truthiness is used because
            # comparing dict.keys() with [] is never true on Python 3.
            if not input_data:
                # meta wasn't actually assigned by user, empty call
                # we can head out - we've assigned defaults if first data
                return
            # perform some checks on the data
            # make sure number of inputs matches number of metadata inputs
            for key in input_data:
                if len(names) != len(input_data[key]):
                    raise ValueError('Length of names and inputs must be equal.')
            # make sure the attribute names are in good shape
            # check name of attributes against existing attribute names
            # if attribute name exists somewhere, then case of existing attribute
            # will be enforced upon new data by default for consistency
            for name in list(input_data):
                new_name = self.attr_case_name(name)
                if new_name != name:
                    input_data[new_name] = input_data.pop(name)

            # time to actually add the metadata
            for key in input_data:
                if key not in ['children', 'meta']:
                    for i, name in enumerate(names):
                        to_be_set = input_data[key][i]
                        if hasattr(to_be_set, '__iter__') and not isinstance(to_be_set, basestring):
                            # iterables of strings are stored as one string
                            if isinstance(to_be_set[0], basestring):
                                self._data.loc[name, key] = '\n\n'.join(to_be_set)
                            else:
                                warnings.warn(' '.join(('Array elements are disallowed in meta.',
                                                'Dropping input :', key)))
                        else:
                            self._data.loc[name, key] = to_be_set
                else:
                    # key is 'meta' or 'children'
                    # process higher order stuff. Meta inputs could be part of
                    # larger multiple parameter assignment
                    # so not all names may actually have 'meta' to add.
                    # The values are read through `key`, so 'children' works
                    # even when no 'meta' entry is present.
                    for item, val in zip(names, input_data[key]):
                        if val is not None:
                            # assign meta data, recursive call....
                            # heads to if Meta instance call
                            self[item] = val

        elif isinstance(input_data, Series):
            # outputs from Meta object are a Series.
            # thus this takes in input from a Meta object
            # set data using standard assignment via a dict
            in_dict = input_data.to_dict()
            if 'children' in in_dict:
                child = in_dict.pop('children')
                if child is not None:
                    self.ho_data[names] = child
            # remaining items are simply assigned
            self[names] = in_dict

        elif isinstance(input_data, Meta):
            # dealing with higher order data set
            # names is only a single name here (by choice for support)
            if (names in self._ho_data) and (input_data.empty):
                # no actual metadata provided and there is already some
                # higher order metadata in self
                return

            # get Meta approved variable names
            new_item_name = self.var_case_name(names)
            # ensure that Meta labels of object to be assigned
            # are consistent with self
            # input_data accepts self's labels
            input_data.accept_default_labels(self)

            # go through and ensure Meta object to be added has variable and
            # attribute names consistent with other variables and attributes
            # this covers custom attributes not handled by default routine above
            input_data.data.columns = [self.attr_case_name(name)
                                       for name in input_data.attrs()]
            # same thing for variables
            input_data.data.index = [self.var_case_name(name)
                                     for name in input_data.data.index]
            # assign Meta object now that things are consistent with Meta
            # object settings
            # but first, make sure there are lower dimension metadata
            # parameters, passing in an empty dict fills in defaults
            # if there is no existing metadata info
            self[new_item_name] = {}
            # now add to higher order data
            self._ho_data[new_item_name] = input_data

    def __getitem__(self, key):
        """Convenience method for obtaining metadata.
        
        Maps to pandas DataFrame.loc method.
        
        Examples
        --------
        ::
        
            meta['name']
            
            meta[ 'name1', 'units' ]

            for higher order data
            
            meta[ 'name1', 'subvar', 'units' ]
        
        """
        # if key is a tuple, looking at index, column access pattern
        if isinstance(key, tuple):
            # if tuple length is 2, index, column
            if len(key) == 2:
                new_index = self.var_case_name(key[0])
                new_name = self.attr_case_name(key[1])
                return self.data.loc[new_index, new_name]
            # if tuple length is 3, index, child_index, column
            elif len(key) == 3:
                new_index = self.var_case_name(key[0])
                new_child_index = self.var_case_name(key[1])
                new_name = self.attr_case_name(key[2])
                return self.ho_data[new_index].data.loc[new_child_index, new_name]
        else:
            # ensure variable is present somewhere
            if key in self:
                # get case preserved string for variable name
                new_key = self.var_case_name(key)
                # if new_key in self.keys():
                # don't need to check if in lower, all variables
                # are always in the lower metadata
                meta_row = self.data.loc[new_key]
                if new_key in self.keys_nD():
                    meta_row.at['children'] = self.ho_data[new_key].copy()
                else:
                    # empty_meta = Meta()
                    # self.apply_default_labels(empty_meta)
                    meta_row.at['children'] = None #empty_meta
                return meta_row
                # else:
                #     return pds.Series([self.ho_data[new_key].copy()], index=['children'])
            else:
                raise KeyError('Key not found in MetaData')

    def _label_setter(self, new_label, current_label, attr_label, default=np.NaN, use_names_default=False):
        """Generalized setter of default meta attributes
        
        Parameters
        ----------
        new_label : str
            New label to use in the Meta object
        current_label : str
            The hidden attribute to be updated that actually stores metadata
        default : 
            Deafult setting to use for label if there is no attribute
            value
        use_names_default : bool
            if True, MetaData variable names are used as the default
            value for the specified Meta attributes settings
            
        Examples
        --------
        :
                @name_label.setter   
                def name_label(self, new_label):
                    self._label_setter(new_label, self._name_label, 
                                        use_names_default=True)  
        
        Notes
        -----
        Not intended for end user
                                  
        """
        
        if new_label not in self.attrs():
            # new label not in metadata, including case
            # update existing label, if present
            if current_label in self.attrs():
                # old label exists and has expected case
                self.data.loc[:, new_label] = self.data.loc[:, current_label]
                self.data.drop(current_label, axis=1, inplace=True)
            else:
                if self.has_attr(current_label):
                    # there is something like label, wrong case though
                    current_label = self.attr_case_name(current_label)
                    self.data.loc[:, new_label] = self.data.loc[:, current_label]
                    self.data.drop(current_label, axis=1, inplace=True)
                else:
                    # there is no existing label
                    # setting for the first time
                    if use_names_default:
                        self.data[new_label] = self.data.index
                    else:
                        self.data[new_label] = default
            # check higher order structures as well
            # recursively change labels here
            for key in self.keys_nD():
                setattr(self.ho_data[key], attr_label, new_label)

        # now update 'hidden' attribute value
        # current_label = new_label
        setattr(self, ''.join(('_',attr_label)), new_label)

    @property
    def units_label(self):
        return self._units_label

    @property
    def name_label(self):
        return self._name_label

    @property
    def notes_label(self):
        return self._notes_label

    @property
    def desc_label(self):
        return self._desc_label

    @property
    def plot_label(self):
        return self._plot_label

    @property
    def axis_label(self):
        return self._axis_label

    @property
    def scale_label(self):
        return self._scale_label

    @property
    def min_label(self):
        return self._min_label

    @property
    def max_label(self):
        return self._max_label

    @property
    def fill_label(self):
        return self._fill_label   
             
    @units_label.setter   
    def units_label(self, new_label):
        self._label_setter(new_label, self._units_label, 'units_label', '') 
    @name_label.setter   
    def name_label(self, new_label):
        self._label_setter(new_label, self._name_label, 'name_label', use_names_default=True)     
    @notes_label.setter   
    def notes_label(self, new_label):
        self._label_setter(new_label, self._notes_label, 'notes_label', '')
    @desc_label.setter   
    def desc_label(self, new_label):
        self._label_setter(new_label, self._desc_label, 'desc_label', '')
    @plot_label.setter   
    def plot_label(self, new_label):
        self._label_setter(new_label, self._plot_label, 'plot_label', use_names_default=True)
    @axis_label.setter   
    def axis_label(self, new_label):
        self._label_setter(new_label, self._axis_label, 'axis_label', use_names_default=True)
    @scale_label.setter   
    def scale_label(self, new_label):
        self._label_setter(new_label, self._scale_label, 'scale_label', 'linear')
    @min_label.setter   
    def min_label(self, new_label):
        self._label_setter(new_label, self._min_label, 'min_label', np.NaN)
    @max_label.setter   
    def max_label(self, new_label):
        self._label_setter(new_label, self._max_label, 'max_label', np.NaN)
    @fill_label.setter   
    def fill_label(self, new_label):
        self._label_setter(new_label, self._fill_label, 'fill_label', np.NaN)

    def var_case_name(self, name):
        """Provides stored name (case preserved) for case insensitive input
        
        If name is not found (case-insensitive check) then name is returned,
        as input. This function is intended to be used to help ensure the
        case of a given variable name is the same across the Meta object.
        
        Parameters
        ----------
        name : str
            variable name in any case
            
        Returns
        -------
        str
            string with case preserved as in metaobject
            
        """

        lower_name = name.lower()
        if name in self:
            for i in self.keys():
                if lower_name == i.lower():
                    return i
            for i in self.keys_nD():
                if lower_name == i.lower():
                    return i
        return name

    def keys(self):
        """Yields variable names stored for 1D variables"""

        for i in self.data.index:
            yield i

    def keys_nD(self):
        """Yields keys for higher order metadata"""

        for i in self.ho_data:
            yield i

    def attrs(self):
        """Yields metadata products stored for each variable name"""

        for i in self.data.columns:
            yield i

    def has_attr(self, name):
        """Returns boolean indicating presence of given attribute name
        
        Case-insensitive check
        
        Notes
        -----
        Does not check higher order meta objects
        
        Parameters
        ----------
        name : str
            name of variable to get stored case form
            
        Returns
        -------
        bool
            True if case-insesitive check for attribute name is True

        """

        if name.lower() in [i.lower() for i in self.data.columns]:
            return True
        return False

    def attr_case_name(self, name):
        """Returns preserved case name for case insensitive value of name.
        
        Checks first within standard attributes. If not found there, checks
        attributes for higher order data structures. If not found, returns
        supplied name as it is available for use. Intended to be used to help
        ensure that the same case is applied to all repetitions of a given
        variable name.
        
        Parameters
        ----------
        name : str
            name of variable to get stored case form

        Returns
        -------
        str
            name in proper case
        """

        lower_name = name.lower()
        for i in self.attrs():
            if lower_name == i.lower():
                return i
        # check if attribute present in higher order structures
        for key in self.keys_nD():
            for i in self[key].children.attrs():
                if lower_name == i.lower():
                    return i
        # nothing was found if still here
        # pass name back, free to be whatever
        return name

    def concat(self, other, strict=False):
        """Concats two metadata objects together.

        Parameters
        ----------
        other : Meta
            Meta object to be concatenated
        strict : bool
            if True, ensure there are no duplicate variable names

        Notes
        -----
        Uses units and name label of self if other is different
        
        Returns
        -------
        Meta
            Concatenated object
        """

        mdata = self.copy()
        # checks
        if strict:
            for key in other.keys():
                if key in mdata:
                    raise RuntimeError('Duplicated keys (variable names) ' +
                                       'across Meta objects in keys().')
            for key in other.keys_nD():
                if key in mdata:

                    raise RuntimeError('Duplicated keys (variable names) across '
                                        'Meta objects in keys_nD().')
                                        
        # make sure labels between the two objects are the same
        other_updated = self.apply_default_labels(other)
        # concat 1D metadata in data frames to copy of
        # current metadata
# <<<<<<< ho_meta_fix
        for key in other_updated.keys():
            mdata.data.loc[key] = other.data.loc[key]
        # add together higher order data
        for key in other_updated.keys_nD():
            mdata.ho_data[key] = other.ho_data[key]
# =======
#         for key in other_updated.keys():
#             mdata[key] = other_updated[key]
#         # add together higher order data
#         for key in other_updated.keys_nD():
#             mdata[key] = other_updated[key]

        return mdata

    def copy(self):
        from copy import deepcopy as deepcopy
        """Deep copy of the meta object."""
        return deepcopy(self) 

    def pop(self, name):
        """Remove and return metadata about variable

        Parameters
        ----------
        name : str
            variable name

        Returns
        -------
        pandas.Series
            Series of metadata for variable
        """
        # check if present
        if name in self:
            # get case preserved name for variable
            new_name = self.var_case_name(name)
            # check if 1D or nD
            if new_name in self.keys():
                output = self[new_name]
                self.data.drop(new_name, inplace=True, axis=0)
            else:
                output = self.ho_data.pop(new_name)
                
            return output
        else:
            raise KeyError('Key not present in metadata variables')


    def transfer_attributes_to_instrument(self, inst, strict_names=False):
        """Transfer non-standard attributes in Meta to Instrument object.

        Pysat's load_netCDF and similar routines are only able to attach
        netCDF4 attributes to a Meta object. This routine identifies these
        attributes and removes them from the Meta object. Intent is to 
        support simple transfers to the pysat.Instrument object.

        Will not transfer names that conflict with pysat default attributes.
        
        Parameters
        ----------
        inst : pysat.Instrument
            Instrument object to transfer attributes to
        strict_names : boolean (False)
            If True, produces an error if the Instrument object already
            has an attribute with the same name to be copied.

        Returns
        -------
        None
            pysat.Instrument object modified in place with new attributes
        """

        # base Instrument attributes
        banned = inst._base_attr
        # get base attribute set, and attributes attached to instance
        base_attrb = self._base_attr
        this_attrb = dir(self)
        # collect these attributes into a dict
        adict = {}
        transfer_key = []
        for key in this_attrb:
            if key not in banned:
                if key not in base_attrb:
                    # don't store _ leading attributes
                    if key[0] != '_':
                        adict[key] = self.__getattribute__(key)
                        transfer_key.append(key)

        # store any non-standard attributes in Instrument
        # get list of instrument objects attributes first
        # to check if a duplicate
        inst_attr = dir(inst)
        for key in transfer_key:
            if key not in banned:
                if key not in inst_attr:
                    inst.__setattr__(key, adict[key])
                else:
                    if not strict_names:
                        # new_name = 'pysat_attr_'+key
                        inst.__setattr__(key, adict[key])
                    else:
                        raise RuntimeError('Attribute ' + key +
                                           'attached to Meta object can not be '
                                           + 'transferred as it already exists'
                                           + ' in the Instrument object.')
        # return inst

    def __eq__(self, other):
        """
        Check equality between Meta instances. Good for testing.
        
        Checks if variable names, attribute names, and metadata values
        are all equal between to Meta objects. Note that this comparison
        treats np.NaN == np.NaN as True.
        
        Name comparison is case-sensitive.
        
        """
        
        if isinstance(other, Meta):
            # check first if variables and attributes are the same
            # quick check on length
            keys1 = [i for i in self.keys()]
            keys2 = [i for i in other.keys()]
            if len(keys1) != len(keys2):
                return False
            # now iterate over each of the keys in the first one
            # don't need to iterate over second one, if all of the first
            # in the second we are good. No more or less items in second from 
            # check earlier.
            for key in keys1:
                if key not in keys2:
                    return False
            # do same checks on attributes 
            attrs1 = [i for i in self.attrs()]
            attrs2 = [i for i in other.attrs()]
            if len(attrs1) != len(attrs2):
                return False
            for attr in attrs1:
                if attr not in attrs2:
                    return False
            # now check the values of all elements now that we know all variable
            # and attribute names are the same
            for key in self.keys():
                for attr in self.attrs():
                    if not (self[key, attr] == other[key, attr]):
                        # np.nan is not equal to anything
                        # if both values are NaN, ok in my book
                        try:
                            if not (np.isnan(self[key, attr]) and np.isnan(other[key, attr])):
                                # one or both are not NaN and they aren't equal
                                # test failed
                                return False
                        except TypeError:
                            # comparison above gets unhappy with string inputs
                            return False

            # check through higher order products
            # in the same manner as code above
            keys1 = [i for i in self.keys_nD()]
            keys2 = [i for i in other.keys_nD()]
            if len(keys1) != len(keys2):
                return False
            for key in keys1:
                if key not in keys2:
                    return False
            # do same check on all sub variables within each nD key
            for key in self.keys_nD():
                keys1 = [i for i in self[key].children.keys()]
                keys2 = [i for i in other[key].children.keys()]
                if len(keys1) != len(keys2):
                    return False
                for key_check in keys1:
                    if key_check not in keys2:
                        return False
                # check if attributes are the same
                attrs1 = [i for i in self[key].children.attrs()]
                attrs2 = [i for i in other[key].children.attrs()]
                if len(attrs1) != len(attrs2):
                    return False
                for attr in attrs1:
                    if attr not in attrs2:
                        return False
                # now time to check if all elements are individually equal
                for key2 in self[key].children.keys():
                    for attr in self[key].children.attrs():
                        if not (self[key].children[key2, attr] == other[key].children[key2, attr]):
                            try:
                                if not (np.isnan(self[key].children[key2, attr]) and np.isnan(other[key].children[key2, attr])):
                                    return False
                            except TypeError:
                                # comparison above gets unhappy with string inputs
                                return False
            # if we made it this far, things are good                
            return True
        else:
            # wasn't even the correct class
            return False        
                        
    @classmethod
    def from_csv(cls, name=None, col_names=None, sep=None, **kwargs):
        """Create instrument metadata object from csv.

        Parameters
        ----------
        name : string
            absolute filename for csv file or name of file
            stored in pandas instruments location
        col_names : list-like collection of strings
            column names in csv and resultant meta object
        sep : string
            column seperator for supplied csv filename

        Note
        ----
        column names must include at least ['name', 'long_name', 'units'], 
        assumed if col_names is None.
        """
        import pysat
        req_names = ['name','long_name','units']
        if col_names is None:
            col_names = req_names
        elif not all([i in col_names for i in req_names]):
            raise ValueError('col_names must include name, long_name, units.')

        if sep is None:
            sep = ','

        if name is None:
            raise ValueError('Must supply an instrument name or file path.')
        elif not isinstance(name, str):
            raise ValueError('keyword name must be related to a string')
        elif not os.path.isfile(name):
                # Not a real file, assume input is a pysat instrument name
                # and look in the standard pysat location.
                test =  os.path.join(pysat.__path__[0],'instruments',name)
                if os.path.isfile(test):
                    name = test
                else:
                    #trying to form an absolute path for success
                    test = os.path.abspath(name)
                    if not os.path.isfile(test):
                        raise ValueError("Unable to create valid file path.")
                    else:
                        #success
                        name = test

        mdata = pds.read_csv(name, names=col_names, sep=sep, **kwargs) 

        if not mdata.empty:
            # make sure the data name is the index
            mdata.index = mdata['name']
            del mdata['name']
            return cls(metadata=mdata)
        else:
            raise ValueError('Unable to retrieve information from ' + name)