Example #1
    def _attach_files(self, files_info):
        """Attach results of instrument list_files routine to Instrument object

        Parameters
        ----------
        files_info : pandas.Series
            Stored file information

        Returns
        -------
        None
            Updates the file list (files), start_date, and stop_date
            attributes of the Files class object.
        """

        if not files_info.empty:
            unique_files = len(files_info.index.unique()) != len(files_info)
            if (not self._sat.multi_file_day and unique_files):
                estr = 'Duplicate datetimes in provided file '
                estr = '{:s}information.\nKeeping one of each '.format(estr)
                estr = '{:s}of the duplicates, dropping the rest.'.format(estr)
                logger.warning(estr)
                ind = files_info.index.duplicated()
                logger.warning(files_info.index[ind].unique())

                idx = np.unique(files_info.index, return_index=True)
                files_info = files_info.iloc[idx[1]]
                # raise ValueError('List of files must have unique datetimes.')

            self.files = files_info.sort_index()
            # filter for empty files here (in addition to refresh)
            if self.ignore_empty_files:
                self._filter_empty_files()
            # extract date information
            if not self.files.empty:
                self.start_date = \
                    self._sat._filter_datetime_input(self.files.index[0])
                self.stop_date = \
                    self._sat._filter_datetime_input(self.files.index[-1])
            else:
                self.start_date = None
                self.stop_date = None
        else:
            self.start_date = None
            self.stop_date = None
            # convert to object type
            # necessary if Series is empty, enables == checks with strings
            self.files = files_info.astype(np.dtype('O'))
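The duplicate-handling step can be exercised on its own; a minimal sketch with pandas and numpy, using made-up file names and dates, where np.unique with return_index=True keeps the first file found for each repeated datetime, as _attach_files does:

import numpy as np
import pandas as pds

# Hypothetical file listing with a duplicated time stamp
times = pds.to_datetime(['2019-01-01', '2019-01-02', '2019-01-02'])
files_info = pds.Series(['inst_20190101.nc', 'inst_20190102_a.nc',
                         'inst_20190102_b.nc'], index=times)

if len(files_info.index.unique()) != len(files_info):
    # Keep one file per datetime, drop the rest
    idx = np.unique(files_info.index, return_index=True)
    files_info = files_info.iloc[idx[1]]

print(files_info)  # one entry per datetime, in time order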
Example #2
def init(self):
    """Initializes the Instrument object with instrument specific values.

    Runs once upon instantiation.

    Parameters
    -----------
    self : pysat.Instrument
        Instrument class object

    """

    logger.info(mm_gold.ack_str)
    logger.warning(' '.join(
        ('Time stamps may be non-unique because Channel A',
         'and B are different instruments.  An upgrade to',
         'the pysat.Constellation object is required to',
         'solve this issue. See pysat issue #614 for more', 'info.')))
    self.acknowledgements = mm_gold.ack_str
    self.references = mm_gold.ref_str

    return
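For orientation, a minimal sketch of the pattern this follows: an instrument support module exposes a module-level init that pysat runs once at instantiation, so it only needs to log any startup messages and set acknowledgements and references. The strings below are placeholders standing in for mm_gold.ack_str and mm_gold.ref_str.

import logging

logger = logging.getLogger(__name__)

# Placeholder acknowledgement and reference text for illustration only
ack_str = 'Example acknowledgement text.'
ref_str = 'Example data reference.'


def init(self):
    """Initializes the Instrument object with instrument specific values."""
    logger.info(ack_str)
    self.acknowledgements = ack_str
    self.references = ref_str
    return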
Example #3
    def refresh(self):
        """Update list of files, if there are changes.

        Calls underlying list_rtn for the particular science instrument.
        Typically, these routines search in the pysat provided path,
        pysat_data_dir/platform/name/tag/,
        where pysat_data_dir is set by pysat.utils.set_data_dir(path=path).

        """

        output_str = '{platform} {name} {tag} {sat_id}'
        output_str = output_str.format(platform=self._sat.platform,
                                       name=self._sat.name,
                                       tag=self._sat.tag,
                                       sat_id=self._sat.sat_id)
        output_str = " ".join(("pysat is searching for", output_str, "files."))
        output_str = " ".join(output_str.split())
        logger.info(output_str)

        info = self._sat._list_rtn(tag=self._sat.tag,
                                   sat_id=self._sat.sat_id,
                                   data_path=self.data_path,
                                   format_str=self.file_format)
        info = self._remove_data_dir_path(info)
        if not info.empty:
            if self.ignore_empty_files:
                self._filter_empty_files()
            logger.info('Found {ll:d} of them.'.format(ll=len(info)))
        else:
            estr = "Unable to find any files that match the supplied template."
            estr += " If you have the necessary files please check pysat "
            estr += "settings and file locations (e.g. pysat.pysat_dir)."
            logger.warning(estr)
        # attach to object
        self._attach_files(info)
        # store - to disk, if enabled
        self._store()
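From the user side this method is normally reached through the files attribute of an Instrument. A minimal sketch, assuming pysat 2.x and using placeholder platform, name, and path values:

import pysat

# Tell pysat where to search: pysat_data_dir/platform/name/tag/
pysat.utils.set_data_dir('/path/to/pysat_data_dir')

# 'platform' and 'name' are placeholders for a supported instrument module
inst = pysat.Instrument(platform='platform', name='name', tag='', sat_id='')

# Re-run the module's list_files routine and update the stored file list
inst.files.refresh()
print(inst.files.files.head())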
Example #4
def compare_model_and_inst(pairs=None, inst_name=[], mod_name=[],
                           methods=['all']):
    """Compare modelled and measured data

    .. deprecated:: 2.2.0
      `compare_model_and_inst` will be removed in pysat 3.0.0; it will
      be added to pysatModels

    Parameters
    ------------
    pairs : xarray.Dataset instance
        Dataset containing only the desired observation-model data pairs
    inst_name : list of strings
        ordered list of instrument measurements to compare to modelled data
    mod_name : list of strings
        ordered list of modelled data to compare to instrument measurements
    methods : list of strings
        statistics to calculate.  See Notes for accepted inputs

    Returns
    ----------
    stat_dict : dict of dicts
        Dictionary where the first layer of keys denotes the instrument data
        name and the second layer provides the desired statistics
    data_units : dict
        Dictionary containing the units for the data

    Notes
    -----
    Statistics are calculated using PyForecastTools (imported as verify).
    See notes there for more details.

    all - all statistics
    all_bias - bias, meanPercentageError, medianLogAccuracy,
               symmetricSignedBias
    accuracy - returns dict with mean squared error, root mean squared error,
               mean absolute error, and median absolute error
    scaledAccuracy - returns dict with normalized root mean squared error, mean
                     absolute scaled error, mean absolute percentage error,
                     median absolute percentage error, median symmetric
                     accuracy
    bias - scale-dependent bias as measured by the mean error
    meanPercentageError - mean percentage error
    medianLogAccuracy - median of the log accuracy ratio
    symmetricSignedBias - Symmetric signed bias, as a percentage
    meanSquaredError - mean squared error
    RMSE - root mean squared error
    meanAbsError - mean absolute error
    medAbsError - median absolute error
    nRMSE - normalized root mean squared error
    scaledError - scaled error (see PyForecastTools for references)
    MASE - mean absolute scaled error
    forecastError - forecast error (see PyForecastTools for references)
    percError - percentage error
    absPercError - absolute percentage error
    logAccuracy - log accuracy ratio
    medSymAccuracy - Scaled measure of accuracy
    meanAPE - mean absolute percentage error

    """
    import verify  # PyForecastTools
    from pysat import utils

    warnings.warn(' '.join(["This function is deprecated here and will be",
                            "removed in pysat 3.0.0. Please use",
                            "pysatModelUtils instead:",
                            "https://github.com/pysat/pysatModelUtils"]),
                  DeprecationWarning, stacklevel=2)

    method_rout = {"bias": verify.bias, "accuracy": verify.accuracy,
                   "meanPercentageError": verify.meanPercentageError,
                   "medianLogAccuracy": verify.medianLogAccuracy,
                   "symmetricSignedBias": verify.symmetricSignedBias,
                   "meanSquaredError": verify.meanSquaredError,
                   "RMSE": verify.RMSE, "meanAbsError": verify.meanAbsError,
                   "medAbsError": verify.medAbsError, "MASE": verify.MASE,
                   "scaledAccuracy": verify.scaledAccuracy,
                   "nRMSE": verify.nRMSE, "scaledError": verify.scaledError,
                   "forecastError": verify.forecastError,
                   "percError": verify.percError, "meanAPE": verify.meanAPE,
                   "absPercError": verify.absPercError,
                   "logAccuracy": verify.logAccuracy,
                   "medSymAccuracy": verify.medSymAccuracy}

    replace_keys = {'MSE': 'meanSquaredError', 'MAE': 'meanAbsError',
                    'MdAE': 'medAbsError', 'MAPE': 'meanAPE',
                    'MdSymAcc': 'medSymAccuracy'}

    # Grouped methods for things that don't have convenience functions
    grouped_methods = {"all_bias": ["bias", "meanPercentageError",
                                    "medianLogAccuracy",
                                    "symmetricSignedBias"],
                       "all": list(method_rout.keys())}

    # Work on a copy so the caller's list (and the mutable default) is not
    # modified in place
    methods = list(methods)

    # Replace any group method keys with the grouped methods
    for gg in [(i, mm) for i, mm in enumerate(methods)
               if mm in list(grouped_methods.keys())]:
        # Extend the methods list to include all the grouped methods
        methods.extend(grouped_methods[gg[1]])
        # Remove the grouped method key
        methods.pop(gg[0])

    # Ensure there are no duplicate methods
    methods = list(set(methods))

    # Test the input
    if pairs is None:
        raise ValueError('must provide Dataset of paired observations')

    if len(inst_name) != len(mod_name):
        raise ValueError('must provide equal number of instrument and model ' +
                         'data names for comparison')

    if not np.all([iname in pairs.data_vars.keys() for iname in inst_name]):
        raise ValueError('unknown instrument data value supplied')

    if not np.all([iname in pairs.data_vars.keys() for iname in mod_name]):
        raise ValueError('unknown model data value supplied')

    if not np.all([mm in list(method_rout.keys()) for mm in methods]):
        known_methods = list(method_rout.keys())
        known_methods.extend(list(grouped_methods.keys()))
        unknown_methods = [mm for mm in methods
                           if mm not in list(method_rout.keys())]
        raise ValueError('unknown statistical method(s) requested:\n' +
                         '{:}\nuse only:\n{:}'.format(unknown_methods,
                                                      known_methods))

    # Initialize the output
    stat_dict = {iname: dict() for iname in inst_name}
    data_units = {iname: pairs.data_vars[iname].units for iname in inst_name}

    # Cycle through all of the data types
    for i, iname in enumerate(inst_name):
        # Determine whether the model data needs to be scaled
        iscale = utils.scale_units(pairs.data_vars[iname].units,
                                   pairs.data_vars[mod_name[i]].units)
        mod_scaled = pairs.data_vars[mod_name[i]].values.flatten() * iscale

        # Flatten both data sets, since accuracy routines require 1D arrays
        inst_dat = pairs.data_vars[iname].values.flatten()

        # Ensure no NaN are used in statistics
        inum = np.where(np.isfinite(mod_scaled) & np.isfinite(inst_dat))[0]


        if inum.shape[0] < 2:
            # Not all data types can use all statistics.  Print warnings
            # instead of stopping processing.  Only valid statistics
            # will be included in output
            logger.info(("{:s} can't calculate stats for {:d} finite "
                         "samples").format(iname, inum.shape[0]))
        else:
            # Calculate all of the desired statistics
            for mm in methods:
                try:
                    stat_dict[iname][mm] = method_rout[mm](mod_scaled[inum],
                                                           inst_dat[inum])

                    # Convenience functions add layers to the output, remove
                    # these layers
                    if hasattr(stat_dict[iname][mm], "keys"):
                        for nn in stat_dict[iname][mm].keys():
                            new = replace_keys[nn] if nn in replace_keys.keys()\
                                else nn
                            stat_dict[iname][new] = stat_dict[iname][mm][nn]
                        del stat_dict[iname][mm]
                except ValueError as verr:
                    # Not all data types can use all statistics.  Print warnings
                    # instead of stopping processing.  Only valid statistics
                    # will be included in output
                    logger.warning("{:s} can't use {:s}: {:}".format(iname, mm, verr))
                except NotImplementedError:
                    # Not all data types can use all statistics.  Print warnings
                    # instead of stopping processing.  Only valid statistics
                    # will be included in output
                    logger.warning("{:s} can't implement {:s}".format(iname, mm))

    return stat_dict, data_units
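A sketch of a call, assuming PyForecastTools (verify) is installed; the paired Dataset, variable names, and units below are made up for illustration. Each paired variable carries a units attribute because the routine scales the model values to the instrument units before computing statistics.

import numpy as np
import xarray as xr

# Hypothetical observation/model pairs on a shared sample dimension
pairs = xr.Dataset(
    {'obs_height': (('sample',), np.array([250., 260., 270., 280.]),
                    {'units': 'km'}),
     'mod_height': (('sample',), np.array([248., 262., 268., 283.]),
                    {'units': 'km'})})

stat_dict, data_units = compare_model_and_inst(pairs=pairs,
                                               inst_name=['obs_height'],
                                               mod_name=['mod_height'],
                                               methods=['bias', 'RMSE'])
print(stat_dict['obs_height'], data_units)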
Example #5
def extract_modelled_observations(inst=None, model=None, inst_name=[],
                                  mod_name=[], mod_datetime_name=None,
                                  mod_time_name=None, mod_units=[],
                                  sel_name=None, method='linear',
                                  model_label='model'):
    """Extracts instrument-aligned data from a modelled data set

    .. deprecated:: 2.2.0
      `extract_modelled_observations` will be removed in pysat 3.0.0; it will
      be added to pysatModels

    Parameters
    ----------
    inst : pysat.Instrument instance
        instrument object for which modelled data will be extracted
    model : xarray Dataset
        modelled data set
    inst_name : list of strings
        list of names of the data series to use for determining instrument
        location
    mod_name : list of strings
        list of names of the data series to use for determining model locations
        in the same order as inst_name.  These must make up a regular grid.
    mod_datetime_name : string
        Name of the data series in the model Dataset containing datetime info
    mod_time_name : string
        Name of the time coordinate in the model Dataset
    mod_units : list of strings
        units for each of the mod_name location attributes.  Currently
        supports: rad/radian(s), deg/degree(s), h/hr(s)/hour(s), m, km, and cm
    sel_name : list of strings or NoneType
        list of names of modelled data indices to append to instrument object,
        or None to append all modelled data (default=None)
    method : string
        Interpolation method.  Supported are 'linear', 'nearest', and
        'splinef2d'.  The last is only supported for 2D data and is not
        recommended here.  (default='linear')
    model_label : string
        name of model, used to identify interpolated data values in instrument
        (default="model")

    Returns
    -------
    added_names : list of strings
        list of names of modelled data added to the instrument

    Notes
    --------
    For best results, select clean instrument data after alignment with model

    """
    from scipy import interpolate
    from pysat import utils

    warnings.warn(' '.join(["This function is deprecated here and will be",
                            "removed in pysat 3.0.0. Please use",
                            "pysatModelUtils instead:",
                            "https://github.com/pysat/pysatModelUtils"]),
                  DeprecationWarning, stacklevel=2)

    # Test input
    if inst is None:
        raise ValueError('Must provide a pysat instrument object')

    if model is None:
        raise ValueError('Must provide modelled data')

    if mod_datetime_name is None:
        raise ValueError('Need datetime key for model datasets')

    if mod_time_name is None:
        raise ValueError('Need time coordinate name for model datasets')

    if len(inst_name) == 0:
        estr = 'Must provide instrument location attribute names as a list'
        raise ValueError(estr)

    if len(inst_name) != len(mod_name):
        estr = 'Must provide the same number of instrument and model '
        estr += 'location attribute names as a list'
        raise ValueError(estr)

    if len(mod_name) != len(mod_units):
        raise ValueError('Must provide units for each model location ' +
                         'attribute')

    inst_scale = np.ones(shape=len(inst_name), dtype=float)
    for i, ii in enumerate(inst_name):
        if ii not in list(inst.data.keys()):
            raise ValueError('Unknown instrument location index ' +
                             '{:}'.format(ii))
        inst_scale[i] = utils.scale_units(mod_units[i],
                                          inst.meta.data.units[ii])

    # Determine which data to interpolate and initialize the interpolated
    # output
    if sel_name is None:
        sel_name = list(model.data_vars.keys())

    for mi in mod_name:
        if mi in sel_name:
            sel_name.pop(sel_name.index(mi))

    # Determine the model time resolution
    tm_sec = (np.array(model.data_vars[mod_datetime_name][1:]) -
              np.array(model.data_vars[mod_datetime_name][:-1])).min()
    tm_sec /= np.timedelta64(1, 's')
    ti_sec = (inst.index[1:] - inst.index[:-1]).min().total_seconds()
    min_del = tm_sec if tm_sec < ti_sec else ti_sec

    # Determine which instrument observations are within the model time
    # resolution of a model run
    mind = list()
    iind = list()
    for i, tt in enumerate(np.array(model.data_vars[mod_datetime_name])):
        del_sec = abs(tt - inst.index).total_seconds()
        if del_sec.min() <= min_del:
            iind.append(del_sec.argmin())
            mind.append(i)

    # Determine the model coordinates closest to the satellite track
    interp_data = dict()
    interp_shape = inst.index.shape if inst.pandas_format else \
        list(inst.data.data_vars.values())[0].shape
    inst_coord = {kk: getattr(inst.data, inst_name[i]).values * inst_scale[i]
                  for i, kk in enumerate(mod_name)}
    for i, ii in enumerate(iind):
        # Cycle through each model data type, since it may not depend on
        # all the dimensions
        for mdat in sel_name:
            # Determine the dimension values
            dims = list(model.data_vars[mdat].dims)
            ndim = model.data_vars[mdat].data.shape
            indices = {mod_time_name: mind[i]}

            # Construct the data needed for interpolation
            values = model[indices][mdat].data
            points = [model.coords[kk].data for kk in dims if kk in mod_name]
            get_coords = True if len(points) > 0 else False
            idims = 0

            while get_coords:
                if inst.pandas_format:
                    # This data iterates only by time
                    xout = ii
                    xi = [inst_coord[kk][ii] for kk in dims if kk in mod_name]
                    get_coords = False
                else:
                    # This data may have additional dimensions
                    if idims == 0:
                        # Determine the number of dimensions
                        idims = len(inst.data.coords)
                        idim_names = list(inst.data.coords.keys())[1:]

                        # Find relevent dimensions for cycling and slicing
                        ind_dims = [k for k, kk in enumerate(inst_name)
                                    if kk in idim_names]
                        imod_dims = [k for k in ind_dims
                                     if mod_name[k] in dims]
                        ind_dims = [list(inst.data.coords.keys()).index(inst_name[k])
                                    for k in imod_dims]

                        # Set the number of cycles
                        icycles = 0
                        ncycles = sum([len(inst.data.coords[inst_name[k]])
                                       for k in imod_dims])
                        cinds = np.zeros(shape=len(imod_dims), dtype=int)

                    # Get the instrument coordinate for this cycle
                    if icycles < ncycles or icycles == 0:
                        ss = [ii if k == 0 else 0 for k in range(idims)]
                        se = [ii + 1 if k == 0 else
                              len(inst.data.coords[idim_names[k-1]])
                              for k in range(idims)]
                        xout = [cinds[ind_dims.index(k)] if k in ind_dims
                                else slice(ss[k], se[k]) for k in range(idims)]
                        xind = [cinds[ind_dims.index(k)] if k in ind_dims
                                else ss[k] for k in range(idims)]
                        xout = tuple(xout)
                        xind = tuple(xind)

                        xi = list()
                        for kk in dims:
                            if kk in mod_name:
                                # This is the next instrument coordinate
                                k = mod_name.index(kk)
                                if k in imod_dims:
                                    # This is an xarray coordiante
                                    xi.append(inst_coord[kk][cinds[k]])
                                else:
                                    # This is an xarray variable
                                    xi.append(inst_coord[kk][xind])

                        # Cycle the indices
                        if len(cinds) > 0:
                            k = 0
                            cinds[k] += 1

                            while cinds[k] > \
                                inst.data.coords.dims[inst_name[imod_dims[k]]]:
                                k += 1
                                if k < len(cinds):
                                    cinds[k-1] = 0
                                    cinds[k] += 1
                                else:
                                    break
                        icycles += 1

                    # If we have cycled through all the coordinates for this
                    # time, move onto the next time
                    if icycles >= ncycles:
                        get_coords = False

                # Interpolate the desired value
                try:
                    yi = interpolate.interpn(points, values, xi, method=method)
                except ValueError as verr:
                    if str(verr).find("requested xi is out of bounds") > 0:
                        # This is acceptable, pad the interpolated data with
                        # NaN
                        logger.warning("{:} for ".format(verr) +
                              "{:s} data at {:}".format(mdat, xi))
                        yi = [np.nan]
                    else:
                        raise ValueError(verr)

                # Save the output
                attr_name = "{:s}_{:s}".format(model_label, mdat)
                if attr_name not in interp_data.keys():
                    interp_data[attr_name] = np.full(shape=interp_shape,
                                                     fill_value=np.nan)
                interp_data[attr_name][xout] = yi[0]

    # Test and ensure the instrument data doesn't already have the interpolated
    # data.  This should not happen
    if np.any([mdat in inst.data.keys() for mdat in interp_data.keys()]):
        raise ValueError("instrument object already contains model data")

    # Update the instrument object and attach units to the metadata
    for mdat in interp_data.keys():
        attr_name = mdat.split("{:s}_".format(model_label))[-1]
        inst.meta[mdat] = {inst.units_label: model.data_vars[attr_name].units}

        if inst.pandas_format:
            inst[mdat] = pds.Series(interp_data[mdat], index=inst.index)
        else:
            inst.data = inst.data.assign(
                interp_key=(list(inst.data.coords.keys()), interp_data[mdat]))
            inst.data = inst.data.rename({"interp_key": mdat})

    return list(interp_data.keys())
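The time-matching step in the middle of this routine can be isolated into a runnable sketch (pandas and numpy only, made-up cadences): each model output time claims the nearest instrument sample, but only when the two are separated by no more than the finer of the two cadences.

import numpy as np
import pandas as pds

# Hypothetical cadences: model output every 60 s, instrument samples every 10 s
mod_times = pds.date_range('2019-01-01', periods=3, freq='60s').values
inst_index = pds.date_range('2019-01-01 00:00:05', periods=18, freq='10s')

tm_sec = (mod_times[1:] - mod_times[:-1]).min() / np.timedelta64(1, 's')
ti_sec = (inst_index[1:] - inst_index[:-1]).min().total_seconds()
min_del = tm_sec if tm_sec < ti_sec else ti_sec

mind, iind = list(), list()
for i, tt in enumerate(mod_times):
    del_sec = abs(tt - inst_index).total_seconds()
    if del_sec.min() <= min_del:
        iind.append(del_sec.argmin())
        mind.append(i)

print(mind, iind)  # model time indices paired with nearest instrument samples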
Example #6
    def to_pysat(self, flatten_twod=True,
                 labels={'units': ('Units', str), 'name': ('Long_Name', str),
                         'notes': ('Var_Notes', str), 'desc': ('CatDesc', str),
                         'min_val': ('ValidMin', float),
                         'max_val': ('ValidMax', float),
                         'fill_val': ('FillVal', float)}):
        """
        Exports loaded CDF data into data, meta for pysat module

        Parameters
        ----------
        flatten_twod : bool
            If True, two-dimensional data is flattened across columns.
            Name mangling is used to group data: the first column is
            'name', the last column is 'name_end', and the columns in
            between are numbered 'name_1', 'name_2', etc. All data for a
            given 2D array may be accessed via data.ix[:, 'item':'item_end'].
            If False, 2D data is stored as a series of DataFrames,
            indexed by Epoch, e.g. data.ix[0, 'item'].  (default=True)

        labels : dict
            Dict where keys are the label attribute names and the values
            are tuples that have the label values and value types in
            that order.
            (default={'units': ('Units', str), 'name': ('Long_Name', str),
                      'notes': ('Var_Notes', str), 'desc': ('CatDesc', str),
                      'min_val': ('ValidMin', float),
                      'max_val': ('ValidMax', float),
                      'fill_val': ('FillVal', float)})

        Returns
        -------
        pandas.DataFrame, pysat.Meta
            Data and Metadata suitable for attachment to a pysat.Instrument
            object.

        Note
        ----
        The *_labels should be set to the values in the file, if present.
        Note that once the meta object returned from this function is attached
        to a pysat.Instrument object then the *_labels on the Instrument
        are assigned to the newly attached Meta object.

        The pysat Meta object will use data with labels that match the patterns
        in *_labels even if the case does not match.

        """
        # Create pysat.Meta object using data above
        # and utilizing the attribute labels provided by the user
        meta = pysat.Meta(pds.DataFrame.from_dict(self.meta, orient='index'),
                          labels=labels)

        cdata = self.data.copy()
        lower_names = [name.lower() for name in meta.keys()]
        for name, true_name in zip(lower_names, meta.keys()):
            if name == 'epoch':
                meta.data.rename(index={true_name: 'epoch'}, inplace=True)
                epoch = cdata.pop(true_name)
                cdata['Epoch'] = epoch

        data = dict()
        index = None
        for varname, df in cdata.items():
            if varname not in ('Epoch', 'DATE'):
                if type(df) == pds.Series:
                    data[varname] = df

                    # CDF data Series are saved using a mix of Range and
                    # Datetime Indexes. This requires that the user specify
                    # the desired index when creating a DataFrame
                    if type(df.index) == pds.DatetimeIndex and index is None:
                        index = df.index

        if index is None:
            raise ValueError(''.join(['cdflib did not load a DatetimeIndex, ',
                                      'not pysat compatible']))

        try:
            data = pds.DataFrame(data, index=index)
        except pds.core.indexes.base.InvalidIndexError as ierr:
            estr = "Invalid times in data file(s): {:}".format(str(ierr))
            logger.warning(estr)
            data = pds.DataFrame(None)

        return data, meta
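The core of the DataFrame assembly above (collecting per-variable Series onto the first DatetimeIndex found) can be shown in isolation with made-up data; the variable names below are illustrative.

import pandas as pds

epoch = pds.date_range('2019-01-01', periods=4, freq='1min')
cdata = {'Epoch': pds.Series(epoch, index=epoch),
         'density': pds.Series([1.0, 1.1, 1.2, 1.3], index=epoch),
         'velocity': pds.Series([400., 410., 420., 430.], index=epoch)}

data = dict()
index = None
for varname, df in cdata.items():
    if varname not in ('Epoch', 'DATE') and isinstance(df, pds.Series):
        data[varname] = df
        # Use the first DatetimeIndex encountered as the shared time index
        if isinstance(df.index, pds.DatetimeIndex) and index is None:
            index = df.index

data = pds.DataFrame(data, index=index)
print(data)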
Example #7
    def set_epoch(self, x_axis_var):
        """Stores epoch dependency

        Parameters
        ----------
        x_axis_var : str
            name of variable

        """

        data_type_description = self._cdf_file.varinq(
            x_axis_var)['Data_Type_Description']

        center_measurement = self._center_measurement
        cdf_file = self._cdf_file
        if self.get_dependency(x_axis_var) is None:
            delta_plus_var = 0.0
            delta_minus_var = 0.0
            has_plus_minus = [False, False]

            xdata = cdf_file.varget(x_axis_var)
            epoch_var_atts = cdf_file.varattsget(x_axis_var)

            # Check for DELTA_PLUS_VAR/DELTA_MINUS_VAR attributes
            if center_measurement:
                if 'DELTA_PLUS_VAR' in epoch_var_atts:
                    delta_plus_var = cdf_file.varget(
                        epoch_var_atts['DELTA_PLUS_VAR'])
                    delta_plus_var_att = cdf_file.varattsget(
                        epoch_var_atts['DELTA_PLUS_VAR'])
                    has_plus_minus[0] = True

                    # Check if a conversion to seconds is required
                    if 'SI_CONVERSION' in delta_plus_var_att:
                        si_conv = delta_plus_var_att['SI_CONVERSION']
                        delta_plus_var = delta_plus_var.astype(float) \
                            * float(si_conv.split('>')[0])
                    elif 'SI_CONV' in delta_plus_var_att:
                        si_conv = delta_plus_var_att['SI_CONV']
                        delta_plus_var = delta_plus_var.astype(float) \
                            * float(si_conv.split('>')[0])

                if 'DELTA_MINUS_VAR' in epoch_var_atts:
                    delta_minus_var = cdf_file.varget(
                        epoch_var_atts['DELTA_MINUS_VAR'])
                    delta_minus_var_att = cdf_file.varattsget(
                        epoch_var_atts['DELTA_MINUS_VAR'])
                    has_plus_minus[1] = True

                    # Check if a conversion to seconds is required
                    if 'SI_CONVERSION' in delta_minus_var_att:
                        si_conv = delta_minus_var_att['SI_CONVERSION']
                        delta_minus_var = \
                            delta_minus_var.astype(float) \
                            * float(si_conv.split('>')[0])
                    elif 'SI_CONV' in delta_minus_var_att:
                        si_conv = delta_minus_var_att['SI_CONV']
                        delta_minus_var = \
                            delta_minus_var.astype(float) \
                            * float(si_conv.split('>')[0])

            if ('CDF_TIME' in data_type_description) \
                    or ('CDF_EPOCH' in data_type_description):
                if self._datetime:
                    # Convert xdata to datetime
                    try:
                        new_xdata = cdflib.cdfepoch.to_datetime(xdata)
                    except TypeError as terr:
                        estr = ("Invalid data file(s). Please contact CDAWeb "
                                "for assistance: {:}".format(str(terr)))
                        logger.warning(estr)
                        new_xdata = []

                    # Add delta to time, if both plus and minus are defined
                    if np.all(has_plus_minus):
                        # This defines delta_time in seconds supplied
                        delta_time = np.asarray((delta_plus_var
                                                 - delta_minus_var) / 2.0)

                        # delta_time may be a single value or an array
                        xdata = [xx + dt.timedelta(seconds=int(delta_time))
                                 if delta_time.shape == ()
                                 else xx + dt.timedelta(seconds=delta_time[i])
                                 for i, xx in enumerate(new_xdata)]
                    else:
                        xdata = new_xdata

                self.set_dependency(x_axis_var, xdata)
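The measurement-centering arithmetic near the end (shifting each time stamp by (DELTA_PLUS_VAR - DELTA_MINUS_VAR) / 2, in seconds) can be sketched on its own with made-up numbers:

import datetime as dt
import numpy as np

# Made-up time stamps and plus/minus deltas already converted to seconds
new_xdata = [dt.datetime(2019, 1, 1, 0, 0, ss) for ss in (0, 10, 20)]
delta_plus_var = np.array([2.0, 2.0, 2.0])
delta_minus_var = np.array([0.0, 0.0, 0.0])

# Shift each time stamp to the center of its measurement interval
delta_time = np.asarray((delta_plus_var - delta_minus_var) / 2.0)
xdata = [xx + dt.timedelta(seconds=int(delta_time))
         if delta_time.shape == ()
         else xx + dt.timedelta(seconds=delta_time[i])
         for i, xx in enumerate(new_xdata)]
print(xdata)  # each time stamp shifted forward by 1 s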
Example #8
def calculate_imf_steadiness(inst,
                             steady_window=15,
                             min_window_frac=0.75,
                             max_clock_angle_std=(90.0 / np.pi),
                             max_bmag_cv=0.5):
    """ Calculate IMF steadiness using clock angle standard deviation and
    the coefficient of variation of the IMF magnitude in the GSM Y-Z plane

    Parameters
    ----------
    inst : pysat.Instrument
        Instrument with OMNI HRO data
    steady_window : int
        Window for calculating running statistical moments in min (default=15)
    min_window_frac : float
        Minimum fraction of points in a window for steadiness to be calculated
        (default=0.75)
    max_clock_angle_std : float
        Maximum standard deviation of the clock angle in degrees
        (default=90.0/np.pi)
    max_bmag_cv : float
        Maximum coefficient of variation of the IMF magnitude in the GSM
        Y-Z plane (default=0.5)

    """

    # We are not going to interpolate through missing values
    rates = {'': 1, '1min': 1, '5min': 5}
    sample_rate = int(rates[inst.tag])
    max_wnum = np.floor(steady_window / sample_rate)
    if max_wnum != steady_window / sample_rate:
        steady_window = max_wnum * sample_rate
        logger.warning("sample rate is not a factor of the statistical window")
        logger.warning(
            "new statistical window is {:.1f}".format(steady_window))

    min_wnum = int(np.ceil(max_wnum * min_window_frac))

    # Calculate the running coefficient of variation of the BYZ magnitude
    byz_mean = inst['BYZ_GSM'].rolling(min_periods=min_wnum,
                                       center=True,
                                       window=steady_window).mean()
    byz_std = inst['BYZ_GSM'].rolling(min_periods=min_wnum,
                                      center=True,
                                      window=steady_window).std()
    inst['BYZ_CV'] = pds.Series(byz_std / byz_mean, index=inst.data.index)

    # Calculate the running circular standard deviation of the clock angle
    circ_kwargs = {'high': 360.0, 'low': 0.0, 'nan_policy': 'omit'}
    try:
        ca_std = \
            inst['clock_angle'].rolling(min_periods=min_wnum,
                                        window=steady_window,
                                        center=True).apply(stats.circstd,
                                                           kwargs=circ_kwargs,
                                                           raw=True)
    except TypeError:
        warnings.warn(' '.join([
            'To automatically remove NaNs from the',
            'calculation, please upgrade to scipy 1.4 or', 'newer'
        ]))
        circ_kwargs.pop('nan_policy')
        ca_std = \
            inst['clock_angle'].rolling(min_periods=min_wnum,
                                        window=steady_window,
                                        center=True).apply(stats.circstd,
                                                           kwargs=circ_kwargs,
                                                           raw=True)
    inst['clock_angle_std'] = pds.Series(ca_std, index=inst.data.index)

    # Determine how long the clock angle and IMF magnitude are steady
    imf_steady = np.zeros(shape=inst.data.index.shape)

    steady = False
    for i, cv in enumerate(inst.data['BYZ_CV']):
        if steady:
            del_min = int(
                (inst.data.index[i] - inst.data.index[i - 1]).total_seconds() /
                60.0)
            if np.isnan(cv) or np.isnan(ca_std[i]) or del_min > sample_rate:
                # Reset the steadiness flag if fill values are encountered, or
                # if an entry is missing
                steady = False

        if cv <= max_bmag_cv and ca_std[i] <= max_clock_angle_std:
            # Steadiness conditions have been met
            if steady:
                imf_steady[i] = imf_steady[i - 1]

            imf_steady[i] += sample_rate
            steady = True

    inst['IMF_Steady'] = pds.Series(imf_steady, index=inst.data.index)
    return
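A sketch of typical use, assuming pysat 2.x where this routine and a clock-angle helper live in the omni_hro instrument module, and assuming OMNI HRO data are already downloaded for the requested day. The routine modifies the Instrument in place, adding BYZ_CV, clock_angle_std, and IMF_Steady.

import pysat
from pysat.instruments import omni_hro

inst = pysat.Instrument(platform='omni', name='hro', tag='1min')
inst.load(2019, 1)  # year, day of year

# clock_angle and BYZ_GSM must exist before the steadiness calculation
omni_hro.calculate_clock_angle(inst)
omni_hro.calculate_imf_steadiness(inst, steady_window=15,
                                  min_window_frac=0.75)

print(inst['IMF_Steady'].max())  # longest steady stretch, in minutes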