Ejemplo n.º 1
0
def sub_core(other, newdata):
    """Delete the rows selected by *other* from *newdata*.

    Args:
        other (int, slice, callable or list of int/bool):
            Row selector; it is handed unchanged to ``newdata.del_rows``.
        newdata:
            The object to modify in place. It must provide ``del_rows``,
            ``shape`` and ``_data._setas``.

    Returns:
        *newdata* after the rows have been removed, or ``NotImplemented`` when
        *other* is not a supported selector type.
    """
    if isinstance(other, (slice, int_types)) or callable(other):
        supported = True
    elif isinstance(other, list):
        supported = all_type(other, int_types) or all_type(other, bool)
    else:
        supported = False

    if not supported:
        # Return early: NotImplemented has no ``_data`` attribute, so falling
        # through to the shape update below would raise AttributeError.
        return NotImplemented

    newdata.del_rows(other)
    # Keep the column-assignment object's idea of the shape in step with the data.
    newdata._data._setas.shape = newdata.shape
    return newdata
Ejemplo n.º 2
0
    def std(self, column=None, sigma=None, bounds=None):
        """Find the (optionally weighted) standard deviation of a data column.

        Args:
            column (index):
                Column to calculate the standard deviation of.

        Keyword Arguments:
            sigma (column index or array):
                The uncertainty noted for each value. Either an iterable of floats
                with one entry per row, or a column index holding the uncertainties.
            bounds (callable):
                A callable function that takes a single argument list of
                numbers representing one row, and returns True for all rows to search in.

        Returns:
            (float):
                The standard deviation of the data.

        Note:
            If column is not defined (or is None) the :py:attr:`DataFile.setas` column
            assignments are used.

            The uncertainties are normalised by their largest absolute value and floored
            at 1e-8; the weights are then ``1 / sigma**2`` with NaN weights replaced by 0.
            When no uncertainties are available every row gets the same weight.

        .. todo::
            Fix the row index when the bounds function is used - see note of :py:meth:`AnalysisMixin.max`
        """
        _ = self._col_args(scalar=True, ycol=column, yerr=sigma)

        masked = bounds is not None
        if masked:
            self._push_mask()
        try:
            if masked:
                self._set_mask(bounds, True, _.ycol)

            if isIterable(sigma) and len(sigma) == len(self) and all_type(sigma, float):
                sigma = np.array(sigma)
            # NOTE(review): this is a truthiness test, so an error column resolved
            # to index 0 would be skipped - confirm against _col_args.
            elif _.yerr:
                sigma = self.data[:, _.yerr]
            else:
                sigma = np.ones(len(self))

            ydata = self.data[:, _.ycol]

            # Normalise so the largest uncertainty is 1, then floor tiny values so
            # that the inverse-variance weights stay finite.
            sigma = np.abs(sigma) / np.nanmax(np.abs(sigma))
            sigma = np.where(sigma < 1e-8, 1e-8, sigma)
            weights = 1 / sigma ** 2
            weights[np.isnan(weights)] = 0.0

            # np.cov of a single variable gives its (weighted) variance.
            return np.sqrt(np.cov(ydata, aweights=weights))
        finally:
            # Always restore the caller's mask, even if the calculation failed.
            if masked:
                self._pop_mask()
Ejemplo n.º 3
0
    def mean(self, column=None, sigma=None, bounds=None):
        """Find the mean value of a data column.

        Args:
            column (index):
                Column to calculate the mean of.

        Keyword Arguments:
            sigma (column index or array):
                The uncertainty noted for each value in the mean. Either an iterable of
                floats with one entry per row, or a column index holding the uncertainties.
            bounds (callable):
                A callable function that takes a single argument list of
                numbers representing one row, and returns True for all rows to search in.

        Returns:
            (float or tuple of (float, float)):
                With no uncertainties, the plain mean of the column. With uncertainties,
                a tuple of the inverse-variance weighted mean and an error estimate
                calculated as ``sqrt(sum(sigma**2)) / len(sigma)``.

        Note:
            If column is not defined (or is None) the :py:attr:`DataFile.setas` column
            assignments are used.

        .. todo::
            Fix the row index when the bounds function is used - see note of :py:meth:`AnalysisMixin.max`
        """
        _ = self._col_args(scalar=True, ycol=column, yerr=sigma)

        masked = bounds is not None
        if masked:
            self._push_mask()
        try:
            if masked:
                self._set_mask(bounds, True, _.ycol)

            if isIterable(sigma) and len(sigma) == len(self) and all_type(sigma, float):
                sigma = np.array(sigma)
                # Explicit uncertainties were supplied, so force the weighted branch.
                _["has_yerr"] = True
            elif _.has_yerr:
                sigma = self.data[:, _.yerr]

            ydata = self.data[:, _.ycol]
            if not _.has_yerr:
                return ydata.mean()

            # The small offset keeps the weights finite when an uncertainty is zero.
            w = 1 / (sigma ** 2 + 1e-8)
            norm = w.sum(axis=0)
            error = np.sqrt((sigma ** 2).sum(axis=0)) / len(sigma)
            # Weighted mean is sum(w*y)/sum(w); averaging w*y instead of summing it
            # would make the result too small by a factor of the number of rows.
            return (ydata * w).sum(axis=0) / norm, error
        finally:
            # Always restore the caller's mask, even if the calculation failed.
            if masked:
                self._pop_mask()
Ejemplo n.º 4
0
    def swap_column(self, *swp, **kargs):
        """Swap pairs of columns in the data.

        Useful for reordering data for programs that expect columns in a fixed order.

        Args:
            swp (tuple or list of tuples of two elements):
                Each element will be used as a column index (using the normal rules
                for matching columns). The two elements represent the two
                columns that are to be swapped.

        Keyword Arguments:
            headers_too (bool):
                Indicates the column headers are swapped as well (default True).
            setas_too (bool):
                Indicates the column assignments are swapped as well (default True).

        Returns:
            self:
                The modified object, so that calls can be chained.

        Raises:
            TypeError:
                If *swp* is neither a tuple nor a list of two-element tuples.

        Note:
            If swp is a list of tuples, then the method is called again for each
            tuple in the list with the same keyword options.
        """
        headers_too = kargs.pop("headers_too", True)
        setas_too = kargs.pop("setas_too", True)

        # A single positional argument is the pair (or list of pairs) itself.
        if len(swp) == 1:
            swp = swp[0]

        if isinstance(swp, list) and all_type(swp, tuple) and all_size(swp, 2):
            for pair in swp:
                # Pass both options on so that every pair is treated the same way.
                self.swap_column(pair, headers_too=headers_too, setas_too=setas_too)
        elif isinstance(swp, tuple):
            col1 = self._setas.find_col(swp[0])
            col2 = self._setas.find_col(swp[1])
            self[:, [col1, col2]] = self[:, [col2, col1]]
            if headers_too:
                headers = self._setas.column_headers
                headers[col1], headers[col2] = headers[col2], headers[col1]
            if setas_too:
                self._setas[col1], self._setas[col2] = self._setas[col2], self._setas[col1]
        else:
            raise TypeError("Swap parameter must be either a tuple or a list of tuples")
        return self
Ejemplo n.º 5
0
def _average_list(listob):
    """Average a list of items picking an appropriate average given the type.

    If no appropriate average is found None will be returned.
    if listob contains nested lists or dicts of numbers then try to average
    individual items within the lists/keys.
    """
    if len(listob) == 0:
        return None
    if not all_type(listob, type(listob[0])):
        return None  # all of the list isn't the same type
    typex = listob[0]
    if isinstance(typex, numbers.Number):
        ret = sum(listob) / float(len(listob))
    elif isinstance(typex, np.ndarray):
        try:
            ret = np.average(tuple(listob))
        except Exception:  # probably incompatible array sizes
            ret = None
    elif isinstance(
            typex,
        (tuple, list)):  # recursively go through sub lists averaging values
        nl = zip(*listob)
        ret = [_average_list(list(i)) for i in nl]
        if isinstance(typex, tuple):
            ret = tuple(ret)
    elif isinstance(
            typex,
            dict):  # recursively go through dictionary keys averaging values
        ret = {}
        for k in typex.keys():
            ret[k] = _average_list([listob[i][k] for i in listob])
    elif isinstance(typex, string_types):
        if all(i == typex for i in listob):
            ret = listob[0]  # all the same text return that string
        else:
            ret = None
    else:
        return None
    return ret
Ejemplo n.º 6
0
def _average_list(listob):
    """Average a list of items picking an appropriate average given the type.

    If no appropriate average is found None will be returned.
    if listob contains nested lists or dicts of numbers then try to average
    individual items within the lists/keys.
    """
    if len(listob) == 0:
        return None
    if not all_type(listob, type(listob[0])):
        return None  # all of the list isn't the same type
    typex = listob[0]
    if isinstance(typex, numbers.Number):
        ret = sum(listob) / float(len(listob))
    elif isinstance(typex, np.ndarray):
        try:
            ret = np.average(tuple(listob))
        except Exception:  # probably incompatible array sizes
            ret = None
    elif isinstance(typex, (tuple, list)):  # recursively go through sub lists averaging values
        nl = zip(*listob)
        ret = [_average_list(list(i)) for i in nl]
        if isinstance(typex, tuple):
            ret = tuple(ret)
    elif isinstance(typex, dict):  # recursively go through dictionary keys averaging values
        ret = {}
        for k in typex.keys():
            ret[k] = _average_list([listob[i][k] for i in listob])
    elif isinstance(typex, string_types):
        if all(i == typex for i in listob):
            ret = listob[0]  # all the same text return that string
        else:
            ret = None
    else:
        return None
    return ret
Ejemplo n.º 7
0
    def slice(self, *args, **kwargs):  # pylint: disable=arguments-differ
        """Return the metadata for each item/file in the top level group.

        Args:
            *args (string or list of strings):
                The metadata key(s) to return from each item.

        Keyword Arguments:
            values_only (bool):
                If given and *output* is not set, the output mode is "smart" rather
                than "dict". Mostly useful when given a single key string.
            output (str or type):
                Controls the output format. Possible values are

                - "dict" or dict - return a list of dictionary subsets of the metadata from each item
                - "list" or list - return a list of values of each item of the metadata
                - "array" or np.ndarray - like list above, but returns a numpy array. This can create
                  a 2D array from multiple keys
                - "data" or DataFile - returns the metadata in a Data object where the column headers
                  are the metadata keys.
                - "frame" - returns the metadata as a Pandas DataFrame object
                - "smart" - behaves as *list* if every item yields exactly one and the same key,
                  otherwise as *dict*.
            mask_missing (bool):
                If true, then metadata entries missing in members of the folder are returned as masked
                values (or None). If False, then an exception is raised if any entries are missing.

        Returns:
            ret (list of dict, list of values or tuples, array, DataFrame or Data):
                Depending on *values_only* or *output*, the sliced dictionaries or the
                values of the items.

        Raises:
            SyntaxError:
                If unknown keyword arguments are given or *output* is not a recognised value.
            KeyError:
                If an argument is not a key name or list of key names, or if *mask_missing* is
                False and a key is not present in every member of the folder.

        Note:
            Metadata values that are non-empty lists are expanded into separate entries
            named ``key[0]``, ``key[1]`` and so on.

        To do:
            This should probably be a function in baseFolder and should have
            recursive options (build a dictionary of metadata values). And probably
            options to extract other parts of objects (first row or whatever).
        """
        values_only = kwargs.pop("values_only", False)
        output = kwargs.pop("output", None)
        mask_missing = kwargs.pop("mask_missing", False)
        if kwargs:
            raise SyntaxError("Unused keyword arguments : {}".format(kwargs))
        if output is None:  # Sort out a definitive value of output
            output = "dict" if not values_only else "smart"
        if isinstance(output, string_types):
            output = output.lower()
        if output not in [
                "dict",
                "list",
                "array",
                "data",
                "frame",
                "smart",
                dict,
                list,
                np.ndarray,
                DataFile,
        ]:  # Check for good output value
            raise SyntaxError(
                "output of slice metadata must be either dict, list, or array not {}"
                .format(output))

        keys = []
        for k in args:
            if isinstance(k, string_types):
                keys.append(k)
            elif isiterable(k) and all_type(k, string_types):
                keys.extend(k)
            else:
                raise KeyError(
                    "{} cannot be used as a key name or set of key names".
                    format(type(k)))
        if not mask_missing:
            for k in keys:
                if k not in self.common_keys:
                    raise KeyError(
                        "{} is not a key in all members of the folder".format(
                            k))

        results = [{k: d[k] for k in keys if k in d} for d in self._folder]

        for r in results:  # Expand the results where a result contains a list
            for k in keys:
                if k in r and islike_list(r[k]) and len(r[k]) > 0:
                    v = r.pop(k)
                    r.update(
                        {"{}[{}]".format(k, i): vi
                         for i, vi in enumerate(v)})

        if output == "smart":
            # Guard the lookup of the first key: the first result may be empty
            # even when later results are not.
            first_keys = list(results[0].keys()) if results else []
            single_common_key = all(
                len(r) == 1 and bool(first_keys)
                and list(r.keys())[0] == first_keys[0] for r in results)
            output = "list" if single_common_key else "dict"

        if output in ["list", list]:
            # Collect the keys in order of first appearance so that the order of
            # the values in each tuple is reproducible.
            seen = set()
            keys = []
            for r in results:
                for k in r:
                    if k not in seen:
                        seen.add(k)
                        keys.append(k)
            if len(keys) == 1:
                ret = [r.get(keys[0], None) for r in results]
            else:
                ret = [tuple(r.get(k, None) for k in keys) for r in results]
        elif output in ["dict", dict]:  # accept the type as well as the name
            ret = results
        else:
            from pandas import DataFrame
            from Stoner import Data

            frame = DataFrame(results)
            mask = frame.isna()
            if output == "frame":
                ret = frame
            else:
                ret = Data(frame)
                ret.mask = mask
                if output in ["array", np.ndarray]:
                    ret = ret.data
        return ret
Ejemplo n.º 8
0
    def __getitem__(self, ix):
        """Indexing function for DataArray.

        The index is first normalised (column names are translated to column numbers
        with ``self._setas.find_col``), then passed to the parent class ``__getitem__``.
        The extra attributes (``setas``, ``column_headers``, ``i``, ``isrow``, ``name``)
        are then set on the result where it is still an array.

        Args:
            ix (various): Index to find.

        Returns:
            An indexed part of the DataArray object with extra attributes, or a scalar
            of the array's dtype when the result is 0-dimensional or has a single element.

        Notes:
            This tries to support all of the indexing operations of a regular numpy array,
            plus the special operations where columns are named.

        Warning:
            The code almost certainly makes some assumptions that DataArray is 1D or 2D and
            may blow up with 3D arrays! On the other hand it has a special case exception for where
            you give a string as the first index element and assumes that you've forgotten that we're
            row major and tries to do the right thing.
        """
        # Is this going to be a single row? True for an integer index, or a tuple
        # whose first element is an integer.
        single_row = isinstance(
            ix, int_types) or (isinstance(ix, tuple) and len(ix) > 0
                               and isinstance(ix[0], int_types))
        # If the index is a single string type, then build a column accessing index
        if isinstance(ix, string_types):
            if self.ndim > 1:
                ix = (slice(None, None, None), self._setas.find_col(ix))
            else:
                ix = (self._setas.find_col(ix), )
        # From here on the index is normalised into a tuple; only the first
        # matching case below is applied.
        if isinstance(ix, (int_types, slice)):
            ix = (ix, )
        elif isinstance(ix, tuple) and ix and isinstance(
                ix[-1], string_types):  # index still has a string type in it
            ix = list(ix)
            ix[-1] = self._setas.find_col(ix[-1])
            ix = tuple(ix)
        elif (isinstance(ix, tuple) and ix
              and isinstance(ix[-1], _np_.ndarray)
              and self.ndim == 1):  # Indexing with a numpy array
            # A one-element tuple holding an array is unwrapped to the bare array.
            if len(ix) == 1:
                ix = ix[0]
        elif isinstance(ix, tuple) and ix and isiterable(
                ix[-1]):  # indexing with a list of columns
            ix = list(ix)
            # A list of booleans is converted to the positions that are True
            # before the column lookup.
            if all_type(ix[-1], bool):
                ix[-1] = _np_.arange(len(ix[-1]))[ix[-1]]
            ix[-1] = [self._setas.find_col(c) for c in ix[-1]]
            ix = tuple(ix)
        elif isinstance(ix, tuple) and ix and isinstance(
                ix[0], string_types):  # oops! backwards indexing
            # Move the leading column name to the end of the index.
            c = ix[0]
            ix = list(ix[1:])
            ix.append(self._setas.find_col(c))
            ix = tuple(ix)
            # Now can index with our constructed multidimensional indexer
        ret = super(DataArray, self).__getitem__(ix)
        # 0-dimensional results and single-element arrays are returned as a scalar
        # of the array's dtype.
        if ret.ndim == 0 or isinstance(ret, _np_.ndarray) and ret.size == 1:
            return ret.dtype.type(ret)
        elif not isinstance(ret, _np_.ndarray):  # bugout for scalar returns
            return ret
        elif ret.ndim >= 2:  # Potentially 2D array here
            if ix[-1] is None:  # Special case for increasing an array dimension
                if self.ndim == 1:  # Going from 1 D to 2D
                    ret.setas = self.setas.clone
                    ret.i = self.i
                    ret.name = getattr(self, "name", "Column")
                return ret
            else:  # A regular 2D array
                ret.isrow = single_row
                ret.setas = self.setas.clone
                ret.column_headers = copy.copy(self.column_headers)
                # When a list of columns was selected, keep only their headers.
                if len(ix) > 0 and isiterable(ix[-1]):  # pylint: disable=len-as-condition
                    ret.column_headers = list(
                        _np_.array(ret.column_headers)[ix[-1]])
                # Sort out whether we need an array of row labels
                if isinstance(self.i, _np_.ndarray) and len(ix) > 0:  # pylint: disable=len-as-condition
                    if isiterable(ix[0]) or isinstance(ix[0], int_types):
                        ret.i = self.i[ix[0]]
                    else:
                        # NOTE(review): a slice as the row index resets the row
                        # labels to 0 rather than slicing self.i - confirm this
                        # is intended.
                        ret.i = 0
                else:
                    ret.i = self.i
        elif ret.ndim == 1:  # Potentially a single row or single column
            ret.isrow = single_row
            # NOTE(review): this compares the number of index elements with the
            # length of setas - confirm that this is the intended test.
            if len(ix) == len(self.setas):
                tmp = _np_.array(self.setas)[ix[-1]]
                ret.setas(tmp)
                tmpcol = _np_.array(self.column_headers)[ix[-1]]
                ret.column_headers = tmpcol
            else:
                ret.setas = self.setas.clone
                ret.column_headers = copy.copy(self.column_headers)
            # Sort out whether we need an array of row labels
            if single_row and isinstance(self.i, _np_.ndarray):
                ret.i = self.i[ix[0]]
            else:  # This is a single element?
                ret.i = self.i
            if not single_row:
                ret.name = self.column_headers
        return ret