Example #1
    def _linearize(self):

        if not self.linear or self._data is None:
            return

        self._linear = False  # to avoid action of the observer

        if self._squeeze_ndim > 1:
            error_("Linearization is only implemented for 1D data")
            return

        data = self._data.squeeze()

        # try to find an increment
        if data.size > 1:
            inc = np.diff(data)
            variation = (inc.max() - inc.min()) / data.ptp()
            if variation < 1.0e-5:
                self._increment = data.ptp() / (data.size - 1) * np.sign(inc[0])
                self._offset = data[0]
                self._size = data.size
                self._data = None
                self._linear = True
            else:
                self._linear = False
        else:
            self._linear = False
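
A standalone check of the linearity criterion used above (plain numpy; the names are illustrative, not part of the library): an array is declared linearizable when the spread of its successive increments stays below 1e-5 of its full range.

    import numpy as np

    data = np.linspace(0.0, 10.0, 101)  # evenly spaced values
    inc = np.diff(data)
    variation = (inc.max() - inc.min()) / np.ptp(data)
    assert variation < 1.0e-5           # evenly spaced, so linearizable

    # the array then reduces to offset + i * increment
    increment = np.ptp(data) / (data.size - 1) * np.sign(inc[0])
    offset = data[0]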
Example #2
    def coord(self, dim="x"):
        """
        Return the coordinates along the given dimension.

        Parameters
        ----------
        dim : int or str, optional, default='x'
            A dimension index or name.
            If an integer is provided, it is equivalent to the `axis` parameter of numpy arrays.

        Returns
        -------
        |Coord|
            Coordinates along the given axis.
        """
        idx = self._get_dims_index(dim)[0]  # should generate an error if the
        # dimension name is not recognized
        if idx is None:
            return None

        if self._coordset is None:
            return None

        # idx is not necessarily the position of the coordinates in the CoordSet:
        # a transposition may have taken place, so we retrieve the coordinate by name
        name = self.dims[idx]
        if name in self._coordset.names:
            idx = self._coordset.names.index(name)
            return self._coordset[idx]
        else:
            error_(f"could not find this dimenson name: `{name}`")
            return None
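
A small usage sketch (assuming the top-level `spectrochempy` import used in other examples). It also illustrates the early-return branch above: a dataset without an attached coordset simply yields None.

    import spectrochempy as scp

    nd = scp.NDDataset([[1.0, 2.0], [3.0, 4.0]])
    print(nd.coord("x"))  # lookup by dimension name
    print(nd.coord(0))    # lookup by integer index (numpy-style axis)
    # both print None here, since no coordset was attached to `nd`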
Example #3
    def plot(self, **kwargs):
        """
        Generic plot function.

        This applies to a |NDDataset| but actually delegates the work to a plotter defined by the parameter ``method``.
        """

        # --------------------------------------------------------------------
        # select plotter depending on the dimension of the data
        # --------------------------------------------------------------------

        method = kwargs.pop('method', 'generic')

        # Find or guess the adequate plotter
        # -----------------------------------

        _plotter = getattr(self, f"plot_{method.replace('+', '_')}", None)
        if _plotter is None:
            # no plotter found
            error_(f'The specified plotter for method '
                   f'`{method}` was not found!')
            raise IOError

        # Execute the plotter
        # --------------------

        return _plotter(**kwargs)
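
The dispatch is purely name-based: method='map' resolves to `self.plot_map`, and a composite method such as 'scatter+pen' resolves to `plot_scatter_pen`. A minimal standalone sketch of the same pattern (hypothetical class, not the library's):

    class Plotter:
        def plot(self, method="generic", **kwargs):
            plotter = getattr(self, f"plot_{method.replace('+', '_')}", None)
            if plotter is None:
                raise IOError(f"plotter for method `{method}` not found")
            return plotter(**kwargs)

        def plot_scatter_pen(self, **kwargs):
            return "scatter+pen plot"

    print(Plotter().plot(method="scatter+pen"))  # -> scatter+pen plot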
Example #4
    def __getitem__(self, key):

        # search on the preferences
        if self.parent is not None:
            res = getattr(self.parent, f"{self.name}_{key}")
        elif hasattr(plot_preferences, key):
            res = getattr(plot_preferences, key)
        elif hasattr(preferences, key):
            res = getattr(preferences, key)
        else:
            alias = self._get_alias(key)
            if alias:
                if isinstance(alias, list):
                    res = PreferencesSet(
                        parent=self,
                        name=key,
                        **dict([(n, getattr(self, f"{key}_{n}"))
                                for n in alias]),
                    )
                else:
                    res = getattr(self, alias)
            else:
                res = super().__getitem__(key)
                if res is None:
                    error_(f"key `{key}` not found")

        return res
Example #5
    def __setitem__(self, key, value):

        # also change the corresponding preferences
        if hasattr(plot_preferences, key):
            try:
                setattr(plot_preferences, key, value)
            except TraitError:
                value = type(plot_preferences.traits()[key].default_value)(value)
                setattr(plot_preferences, key, value)
        elif hasattr(preferences, key):
            setattr(preferences, key, value)
        elif key in self.keys():
            newkey = f'{self.name}_{key}'
            setattr(plot_preferences, newkey, value)
            self.parent[newkey] = value
            return
        else:
            # try to find an alias for matplotlib values
            alias = self._get_alias(key)
            if alias:
                newkey = f'{alias}_{key}'
                setattr(plot_preferences, newkey, value)
                self.parent[newkey] = value
            else:
                error_(f'key `{key}` not found')
            return

        super().__setitem__(key, value)
Example #6
    def execute(self, localvars=None):
        co = 'from spectrochempy import *\n' \
             'import spectrochempy as scp\n' + self._content
        code = compile(co, '<string>', 'exec')
        if localvars is None:
            # locals was not passed; try to avoid missing values for names
            # such as 'project', 'proj', 'newproj'...
            # other missing names, if they correspond to the parent project,
            # will be substituted later upon exception
            localvars = locals()

        try:
            exec(code, globals(), localvars)
            return

        except NameError as e:
            # most of the time, a script applies to a project
            # let's try to substitute the parent to the missing name
            regex = re.compile(r"'(\w+)'")
            s = regex.search(e.args[0]).group(1)
            localvars[s] = self.parent  # lgtm [py/modification-of-locals]
            # TODO: check if this is a real error or not (need to come
            #  back to this later)
        try:
            exec(code, globals(), localvars)
        except NameError as e:
            error_(str(e) + '. Pass the variable `locals()`: this may solve '
                   'this problem!')
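
The NameError recovery above can be reproduced in isolation: parse the missing name out of the exception message and inject a substitute into the local namespace before re-executing (here a plain string stands in for `self.parent`):

    import re

    code = compile("print(proj)", "<string>", "exec")
    local_vars = {}
    try:
        exec(code, globals(), local_vars)
    except NameError as e:
        missing = re.search(r"'(\w+)'", e.args[0]).group(1)  # -> 'proj'
        local_vars[missing] = "<the parent project>"         # stand-in for self.parent
        exec(code, globals(), local_vars)                    # now succeeds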
Example #7
    def __setitem__(self, items, value):

        if self.linear:
            error_("Linearly defined array are readonly")
            return

        super().__setitem__(items, value)
Example #8
    def makestyle(self, filename='mydefault', to_mpl=False):

        if filename.startswith('scpy'):
            error_('`scpy` is READ-ONLY. Please use another style name.')
            return

        txt = ""
        sline = ""

        for key in mpl.rcParams.keys():
            if key in ['animation.avconv_args', 'animation.avconv_path', 'animation.html_args', 'keymap.all_axes',
                       'mathtext.fallback_to_cm', 'validate_bool_maybe_none', 'savefig.jpeg_quality',
                       'text.latex.preview', 'backend', 'backend_fallback', 'date.epoch', 'docstring.hardcopy',
                       'figure.max_open_warning', 'figure.raise_window', 'interactive', 'savefig.directory', 'timezone',
                       'tk.window_focus', 'toolbar', 'webagg.address', 'webagg.open_in_browser', 'webagg.port',
                       'webagg.port_retries']:
                continue
            val = str(mpl.rcParams[key])
            sav = ''
            while val != sav:
                sav = val
                val = val.replace('  ', ' ')
            line = f'{key:40s} : {val}\n'
            if line[0] != sline:
                txt += '\n'
                sline = line[0]
            if key not in ['axes.prop_cycle']:
                line = line.replace('[', '').replace(']', "").replace('\'', '').replace('"', '')
            if key == 'savefig.bbox':
                line = f'{key:40s} : standard\n'
            txt += line.replace("#", '')

        # Non-matplotlib parameters:
        # some parameters are not saved in matplotlib style sheets, so we will add them here
        nonmplpars = ['method_1D', 'method_2D', 'method_3D', 'colorbar', 'show_projections', 'show_projection_x',
                      'show_projection_y', 'colormap', 'max_lines_in_stack', 'simplify', 'number_of_x_labels',
                      'number_of_y_labels', 'number_of_z_labels', 'number_of_contours', 'contour_alpha',
                      'contour_start', 'antialiased', 'rcount', 'ccount']
        txt += '\n\n##\n## ADDITIONAL PARAMETERS FOR SPECTROCHEMPY\n##\n'
        for par in nonmplpars:
            txt += f"##@{par:37s} : {getattr(self, par)}\n"

        stylesheet = (pathclean(self.stylesheets) / filename).with_suffix('.mplstyle')
        stylesheet.write_text(txt)

        if to_mpl:
            # make it also accessible to pyplot
            stylelib = (pathclean(mpl.get_configdir()) / 'stylelib' / filename).with_suffix('.mplstyle')
            stylelib.write_text(txt)

        # plot_preferences.traits()['style'].trait_types = plot_preferences.traits()['style'].trait_types +\
        #                                                       (Unicode(filename),)
        self.style = filename
        return self.style
Example #9
    def wrapper(dataset, **kwargs):

        # On which axis do we want to shift (get axis from arguments)
        axis, dim = dataset.get_axis(**kwargs, negative_axis=True)

        # output dataset inplace (by default) or not
        if not kwargs.pop("inplace", False):
            new = dataset.copy()  # copy to be sure not to modify this dataset
        else:
            new = dataset

        swapped = False
        if axis != -1:
            new.swapdims(axis, -1, inplace=True)  # must be done in place
            swapped = True

        x = new.coordset[dim]
        if hasattr(x, "_use_time_axis"):
            x._use_time_axis = True  # we need to have dimensionless or time units

        # get the last coord
        if x.unitless or x.dimensionless or x.units.dimensionality == "[time]":

            if not x.linear:
                # This method applies only to linear coordinates,
                # so we try to linearize it
                x = LinearCoord(x)

            if not x.linear:
                raise TypeError("Coordinate x is not linearisable")

            data = method(new.data, **kwargs)
            new._data = data

            # we need to resize the x coordinate array
            x._size = new._data.shape[-1]

            # update with the new td
            new.meta.td[-1] = x.size
            new.history = f"`{method.__name__}` shift performed on dimension `{dim}` with parameters: {kwargs}"

        else:
            error_(
                "zero-filling apply only to dimensions with [time] dimensionality or dimensionless coords\n"
                "The processing was thus cancelled"
            )

        # restore original data order if it was swapped
        if swapped:
            new.swapdims(axis, -1, inplace=True)  # must be done inplace

        return new
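
The wrapped `method` is expected to return data extended along the last axis (zero-filling), after which the linear coordinate is resized through `x._size`. A pure-numpy sketch of such a zero-filling step (hypothetical sizes):

    import numpy as np

    fid = np.ones((4, 100))  # 4 rows of 100 time-domain points
    newsize = 256
    zf = np.zeros(fid.shape[:-1] + (newsize,), dtype=fid.dtype)
    zf[..., : fid.shape[-1]] = fid  # original data first, trailing zeros after
    assert zf.shape == (4, 256)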
Example #10
def test_ndmath_unary_ufuncs_simple_data(nd2d, name, comment):
    nd1 = nd2d.copy() / 1.0e10  # divide to avoid some overflow in exp ufuncs

    # simple unitless NDDataset
    # --------------------------
    assert nd1.unitless

    f = getattr(np, name)
    f(nd1)
    # assert isinstance(r, NDDataset)

    # NDDataset with units
    # ---------------------
    nd1.units = ur.absorbance
    f = getattr(np, name)

    # TODO: some ufuncs suppress the units! see pint.
    skip = False

    # if name not in NDDataset.__remove_units__:
    #
    #     try:
    #         f(Quantity(1., nd1.units)).units
    #     except TypeError as e:
    #         error_(f"{name} :", e)
    #         skip = True
    #     except AttributeError:
    #         if name in ['positive', 'fabs', 'cbrt', 'spacing',
    #                     'signbit', 'isnan', 'isinf', 'isfinite', 'logical_not',
    #                     'log2', 'log10', 'log1p', 'exp2', 'expm1']:
    #             pass  # already solved
    #         else:
    #             info_(f"\n =======> {name} remove units! \n")
    #     except DimensionalityError as e:
    #         error_(f"{name} :", e)
    #         skip = True

    if not skip:
        try:
            f(nd1)
            # assert isinstance(r, NDDataset)

            nd1 = nd2d.copy()  # reset nd

            # with units and mask
            nd1.units = ur.absorbance
            nd1[1, 1] = MASKED

            f(nd1)

        except DimensionalityError as e:
            error_(f"{name}: ", e)
Example #11
    def _plot_generic(self, **kwargs):

        if self._squeeze_ndim == 1:

            ax = plot_1D(self, **kwargs)

        elif self._squeeze_ndim == 2:

            ax = plot_2D(self, **kwargs)

        elif self._squeeze_ndim == 3:

            ax = plot_3D(self, **kwargs)

        else:
            error_("Cannot guess an adequate plotter, nothing done!")
            return False

        return ax
Example #12
    def plot_generic(self, **kwargs):
        """
        The generic plotter.

        It try to guess an adequate basic plot for the data. Other method of plotters are defined explicitely in the
        ``plotters`` package.

        Parameters
        ----------
        ax : :class:`matplotlib.axe`
            the viewplot where to plot.
        kwargs : optional additional arguments

        Returns
        -------
        ax
            Return the handler to ax where the main plot was done
        """

        if self._squeeze_ndim == 1:

            ax = plot_1D(self, **kwargs)

        elif self._squeeze_ndim == 2:

            ax = plot_2D(self, **kwargs)

        elif self._squeeze_ndim == 3:

            ax = plot_3D(self, **kwargs)

        else:
            error_('Cannot guess an adequate plotter, nothing done!')
            return False

        return ax
Example #13
def _cantera_is_not_available():
    if ct is None:
        error_(
            "Missing optional dependency 'cantera'.  Use conda or pip to install cantera."
        )
    return ct is None
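
This guard assumes `ct` was bound at import time by an optional-dependency helper; a common equivalent pattern is sketched below (an assumption, not necessarily how the module actually imports it):

    try:
        import cantera as ct
    except ImportError:
        ct = None  # later checked by _cantera_is_not_available()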
Example #14
def download_iris():
    """
    Download the classical `IRIS` dataset.

    The `IRIS` dataset is a classical example for machine learning. It is downloaded from
    the [UCI distant repository](https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data).

    Returns
    -------
    dataset
        The `IRIS` dataset.

    See Also
    --------
    read : Read data from experimental data.
    """
    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"

    try:
        connection = True
        response = requests.get(url, stream=True, timeout=10)
    except OSError:
        error_(
            "OSError: Cannot connect to the UCI repository. Try Scikit-Learn")
        connection = False

    if connection:  # Download data
        txtdata = ""
        for rd in response.iter_content():
            txtdata += rd.decode("utf8")

        fil = StringIO(txtdata)
        try:
            data = np.loadtxt(fil, delimiter=",", usecols=range(4))
            fil.seek(0)
            labels = np.loadtxt(fil, delimiter=",", usecols=(4, ), dtype="|S")
            labels = list((lab.decode("utf8") for lab in labels))
        except Exception:
            raise OSError("can't read JCAMP file")

        coordx = Coord(
            labels=[
                "sepal_length", "sepal_width", "petal_length", "petal_width"
            ],
            title="features",
        )
        coordy = Coord(labels=labels, title="samples")

        new = NDDataset(
            data,
            coordset=[coordy, coordx],
            title="size",
            name="`IRIS` Dataset",
            units="cm",
        )

        new.history = "Loaded from UC Irvine machine learning repository"

        return new

    else:
        # Cannot download - use the scikit-learn dataset (if scikit-learn is installed)

        sklearn = import_optional_dependency("sklearn", errors="ignore")
        if sklearn is None:
            raise OSError("Failed in uploading the `IRIS` dataset!")
        else:
            from sklearn import datasets

        data = datasets.load_iris()

        coordx = Coord(
            labels=[
                "sepal_length", "sepal_width", "petal_length", "petal_width"
            ],
            title="features",
        )
        labels = [data.target_names[i] for i in data.target]
        coordy = Coord(labels=labels, title="samples")

        new = NDDataset(
            data.data,
            coordset=[coordy, coordx],
            title="size",
            name="`IRIS` Dataset",
            units="cm",
        )

        new.history = "Loaded from scikit-learn datasets"

        return new
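
Hypothetical usage (requires network access to the UCI repository, or scikit-learn as a fallback); the classical IRIS table holds 150 samples of 4 features:

    ds = download_iris()
    print(ds.shape)  # expected: (150, 4)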
Example #15
    def makestyle(self, stylename="mydefault", to_mpl=False):
        """
        Create Matplotlib Style files.

        Parameters
        ----------
        stylename : str, optional, default='mydefault'
            Name of the style sheet to create.
        to_mpl : bool, optional, default=False
            If True, the style sheet is also copied into the matplotlib
            ``stylelib`` directory, making it accessible to pyplot.

        Returns
        -------
        stylename
            Name of the style.
        """
        if stylename.startswith("scpy"):
            error_(
                "Style names starting with `scpy` are READ-ONLY. Please use another style name."
            )
            return

        txt = ""
        sline = ""

        for key in mpl.rcParams.keys():
            if key in [
                    "animation.avconv_args",
                    "animation.avconv_path",
                    "animation.html_args",
                    "keymap.all_axes",
                    "mathtext.fallback_to_cm",
                    "validate_bool_maybe_none",
                    "savefig.jpeg_quality",
                    "text.latex.preview",
                    "backend",
                    "backend_fallback",
                    "date.epoch",
                    "docstring.hardcopy",
                    "figure.max_open_warning",
                    "figure.raise_window",
                    "interactive",
                    "savefig.directory",
                    "timezone",
                    "tk.window_focus",
                    "toolbar",
                    "webagg.address",
                    "webagg.open_in_browser",
                    "webagg.port",
                    "webagg.port_retries",
            ]:
                continue

            val = str(mpl.rcParams[key])
            if val.startswith("CapStyle") or val.startswith("JoinStyle"):
                val = val.split(".")[-1]

            sav = ""
            while val != sav:
                sav = val
                val = val.replace("  ", " ")
            line = f"{key:40s} : {val}\n"
            if line[0] != sline:
                txt += "\n"
                sline = line[0]
            if key not in ["axes.prop_cycle"]:
                line = (line.replace("[", "").replace("]", "").replace(
                    "'", "").replace('"', ""))
            if key == "savefig.bbox":
                line = f"{key:40s} : standard\n"
            txt += line.replace("#", "")

        # Non-matplotlib parameters:
        # some parameters are not saved in matplotlib style sheets, so we will add them here
        nonmplpars = [
            "method_1D",
            "method_2D",
            "method_3D",
            "colorbar",
            "show_projections",
            "show_projection_x",
            "show_projection_y",
            "colormap",
            "max_lines_in_stack",
            "simplify",
            "number_of_x_labels",
            "number_of_y_labels",
            "number_of_z_labels",
            "number_of_contours",
            "contour_alpha",
            "contour_start",
            "antialiased",
            "rcount",
            "ccount",
        ]
        txt += "\n\n##\n## ADDITIONAL PARAMETERS FOR SPECTROCHEMPY\n##\n"
        for par in nonmplpars:
            txt += f"##@{par:37s} : {getattr(self, par)}\n"

        stylesheet = (pathclean(self.stylesheets) /
                      stylename).with_suffix(".mplstyle")
        stylesheet.write_text(txt)

        if to_mpl:
            # make it also accessible to pyplot
            stylelib = (pathclean(mpl.get_configdir()) / "stylelib" /
                        stylename).with_suffix(".mplstyle")
            stylelib.write_text(txt)

        return stylename
Example #16
    def plot(self, method=None, **kwargs):
        """
        Generic plot function.

        This applies to a |NDDataset| but actually delegates the work to a plotter defined by the keyword parameter
        ``method``.

        Parameters
        ----------
        method : str, optional, default: "generic"
            Specify which plot method to use.
        **kwargs
            Any optional parameters to pass to the plot method.
            See plot_1D, plot_2D and plot_3D for a list of possible arguments.

        Returns
        -------
        ax
            The axes instance on which the plot has been performed.

        See Also
        --------
        plot_1D
        plot_pen
        plot_bar
        plot_scatter_pen
        plot_multiple
        plot_2D
        plot_stack
        plot_map
        plot_image
        plot_1D
        plot_surface
        plot_waterfall
        multiplot

        Examples
        --------

        For 1D data, the default plot is done with the `scatter` method:

        >>> nd = scp.NDDataset([1, 2, 3])
        >>> _ = nd.plot()  # default to method="scatter"

        or
        >>> _ = nd.plot(method="scatter")

        Equivalently, one can also specify the method to use as follows:
        >>> _ = nd.plot_scatter()
        >>> _ = nd.plot_1D()

        """

        # --------------------------------------------------------------------
        # select plotter depending on the dimension of the data
        # --------------------------------------------------------------------
        if method:
            _plotter = getattr(self, f"plot_{method.replace('+', '_')}", None)
            if _plotter is None:
                # no plotter found
                error_("The specified plotter for method "
                       "`{}` was not found!".format(method))
                raise IOError
        else:
            _plotter = self._plot_generic

        # Execute the plotter
        # --------------------

        return _plotter(**kwargs)
Example #17
def fft(dataset, size=None, sizeff=None, inv=False, ppm=True, **kwargs):
    """
    Apply a complex fast Fourier transform.

    For multidimensional NDDataset,
    the transform is by default performed on the last dimension.

    The data in the last dimension MUST be in time-domain (or dimensionless),
    or an error is raised.

    To make a reverse Fourier transform, i.e., from frequency to time domain, use the `ifft` transform
    (or equivalently, the `inv=True` parameter).

    Parameters
    ----------
    dataset : |NDDataset|
        The dataset on which to apply the fft transformation.
    size : int, optional
        Size of the transformed dataset dimension; a shorter alias is `si`. By default, the size is the closest
        power of two greater than the data size.
    sizeff : int, optional
        The number of effective data points to take into account for the transformation. By default it is equal to
        the data size, but may be smaller.
    inv : bool, optional, default=False
        If True, an inverse Fourier transform is performed - size parameter is not taken into account.
    ppm : bool, optional, default=True
        If True, and data are from NMR, then a ppm scale is calculated instead of frequency.
    **kwargs
        Optional keyword parameters (see Other Parameters).

    Returns
    -------
    out
        Transformed |NDDataset|.

    Other Parameters
    ----------------
    dim : str or int, optional, default='x'.
        Specify on which dimension to apply this method. If `dim` is specified as an integer it is equivalent
        to the usual `axis` numpy parameter.
    inplace : bool, optional, default=False
        If True, the transform is performed inplace. If False, the function returns a new object.
    tdeff : int, optional
        Alias of sizeff (specific to NMR). If both sizeff and tdeff are passed, sizeff has the priority.

    See Also
    --------
    ifft : Inverse Fourier transform.
    """
    # datatype
    is_nmr = dataset.origin.lower() in [
        "topspin",
    ]
    is_ir = dataset.meta.interferogram

    # On which axis do we want to apply transform (get axis from arguments)
    dim = kwargs.pop("dim", kwargs.pop("axis", -1))
    axis, dim = dataset.get_axis(dim, negative_axis=True)

    # output dataset inplace or not
    inplace = kwargs.pop("inplace", False)
    if not inplace:  # default
        new = dataset.copy()  # copy to be sure not to modify this dataset
    else:
        new = dataset

    # The last dimension is always the dimension on which we apply the fourier transform.
    # If needed, we swap the dimensions to be sure to be in this situation
    swapped = False
    if axis != -1:
        new.swapdims(axis, -1, inplace=True)  # must be done in place
        swapped = True

    # Select the last coordinates
    x = new.coordset[dim]

    # Perform some dimensionality checks
    error = False
    if (not inv and not x.unitless and not x.dimensionless
            and x.units.dimensionality != "[time]"):
        error_(
            "fft applies only to dimensions with [time] dimensionality or dimensionless coords\n"
            "fft processing was thus cancelled")
        error = True

    elif (inv and not x.unitless and x.units.dimensionality != "1/[time]"
          and not x.dimensionless):
        error_(
            "ifft applies only to dimensions with [frequency] dimensionality or with ppm units "
            "or dimensionless coords.\n ifft processing was thus cancelled")
        error = True

    # Should not be masked
    elif new.is_masked:
        error_(
            "current fft or ifft processing does not support masked data as input.\n processing was thus cancelled"
        )
        error = True

    # Coordinates should be uniformly spaced (linear coordinate)
    if not x.linear:
        # try to linearize it
        x.linear = True
        if not x.linear:
            # linearization failed
            error = True

    if hasattr(x, "_use_time_axis"):
        x._use_time_axis = True  # we need to have dimensionless or time units

    if not error:
        # OK we can proceed

        # time domain size
        td = None
        if not inv:
            td = x.size

        # if no size (or si) parameter, then use the size of the data (size is not used for inverse transform)
        if size is None or inv:
            size = kwargs.get("si", x.size)

        # for NMR data, we default to the closest power of two larger than the data size
        if is_nmr:
            size = largest_power_of_2(size)

        # do we have an effective td to apply
        tdeff = sizeff
        if tdeff is None:
            tdeff = kwargs.get("tdeff", td)

        if tdeff is None or tdeff < 5 or tdeff > size:
            tdeff = size

        # Eventually apply the effective size
        new[..., tdeff:] = 0.0

        # Should we work on complex or hypercomplex data?
        # interleaved is in the case of >2D data (TODO: >2D not yet implemented in ndcomplex.py)
        iscomplex = False
        if axis == -1:
            iscomplex = new.is_complex
        if new.is_quaternion or new.is_interleaved:
            iscomplex = True

        # If we are in NMR we have an additional complication due to the mode
        # of acquisition (sequential mode when ['QSEQ','TPPI','STATES-TPPI'])
        encoding = "undefined"
        if not inv and "encoding" in new.meta:
            encoding = new.meta.encoding[-1]

        qsim = encoding in ["QSIM", "DQD"]
        qseq = "QSEQ" in encoding
        states = "STATES" in encoding
        echoanti = "ECHO-ANTIECHO" in encoding
        tppi = "TPPI" in encoding
        qf = "QF" in encoding

        zf_size(new, size=size, inplace=True)

        # Perform the fft
        if qsim:  # F2 fourier transform
            data = _fft(new.data)

        elif qseq:
            raise NotImplementedError("QSEQ not yet implemented")

        elif states:
            data = _states_fft(new.data, tppi)

        elif tppi:
            data = _tppi_fft(new.data)

        elif echoanti:
            data = _echoanti_fft(new.data)

        elif qf:
            # we must perform a real fourier transform of a time domain dataset
            data = _qf_fft(new.data)

        elif iscomplex and inv:
            # We assume no special encoding for inverse complex fft transform
            data = _ifft(new.data)

        elif not iscomplex and not inv and is_ir:
            # transform interferogram
            data = _interferogram_fft(new.data)

        elif not iscomplex and inv:
            raise NotImplementedError("Inverse FFT for real dimension")

        else:
            raise NotImplementedError(
                f"{encoding} not yet implemented. We recommend you to put an issue on "
                f"Github, so we will not forget to work on this!.")

        # We need here to create a new dataset with new shape and axis
        new._data = data
        new.mask = False

        # create new coordinates for the transformed data

        if is_nmr:
            sfo1 = new.meta.sfo1[-1]
            bf1 = new.meta.bf1[-1]
            sf = new.meta.sf[-1]
            sw = new.meta.sw_h[-1]
            if new.meta.nuc1 is not None:
                nuc1 = new.meta.nuc1[-1]
                regex = r"([^a-zA-Z]+)([a-zA-Z]+)"
                m = re.match(regex, nuc1)
                if m is not None:
                    mass = m[1]
                    name = m[2]
                    nucleus = "^{" + mass + "}" + name
                else:
                    nucleus = ""
            else:
                nucleus = ""
        else:
            sfo1 = 0 * ur.Hz
            bf1 = sfo1
            dw = x.spacing
            sw = 1 / 2 / dw
            sf = -sw / 2
            size = size // 2

        if not inv:
            # time to frequency
            sizem = max(size - 1, 1)
            deltaf = -sw / sizem
            first = sfo1 - sf - deltaf * sizem / 2.0

            # newcoord = type(x)(np.arange(size) * deltaf + first)
            newcoord = LinearCoord.arange(size) * deltaf + first
            newcoord.show_datapoints = False
            newcoord.name = x.name
            new.title = "intensity"
            if is_nmr:
                newcoord.title = f"${nucleus}$ frequency"
                newcoord.ito("Hz")
            elif is_ir:
                new._units = None
                newcoord.title = "wavenumbers"
                newcoord.ito("cm^-1")
            else:
                newcoord.title = "frequency"
                newcoord.ito("Hz")

        else:
            # frequency or ppm to time
            sw = abs(x.data[-1] - x.data[0])
            if x.units == "ppm":
                sw = bf1.to("Hz") * sw / 1.0e6
            deltat = (1.0 / sw).to("us")

            newcoord = LinearCoord.arange(size) * deltat
            newcoord.name = x.name
            newcoord.title = "time"
            newcoord.ito("us")

        if is_nmr and not inv:
            newcoord.meta.larmor = bf1  # needed for ppm transformation
            ppm = kwargs.get("ppm", True)
            if ppm:
                newcoord.ito("ppm")
                newcoord.title = rf"$\delta\ {nucleus}$"

        new.coordset[dim] = newcoord

        # update history
        s = "ifft" if inv else "fft"
        new.history = f"{s} applied on dimension {dim}"

        # PHASE ?
        iscomplex = new.is_complex or new.is_quaternion
        if iscomplex and not inv:
            # phase frequency domain

            # if some phase related metadata do not exist yet, initialize them
            new.meta.readonly = False

            if not new.meta.phased:
                new.meta.phased = [False] * new.ndim

            if not new.meta.phc0:
                new.meta.phc0 = [0] * new.ndim

            if not new.meta.phc1:
                new.meta.phc1 = [0] * new.ndim

            if not new.meta.exptc:
                new.meta.exptc = [0] * new.ndim

            if not new.meta.pivot:
                new.meta.pivot = [0] * new.ndim

            # applied the stored phases
            new.pk(inplace=True)

            new.meta.pivot[-1] = abs(new).coordmax(dim=dim)
            new.meta.readonly = True

    # restore original data order if it was swapped
    if swapped:
        new.swapdims(axis, -1, inplace=True)  # must be done inplace

    return new
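
For orientation, the core time-to-frequency step (zero-fill to a power of two, then transform) can be sketched with plain numpy, independently of the NDDataset/coordinate machinery above (all values hypothetical):

    import numpy as np

    td = 1000                              # time-domain size
    dw = 1.0e-4                            # dwell time (s)
    t = np.arange(td) * dw
    fid = np.exp(2j * np.pi * 50.0 * t) * np.exp(-t / 0.02)

    size = int(2 ** np.ceil(np.log2(td)))  # closest power of two >= td (1024)
    spec = np.fft.fftshift(np.fft.fft(fid, n=size))
    freq = np.fft.fftshift(np.fft.fftfreq(size, d=dw))
    print(freq[np.argmax(np.abs(spec))])   # peak near +50 Hz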
Example #18
def download_nist_ir(CAS, index="all"):
    """
    Download IR spectra from the NIST webbook.

    Parameters
    ----------
    CAS : int or str
        The CAS number; can be given as "XXXX-XX-X" (str), "XXXXXXX" (str) or XXXXXXX (int).

    index : str or int or tuple of ints
        If set to 'all' (default), import all available spectra for the compound
        corresponding to the CAS number; otherwise import a single spectrum (int)
        or selected spectra (tuple of ints).

    Returns
    -------
    list of NDDataset or NDDataset
        The dataset(s).

    See Also
    --------
    read : Read data from experimental data.
    """

    if isinstance(CAS, str) and "-" in CAS:
        CAS = CAS.replace("-", "")

    if index == "all":
        # test urls and return list if any...
        index = []
        i = 0
        while "continue":
            url = (
                f"https://webbook.nist.gov/cgi/cbook.cgi?JCAMP=C{CAS}&Index={i}&Type=IR"
            )
            try:
                response = requests.get(url, timeout=10)
                if b"Spectrum not found" in response.content[:30]:
                    break
                else:
                    index.append(i)
                    i += 1
            except OSError:
                error_("OSError: could not connect to NIST")
                return None

        if len(index) == 0:
            error_("NIST IR: no spectrum found")
            return
        elif len(index) == 1:
            info_("NIST IR: 1 spectrum found")
        else:
            info_("NISTR IR: {len(index)} spectra found")

    elif isinstance(index, int):
        index = [index]
    elif not is_iterable(index):
        raise ValueError("index must be 'all', int or iterable of int")

    out = []
    for i in index:
        # sample address (water, spectrum 1)
        # https://webbook.nist.gov/cgi/cbook.cgi?JCAMP=C7732185&Index=1&Type=IR
        url = f"https://webbook.nist.gov/cgi/cbook.cgi?JCAMP=C{CAS}&Index={i}&Type=IR"
        try:
            response = requests.get(url, stream=True, timeout=10)
            if b"Spectrum not found" in response.content[:30]:
                error_(
                    f"NIST IR: Spectrum {i} does not exist... please check!")
                if i == index[-1] and out == []:
                    return None
                else:
                    break

        except OSError:
            error_("OSError: Cannot connect... ")
            return None

        # Load data
        txtdata = ""
        for rd in response.iter_content():
            txtdata += rd.decode("utf8")

        with open("temp.jdx", "w") as f:
            f.write(txtdata)
        try:
            ds = read_jcamp("temp.jdx")

            # replace the default entry ":imported from jdx file":
            ds.history[0] = ds.history[0][:len(str(datetime.now(
                timezone.utc)))] + (f" : downloaded from NIST: {url}\n")
            out.append(ds)
            (Path(".") / "temp.jdx").unlink()

        except Exception:
            raise OSError(
                "Can't read this JCAMP file: please report the issue to SpectroChemPy developers"
            )

    if len(out) == 1:
        return out[0]
    else:
        return out
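
Hypothetical usage (water is CAS 7732-18-5; the calls need network access to the NIST webbook):

    ds = download_nist_ir("7732-18-5", index=0)  # a single spectrum
    ds_list = download_nist_ir(7732185)          # all spectra; a list when several exist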
Example #19
def smooth(dataset, window_length=5, window='flat', **kwargs):
    """
    Smooth the data using a window with requested size.

    This method is based on the convolution of a scaled window with the signal.
    The signal is prepared by introducing reflected copies of the signal (of the window size) at both ends, so that
    transient parts are minimized at the beginning and end of the output data.

    Parameters
    ----------
    dataset :  |NDDataset| or a ndarray-like object
        Input object.
    window_length :  int, optional, default=5
        The dimension of the smoothing window; must be an odd integer.
    window : str, optional, default='flat'
        The type of window from 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'.
        A flat window will produce a moving-average smoothing.
    **kwargs : dict
        See other parameters.

    Returns
    -------
    smoothed
        Same type as input dataset.

    Other Parameters
    ----------------
    dim : str or int, optional, default='x'.
        Specify on which dimension to apply this method. If `dim` is specified as an integer it is equivalent
        to the usual `axis` numpy parameter.
    inplace : bool, optional, default=False
        If True, the transform is performed inplace. If False, the function returns a new object.

    See Also
    --------
    savgol_filter : Apply a Savitzky-Golay filter.

    Examples
    --------
    >>> import spectrochempy as scp
    >>> ds = scp.read("irdata/nh4y-activation.spg")
    >>> ds.smooth(window_length=11)
    NDDataset: [float64] a.u. (shape: (y:55, x:5549))
    """

    if not kwargs.pop('inplace', False):
        # default
        new = dataset.copy()
    else:
        new = dataset

    is_ndarray = False
    axis = kwargs.pop('dim', kwargs.pop('axis', -1))
    if hasattr(new, 'get_axis'):
        axis, dim = new.get_axis(axis, negative_axis=True)
    else:
        is_ndarray = True

    swapped = False
    if axis != -1:
        new.swapdims(axis, -1, inplace=True)  # must be done in place
        swapped = True

    if (window_length % 2) != 1:
        error_("Window length must be an odd integer.")
        return new

    if new.shape[-1] < window_length:
        error_("Input vector needs to be bigger than window size.")
        return new

    if window_length < 3:
        return new

    wind = {
        'flat': np.ones,
        'hanning': np.hanning,
        'hamming': np.hamming,
        'bartlett': np.bartlett,
        'blackman': np.blackman,
    }

    if not callable(window):
        if window not in wind.keys():
            error_(
                "Window must be a callable or a string among 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'"
            )
            return new
        window = wind[window]

    # extend on both side to limit side effects
    dat = np.r_['-1', new.data[..., window_length - 1:0:-1], new.data,
                new.data[..., -1:-window_length:-1]]

    w = window(window_length)
    data = np.apply_along_axis(np.convolve, -1, dat, w / w.sum(), mode='valid')
    data = data[..., int(window_length / 2):-int(window_length / 2)]

    if not is_ndarray:
        new.data = data
        new.history = f'smoothing with a window: {window.__name__} of length {window_length}'

        # restore original data order if it was swapped
        if swapped:
            new.swapdims(axis, -1, inplace=True)  # must be done inplace
    else:
        new = data

    return new
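
The reflect-pad-and-convolve scheme can be checked standalone with numpy (a sketch of the same idea, not the library call itself):

    import numpy as np

    y = np.sin(np.linspace(0, 4 * np.pi, 200)) + 0.1 * np.random.randn(200)
    wl = 11  # odd window length
    w = np.hanning(wl)

    # reflect both ends, convolve with the normalized window, then trim
    pad = np.r_[y[wl - 1:0:-1], y, y[-2:-wl - 1:-1]]
    smoothed = np.convolve(pad, w / w.sum(), mode="valid")[wl // 2:-(wl // 2)]
    assert smoothed.shape == y.shape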
Example #20
    def wrapper(dataset, **kwargs):

        # On which axis do we want to phase (get axis from arguments)
        axis, dim = dataset.get_axis(**kwargs, negative_axis=True)

        # output dataset inplace (by default) or not
        if not kwargs.pop('inplace', False):
            new = dataset.copy()  # copy to be sure not to modify this dataset
        else:
            new = dataset

        swapped = False
        if axis != -1:
            new.swapdims(axis, -1, inplace=True)  # must be done in place
            swapped = True

        # Get the coordinates for the last dimension
        x = new.coordset[dim]

        # check if the dimensionality is compatible with this kind of functions
        if x.unitless or x.dimensionless or x.units.dimensionality != '[time]':

            # extract initial phase from metadata
            def _check_units(par, default_units, inv=False):
                if not isinstance(par, Quantity):
                    par *= Quantity(1., default_units)
                elif inv:
                    if par == 0:
                        return par
                    par = 1. / (1. / par).to(default_units)
                else:
                    par = par.to(default_units)
                return par

            # Set correct units for the parameters
            dunits = dataset.coordset[dim].units

            current = [new.meta.phc0[-1], new.meta.phc1[-1]]
            rel = kwargs.pop('rel', False)
            if rel:  # relative phase
                current = [0, 0]
            kwargs['phc0'] = (_check_units(kwargs.get('phc0', 0), 'degree') - current[0]).magnitude
            kwargs['phc1'] = (_check_units(kwargs.get('phc1', 0), 'degree') - current[1]).magnitude
            kwargs['pivot'] = _check_units(kwargs.get('pivot', new.meta.pivot[-1]), dunits).magnitude
            kwargs['exptc'] = _check_units(kwargs.get('exptc', new.meta.get('exptc', [0] * new.ndim)[-1]), dunits,
                                           inv=True).magnitude

            if not new.meta.phased[-1]:
                # initial phase from topspin have not yet been used
                kwargs['phc0'] = -kwargs['phc0']
                kwargs['phc1'] = -kwargs['phc1']

            apod = method(new.data, **kwargs)
            new *= apod

            new.history = f'`{method.__name__}` applied to dimension `{dim}` with parameters: {kwargs}'

            if not new.meta.phased[-1]:
                new.meta.phased[-1] = True
                new.meta.phc0[-1] = 0 * ur.degree
                new.meta.phc1[-1] = 0 * ur.degree
                new.meta.exptc[-1] = 0 * (1 / dunits)
            else:
                if rel:
                    new.meta.phc0[-1] += kwargs['phc0'] * ur.degree
                    new.meta.phc1[-1] += kwargs['phc1'] * ur.degree
                else:
                    new.meta.phc0[-1] = kwargs['phc0'] * ur.degree
                    new.meta.phc1[-1] = kwargs['phc1'] * ur.degree

                    # TODO: to do for exptc too!
                new.meta.exptc[-1] = kwargs['exptc'] * (1 / dunits)

            new.meta.pivot[-1] = kwargs['pivot'] * dunits

        else:  # not (x.unitless or x.dimensionless or x.units.dimensionality != '[time]')
            error_('This method applies only to dimensions with [frequency] or [dimensionless] dimensionality.\n'
                   'Phase processing was thus cancelled')

        # restore original data order if it was swapped
        if swapped:
            new.swapdims(axis, -1, inplace=True)  # must be done inplace

        return new
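
The `_check_units` helper normalizes bare numbers and quantities to the expected units (with a reciprocal-unit path for `inv=True`). A standalone sketch of the main idea with a fresh pint registry (the library's own `Quantity`/`ur` objects are assumed elsewhere):

    import pint

    ureg = pint.UnitRegistry()

    def check_units(par, default_units):
        if not isinstance(par, ureg.Quantity):
            return par * ureg(default_units)  # bare number -> default units
        return par.to(default_units)          # Quantity -> converted

    print(check_units(30, "degree"))                 # 30 degree
    print(check_units(0.5 * ureg.radian, "degree"))  # ~28.65 degree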
Example #21
        def wrapper(dataset, **kwargs):

            # what to return
            retapod = kwargs.pop('retapod', False)
            dryrun = kwargs.pop('dryrun', False)
            # is_nmr = dataset.origin.lower() in ["topspin", ]
            is_ir = dataset.origin.lower() in ["omnic", "opus"]

            # On which axis do we want to apodize? (get axis from arguments)
            axis, dim = dataset.get_axis(**kwargs, negative_axis=True)

            # output dataset inplace (by default) or not
            if not kwargs.pop('inplace', False) and not dryrun:
                new = dataset.copy()  # copy to be sure not to modify this dataset
            else:
                new = dataset

            # The last dimension is always the dimension on which we apply the apodization window.
            # If needed, we swap the dimensions to be sure to be in this situation
            swapped = False
            if axis != -1:
                new.swapdims(axis, -1, inplace=True)  # must be done in place
                swapped = True

            # Get the coordinates for the last dimension
            x = new.coordset[dim]
            if hasattr(x, '_use_time_axis'):
                store = x._use_time_axis
                x._use_time_axis = True  # we need to have dimensionless or time units

            # check if the dimensionality is compatible with this kind of functions
            if x.unitless or x.dimensionless or x.units.dimensionality == '[time]':

                # Set correct units for parameters
                dunits = dataset.coordset[dim].units
                apod = {}
                for key, default_units in units.items():

                    if key not in kwargs or default_units is None:
                        continue

                    par = kwargs[key]

                    if isinstance(par, str):
                        par = Quantity(par)

                    if not isinstance(par, Quantity):
                        # set to default units
                        par *= Quantity(1., default_units)

                    apod[key] = par
                    if par.dimensionality == 1 / dunits.dimensionality:
                        kwargs[key] = 1. / (1. / par).to(dunits)
                    else:
                        kwargs[key] = par.to(dunits)

                    kwargs[key] = kwargs[key].magnitude

                # Call to the apodize function
                # ----------------------------

                # now call the method with unitless parameters
                if is_ir:
                    # we must apodize at the top of the interferogram.
                    zpd = int(np.argmax(new.data, -1))
                    dist2end = x.size - zpd
                    apod_arr = method(np.empty(2 * dist2end), **kwargs)
                    apod_arr = apod_arr[-x.size:]
                else:
                    apod_arr = method(x.data, **kwargs)

                if kwargs.pop('rev', False):
                    apod_arr = apod_arr[::-1]  # reverse apodization

                if kwargs.pop('inv', False):
                    apod_arr = 1. / apod_arr  # invert apodization

                if not dryrun:
                    new.history = f'`{method.__name__}` apodization performed on dimension `{dim}` ' \
                                  f'with parameters: {apod}'

                # Apply?
                if not dryrun:
                    new._data *= apod_arr

            else:  # not (x.unitless or x.dimensionless or x.units.dimensionality == '[time]')
                error_(
                    'This method applies only to dimensions with [time] or [dimensionless] dimensionality.\n'
                    'Apodization processing was thus cancelled')
                apod_arr = 1.

            # restore original data order if it was swapped
            if swapped:
                new.swapdims(axis, -1, inplace=True)  # must be done inplace

            if hasattr(x, '_use_time_axis'):
                new.x._use_time_axis = store

            if retapod:
                apodcurve = type(new)(apod_arr, coordset=[x])
                return new, apodcurve
            else:
                return new
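
For reference, `method(x.data, **kwargs)` is expected to return a multiplicative window over the time axis, e.g. an exponential line-broadening window of the usual form (a sketch under that assumption, not the library's own implementation):

    import numpy as np

    t = np.linspace(0.0, 1.0, 512)      # time axis in seconds (hypothetical)
    lb = 5.0                            # line broadening in Hz
    apod_arr = np.exp(-np.pi * lb * t)  # exponential (em-style) window
    signal = np.ones_like(t)
    apodized = signal * apod_arr        # same elementwise product as `new._data *= apod_arr`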
Example #22
    def __getitem__(self, items, **kwargs):

        if isinstance(items, list):
            # Special case of fancy indexing
            items = (items,)

        # choose, if we keep the same or create new object
        inplace = False
        if isinstance(items, tuple) and items[-1] == INPLACE:
            items = items[:-1]
            inplace = True

        # Eventually get a better representation of the indexes
        keys = self._make_index(items)

        # init returned object
        if inplace:
            new = self
        else:
            new = self.copy()

        # slicing by index of all internal array
        if new.data is not None:
            udata = new.data[keys]

            if new.linear:
                new._size = udata.size
                if new._size > 1:
                    inc = np.diff(udata)
                    variation = (inc.max() - inc.min()) / udata.ptp()
                    if variation < 1.0e-5:
                        new._increment = np.mean(inc)
                        new._offset = udata[0]
                        new._data = None
                        new._linear = True
                    else:
                        new._linear = False
                else:
                    new._linear = False

            if not new.linear:
                new._data = np.asarray(udata)

        if self.is_labeled:
            # case only of 1D dataset such as Coord
            new._labels = np.array(self._labels[keys])

        if new.is_empty:
            error_(
                f"Empty array of shape {new._data.shape} resulted from slicing.\n"
                f"Check the indexes and make sure to use floats for location slicing"
            )
            return None

        new._mask = NOMASK

        # we need to keep the names when copying coordinates to avoid later
        # problems
        new.name = self.name
        return new
Example #23
def align(dataset, *others, **kwargs):
    """
    Align individual |NDDataset| along given dimensions using various methods.

    Parameters
    ----------
    dataset : |NDDataset|
        Dataset on which we want to align other objects.
    *others : |NDDataset|
        Objects to align.
    dim : str, optional, default='x'
        Along which axis to perform the alignment.
    dims : list of str, optional, default=None
        Align along all dims defined in dims (if dim is also
        defined, then dims have higher priority).
    method : enum ['outer', 'inner', 'first', 'last', 'interpolate'], optional, default='outer'
        Which method to use for the alignment:

        * 'outer' means that a union of the different coordinates is
          achieved (missing values are masked).
        * 'inner' means that the intersection of the coordinates is used.
        * 'first' means that the first dataset is used as reference.
        * 'last' means that the last dataset is used as reference.
        * 'interpolate' means that interpolation is performed relative to
          the first dataset.
    interpolate_method : enum ['linear', 'pchip'], optional, default='linear'
        Method of interpolation to perform for the alignment.
    interpolate_sampling : 'auto', int or float, optional, default='auto'

        * 'auto' : sampling is determined automatically from the existing data.
        * int : if an integer value is specified, then the
          sampling interval for the interpolated data will be split into
          this number of points.
        * float : if a float value is provided, it determines the interval
          between the interpolated data.
    coord : |Coord|, optional, default=None
        Coordinates to use for alignment. Ignore those corresponding to the
        dimensions to align.
    copy : bool, optional, default=True
        If False then the returned objects will share memory with the
        original objects, whenever it is possible :
        in principle only if reindexing is not necessary.

    Returns
    -------
    aligned_datasets : tuple of |NDDataset|
        Same objects as datasets with dimensions aligned.

    Raises
    ------
    ValueError
        Issued when the dimensions given in the `dim` or `dims` argument are not
        compatible (units, titles, etc.).
    """
    # DEVELOPER NOTE
    # There are probably better methods, but to simplify dealing with
    # LinearCoord, we transform them into Coord before treatment (going back
    # to linear if possible at the end of the process)

    # TODO: Perform an alignment along numeric labels
    # TODO: add example in docs

    # copy objects?
    copy = kwargs.pop('copy', True)

    # make a single list with dataset and the remaining object
    objects = [dataset] + list(others)

    # should we align on given external coordinates
    extern_coord = kwargs.pop('coord', None)
    if extern_coord and extern_coord.implements('LinearCoord'):
        extern_coord = Coord(extern_coord, linear=False, copy=True)

    # what's the method to use (by default='outer')
    method = kwargs.pop('method', 'outer')

    # trivial cases where alignment is not possible or unnecessary
    if not objects:
        warning_('No object provided for alignment!')
        return None

    if len(objects) == 1 and objects[0].implements(
            'NDDataset') and extern_coord is None:
        # no necessary alignment
        return objects

    # evaluate on which axis we align
    axis, dims = dataset.get_axis(only_first=False, **kwargs)

    # check compatibility of the dims and prepare the dimension for alignment
    for axis, dim in zip(axis, dims):

        # get all objects to align
        _objects = {}
        _nobj = 0

        for idx, object in enumerate(objects):

            if not object.implements('NDDataset'):
                error_(
                    f'Bad object(s) found: {object}. Note that only NDDataset '
                    f'objects are accepted '
                    f'for alignment')
                return None

            _objects[_nobj] = {
                'obj': object.copy(),
                'idx': idx,
            }
            _nobj += 1

        _last = _nobj - 1

        # get the reference object (by default the first, except if method is
        # set to 'last')
        ref_obj_index = 0
        if method == 'last':
            ref_obj_index = _last

        ref_obj = _objects[ref_obj_index]['obj']

        # as we will sort their coordinates at some point, we need to know
        # if the coordinates need to be reversed at
        # the end of the alignment process
        reversed = ref_obj.coordset[dim].reversed
        if reversed:
            ref_obj.sort(descend=False, dim=dim, inplace=True)

        # get the coordset corresponding to the reference object
        ref_obj_coordset = ref_obj.coordset

        # get the coordinate for the reference dimension
        ref_coord = ref_obj_coordset[dim]

        reversed = ref_coord.reversed

        # prepare a new Coord object to store the final new dimension
        new_coord = ref_coord.copy()

        ndec = get_n_decimals(new_coord.data.max(), 1.e-5)

        if new_coord.implements('LinearCoord'):
            new_coord = Coord(new_coord, linear=False, copy=True)

        # loop on all object
        for index, object in _objects.items():

            obj = object['obj']

            if obj is ref_obj:
                # not necessary to compare with itself!
                continue

            if reversed:
                obj.sort(descend=False, dim=dim, inplace=True)

            # get the current object coordinates and check compatibility
            coord = obj.coordset[dim]
            if coord.implements('LinearCoord') or coord.linear:
                coord = Coord(coord, linear=False, copy=True)

            if not coord.is_units_compatible(ref_coord):
                # not compatible, stop everything
                raise UnitsCompatibilityError(
                    'NDataset to align must have compatible units!')

            # do units transform if necessary so coords can be compared
            if coord.units != ref_coord.units:
                coord.ito(ref_coord)

            # adjust the new_coord depending on the method of alignment

            new_coord_data = set(np.around(new_coord.data, ndec))
            coord_data = set(np.around(coord.data, ndec))

            if method in ['outer', 'interpolate']:
                # in this case we do a union of the coords (masking the
                # missing values)
                # For method=`interpolate`, the interpolation will be
                # performed in a second step
                new_coord._data = sorted(coord_data | new_coord_data)

            elif method == 'inner':
                # take only the intersection of the coordinates
                # (TODO: generate a warning if the result is null or empty)
                new_coord._data = sorted(coord_data & new_coord_data)

            elif method in ['first', 'last']:
                # we take the reference coordinates already determined as
                # basis (masking the missing values)
                continue

            else:
                raise NotImplementedError(f'The method {method} is unknown!')

        # Now perform alignment of all objects on the new coordinates
        for index, object in _objects.items():

            obj = object['obj']

            # get the dim index for the given object
            dim_index = obj.dims.index(dim)

            # prepare slicing keys; set slice(None) for the untouched
            # dimensions preceding the dimension of interest
            prepend_keys = [slice(None)] * dim_index

            # New objects for obj must be created with the new coordinates

            # change the data shape
            new_obj_shape = list(obj.shape)
            new_obj_shape[dim_index] = len(new_coord)
            new_obj_data = np.full(new_obj_shape, np.NaN)

            # create new dataset for obj and ref_objects
            if copy:
                new_obj = obj.copy()
            else:
                new_obj = obj

            # update the data and mask
            coord = obj.coordset[dim]
            coord_data = set(np.around(coord.data, ndec))

            dim_loc = new_coord._loc2index(sorted(coord_data))
            loc = tuple(prepend_keys + [dim_loc])

            new_obj._data = new_obj_data

            # mask all the data then unmask later the relevant data in
            # the next step

            if not new_obj.is_masked:
                new_obj.mask = MASKED
                new_obj.mask[loc] = False
            else:
                mask = new_obj.mask.copy()
                new_obj.mask = MASKED
                new_obj.mask[loc] = mask

            # set the data for the loc
            new_obj._data[loc] = obj.data

            # update the coordinates
            new_coordset = obj.coordset.copy()
            if coord.is_labeled:
                label_shape = list(coord.labels.shape)
                label_shape[0] = new_coord.size
                new_coord._labels = np.zeros(tuple(label_shape)).astype(
                    coord.labels.dtype)
                new_coord._labels[:] = '--'
                new_coord._labels[dim_loc] = coord.labels
            setattr(new_coordset, dim, new_coord)
            new_obj._coordset = new_coordset

            # reversed?
            if reversed:
                # we must reverse the given coordinates
                new_obj.sort(descend=reversed, dim=dim, inplace=True)

            # update the _objects
            _objects[index]['obj'] = new_obj

            if method == 'interpolate':
                warning_(
                    'Interpolation not yet implemented - for now equivalent '
                    'to `outer`')

        # the new transformed objects must be in the same order as the passed
        # objects,
        # and the missing values must be masked (for the moment they are set to NaN)

        for index, object in _objects.items():
            obj = object['obj']
            # obj[np.where(np.isnan(obj))] = MASKED  # mask NaN values
            obj[np.where(np.isnan(
                obj))] = 99999999999999.  # replace NaN values (to simplify
            # comparisons)
            idx = int(object['idx'])
            objects[idx] = obj

            # we also transform into linear coord if possible ?
            pass  # TODO:

    # Now return

    return tuple(objects)
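
The 'outer' and 'inner' branches reduce to set operations on rounded coordinate values; standalone, the merge looks like this (rounding to `ndec` decimals as above):

    import numpy as np

    a = np.array([1.0, 2.0, 3.0])
    b = np.array([2.0, 3.0, 4.0])
    ndec = 5

    outer = sorted(set(np.around(a, ndec)) | set(np.around(b, ndec)))  # union: 1, 2, 3, 4
    inner = sorted(set(np.around(a, ndec)) & set(np.around(b, ndec)))  # intersection: 2, 3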
Example #24
def download_IRIS():
    """
    Download the classical IRIS dataset.

    The IRIS dataset is a classical example for machine learning. It is downloaded from
    the [UCI distant repository](https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data).

    Returns
    -------
    downloaded
        The IRIS dataset.

    See Also
    --------
    read : To read data from experimental data.

    Examples
    --------
    Download a dataset from a distant server:

    >>> import spectrochempy as scp
    >>> dataset = scp.download_IRIS()
    """
    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"

    try:
        connection = True
        response = requests.get(url, stream=True, timeout=10)
    except Exception as e:
        error_(e)
        connection = False

    if connection:  # Download data
        txtdata = ''
        for rd in response.iter_content():
            txtdata += rd.decode('utf8')

        fil = StringIO(txtdata)
        try:
            data = np.loadtxt(fil, delimiter=',', usecols=range(4))
            fil.seek(0)
            labels = np.loadtxt(fil, delimiter=',', usecols=(4, ), dtype='|S')
            labels = list((lab.decode("utf8") for lab in labels))
        except Exception:
            raise IOError(
                f'{url} is not a .csv file or its structure cannot be recognized')

        coordx = Coord(labels=[
            'sepal_length', 'sepal_width', 'petal_length', 'petal_width'
        ],
                       title='features')
        coordy = Coord(labels=labels, title='samples')

        new = NDDataset(data,
                        coordset=[coordy, coordx],
                        title='size',
                        name='IRIS Dataset',
                        units='cm')

        new.history = 'Loaded from UC Irvine machine learning repository'

        return new

    else:
        # Cannot download - use the scikit-learn dataset (if scikit-learn is installed)

        try:
            from sklearn import datasets
        except ImportError:
            raise IOError('Failed to download the IRIS dataset!')

        # import some data to play with
        data = datasets.load_iris()

        coordx = Coord(labels=[
            'sepal_length', 'sepal_width', 'petal_length', 'petal_width'
        ],
                       title='features')
        labels = [data.target_names[i] for i in data.target]
        coordy = Coord(labels=labels, title='samples')

        new = NDDataset(data.data,
                        coordset=[coordy, coordx],
                        title='size',
                        name='IRIS Dataset',
                        units='cm')

        new.history = 'Loaded from scikit-learn datasets'

        return new