Example #1
def test_nddataset_binary_operation_with_other_1D():
    coord1 = Coord(np.linspace(0.0, 10.0, 10))
    coord2 = Coord(np.linspace(1.0, 5.5, 5))
    d1 = NDDataset(np.random.random((10, 5)), coordset=[coord1, coord2])
    d2 = d1[0]
    # this should work independently of the value of the coordinates on dimension y
    d3 = d1 - d2
    assert_array_equal(d3.data, d1.data - d2.data)
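The comment in the test states the key broadcasting rule: subtracting a 1D row dataset from a 2D dataset must work whatever the y coordinate attached to that row. A minimal sketch reusing `d1` from the test above (the assertions follow from the data, not from the coordinates):

row = d1[5]      # a row; its y coordinate value is irrelevant here
diff = d1 - row  # broadcasts along y, as with plain NumPy arrays
assert diff.data.shape == d1.data.shape
assert_array_equal(diff.data[5], np.zeros(5))  # the subtracted row becomes zero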
Example #2
    def __init__(self,
                 data=None,
                 coordset=None,
                 coordunits=None,
                 coordtitles=None,
                 **kwargs):

        super().__init__(data, **kwargs)

        self._parent = None

        # optionally set the coordinates, with units and titles

        if isinstance(coordset, CoordSet):
            self.set_coordset(**coordset)

        else:
            if coordset is None:
                coordset = [None] * self.ndim

            if coordunits is None:
                coordunits = [None] * self.ndim

            if coordtitles is None:
                coordtitles = [None] * self.ndim

            _coordset = []
            for c, u, t in zip(coordset, coordunits, coordtitles):
                if not isinstance(c, CoordSet):
                    if isinstance(c, LinearCoord):
                        coord = LinearCoord(c)
                    else:
                        coord = Coord(c)
                    if u is not None:
                        coord.units = u
                    if t is not None:
                        coord.title = t
                else:
                    if u:  # pragma: no cover
                        warning_(
                            "units have been set for a CoordSet, but they will be ignored "
                            "(units are only defined at the coordinate level)")
                    if t:  # pragma: no cover
                        warning_(
                            "titles will be ignored as they are only defined at the coordinate level"
                        )
                    coord = c

                _coordset.append(coord)

            if _coordset and set(_coordset) != {
                    Coord()
            }:  # if there are no coordinates, do nothing
                self.set_coordset(*_coordset)
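The branch above wraps bare arrays passed through `coordset` into `Coord` objects and applies `coordunits` and `coordtitles` element-wise. A hedged usage sketch of those keyword arguments (array values are purely illustrative):

d = NDDataset(np.random.random((10, 5)),
              coordset=[np.linspace(0.0, 10.0, 10), np.linspace(1.0, 5.5, 5)],
              coordunits=['s', 'cm^-1'],
              coordtitles=['time', 'wavenumber'])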
Example #3
def stack(*datasets):
    """
    Stack of |NDDataset| objects along a new dimension.

    Any number of |NDDataset| objects can be stacked. For this operation
    to be defined, the following must be true:

    #. all inputs must be valid dataset objects;
    #. units of data and axis must be compatible (rescaling is applied
       automatically if necessary).

    Parameters
    ----------
    *datasets : a series of |NDDataset|
        The datasets to be stacked.

    Returns
    -------
    out
        A |NDDataset| created from the stack of the `datasets` objects.

    See Also
    --------
    concatenate : Concatenate |NDDataset| objects along a given dimension.

    Examples
    --------

    >>> A = scp.read('irdata/nh4y-activation.spg', protocol='omnic')
    >>> B = scp.read('irdata/nh4y-activation.scp')
    >>> C = scp.stack(A, B)
    >>> print(C)
    NDDataset: [float64] a.u. (shape: (z:2, y:55, x:5549))
    """

    datasets = _get_copy(datasets)

    shapes = {ds.shape for ds in datasets}
    if len(shapes) != 1:
        raise DimensionsCompatibilityError(
            "all input arrays must have the same shape")

    # prepend a new dimension
    for i, dataset in enumerate(datasets):
        dataset._data = dataset.data[np.newaxis]
        dataset._mask = dataset.mask[np.newaxis]
        newcoord = Coord([i], labels=[dataset.name])
        newcoord.name = (OrderedSet(DEFAULT_DIM_NAME) - dataset._dims).pop()
        dataset.add_coordset(newcoord)
        dataset.dims = [newcoord.name] + dataset.dims

    return concatenate(*datasets, dims=0)
Example #4
def test_coord_unit_conversion_operators(operation, result_units):
    in_km = Coord(data=np.linspace(4000, 1000, 10),
                  units='km',
                  mask=None,
                  title='something')

    scalar = 2.

    operator_km = in_km.__getattribute__(operation)

    combined = operator_km(scalar)
    debug_(f'{operation}, {combined}')
    assert_equal_units(combined.units, result_units)
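The test body expects `operation` and `result_units` to be supplied by a `pytest.mark.parametrize` decorator that is not shown in this excerpt. A plausible, purely hypothetical parametrization consistent with the body, restricted to operators that are safe with a dimensionless scalar:

@pytest.mark.parametrize(('operation', 'result_units'), [
    ('__mul__', 'km'),      # km * 2. -> km  (expected units are an assumption)
    ('__truediv__', 'km'),  # km / 2. -> km
])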
Example #5
    def __getitem__(self, items, **kwargs):

        saveditems = items

        # coordinate selection to test first
        if isinstance(items, str):
            try:
                return self._coordset[items]
            except Exception:
                pass

        # slicing
        new, items = super().__getitem__(items, return_index=True)

        if new is None:
            return None

        if self._coordset is not None:
            names = self._coordset.names  # all names of the current coordinates
            new_coords = [None] * len(names)
            for i, item in enumerate(items):
                # get the corresponding dimension name in the dims list
                name = self.dims[i]
                # get the corresponding index in the coordinate's names list
                idx = names.index(name)
                if self._coordset[idx].is_empty:
                    new_coords[idx] = Coord(None, name=name)
                elif isinstance(item, slice):
                    # add the slice on the corresponding coordinates on the dim to the new list of coordinates
                    if not isinstance(self._coordset[idx], CoordSet):
                        new_coords[idx] = self._coordset[idx][item]
                    else:
                        # we must slice all internal coordinates
                        newc = []
                        for c in self._coordset[idx]:
                            newc.append(c[item])
                        new_coords[idx] = CoordSet(*newc[::-1], name=name)
                        # we reverse to be sure the order will be kept for
                        # internal coordinates
                        new_coords[idx]._default = self._coordset[
                            idx]._default  # set the same default coord
                        new_coords[idx]._is_same_dim = self._coordset[
                            idx]._is_same_dim

                elif isinstance(item, (np.ndarray, list)):
                    new_coords[idx] = self._coordset[idx][item]

            new.set_coordset(*new_coords, keepnames=True)

        new.history = f"Slice extracted: ({saveditems})"
        return new
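The method above first tries the key as a coordinate name and then falls back to positional slicing, rebuilding the sliced coordinates alongside the data. A short sketch of both paths, for a 2D dataset `d1` as in Example #1:

x = d1['x']    # string key: returns the coordinate named 'x'
sub = d1[2:5]  # positional slice: the y coordinate is sliced with the data
assert sub.coordset['y'].size == 3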
Example #6
def test_coord_unit_conversion_operators_a(operation, result_units):
    print(operation, result_units)
    in_km = Coord(data=np.linspace(4000, 1000, 10),
                  units='km',
                  mask=None,
                  title='something')

    scalar_in_m = 2. * ur.m

    operator_km = in_km.__getattribute__(operation)

    combined = operator_km(scalar_in_m)

    assert_equal_units(combined.units, result_units)
Example #7
def test_nddataset_add_mismatch_coords():
    coord1 = Coord(np.arange(5.0))
    coord2 = Coord(np.arange(1.0, 5.5, 1.0))
    d1 = NDDataset(np.ones((5, 5)), coordset=[coord1, coord2])
    d2 = NDDataset(np.ones((5, 5)), coordset=[coord2, coord1])
    with pytest.raises(CoordinateMismatchError) as exc:
        d1 -= d2
    assert str(
        exc.value).startswith("\nCoord.data attributes are not almost equal")
    with pytest.raises(CoordinateMismatchError) as exc:
        d1 += d2
    assert str(exc.value).startswith(
        "\nCoord.data attributes are not almost equal"
    )  # TODO: make more tests like this for various functions
Example #8
def test_coord_add_units_with_different_scale():
    d1 = Coord.arange(3.0, units="m")
    d2 = Coord.arange(3.0, units="cm")

    x = d1 + 1.0 * ur.cm
    assert x.data[1] == 1.01

    x = d1 + d2
    assert x.data[1] == 1.01
    x = d2 + d1
    assert x.data[1] == 101.0
    d1 += d2
    assert d1.data[1] == 1.01
    d2 += d1
    assert d2.data[1] == 102.0
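The expected values follow from automatic rescaling to the units of the left operand: 1 m + 1 cm = 1.01 m, while 1 cm + 1 m = 101 cm. The same arithmetic with bare quantities, assuming `ur` is the pint unit registry used throughout these examples:

assert (1.0 * ur.m + 1.0 * ur.cm).magnitude == 1.01
assert (1.0 * ur.cm + 1.0 * ur.m).magnitude == 101.0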
Example #9
def test_coord_not_implemented(name):
    coord0 = Coord(data=np.linspace(4000, 1000, 10),
                   units='cm^-1',
                   mask=None,
                   title='wavelength')
    with pytest.raises(NotImplementedError):
        getattr(coord0, name)()
Example #10
def test_IRIS():
    X = NDDataset.read_omnic(os.path.join('irdata', 'CO@Mo_Al2O3.SPG'))

    p = [
        0.00300, 0.00400, 0.00900, 0.01400, 0.02100, 0.02600, 0.03600, 0.05100,
        0.09300, 0.15000, 0.20300, 0.30000, 0.40400, 0.50300, 0.60200, 0.70200,
        0.80100, 0.90500, 1.00400
    ]

    X.coordset.update(y=Coord(p, title='pressure', units='torr'))
    # Using the `update` method is mandatory because it will preserve the name.
    # Indeed, setting using X.coordset[0] = Coord(...) fails unless name is specified: Coord(..., name='y')

    # set the optimization parameters, perform the analysis
    # and plot the results

    param = {
        'epsRange': [-8, -1, 20],
        'lambdaRange': [-7, -5, 3],
        'kernel': 'langmuir'
    }

    X_ = X[:, 2250.:1950.]
    X_.plot()

    iris = IRIS(X_, param, verbose=True)

    f = iris.f
    X_hat = iris.reconstruct()

    iris.plotlcurve(scale='ln')
    f[0].plot(method='map', plottitle=True)
    X_hat[0].plot(plottitle=True)

    show()
Example #11
def test_linearcoord():
    coord1 = Coord([1, 2.5, 4, 5])

    coord2 = Coord(np.array([1, 2.5, 4, 5]))
    assert coord2 == coord1

    coord3 = Coord(range(10))

    coord4 = Coord(np.arange(10))
    assert coord4 == coord3

    coord5 = coord4.copy()
    coord5 += 1
    assert np.all(coord5.data == coord4.data + 1)

    assert coord5 is not None
    coord5.linear = True

    coord6 = Coord(linear=True, offset=2.0, increment=2.0, size=10)
    assert np.all(coord6.data == (coord4.data + 1.0) * 2.)

    LinearCoord(offset=2.0, increment=2.0, size=10)

    coord0 = LinearCoord.linspace(200.,
                                  300.,
                                  3,
                                  labels=['cold', 'normal', 'hot'],
                                  units="K",
                                  title='temperature')
    coord1 = LinearCoord.linspace(0.,
                                  60.,
                                  100,
                                  labels=None,
                                  units="minutes",
                                  title='time-on-stream')
    coord2 = LinearCoord.linspace(4000.,
                                  1000.,
                                  100,
                                  labels=None,
                                  units="cm^-1",
                                  title='wavenumber')

    assert coord0.size == 3
    assert coord1.size == 100
    assert coord2.size == 100

    coordc = coord0.copy()
    assert coord0 == coordc

    coordc = coord1.copy()
    assert coord1 == coordc
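A linear coordinate is fully determined by its `offset`, `increment` and `size`; a sketch of the equivalence with `np.arange`, inferred from the assertions above:

c = LinearCoord(offset=2.0, increment=2.0, size=10)
assert np.all(c.data == 2.0 + 2.0 * np.arange(10))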
Example #12
def test_coord_unary_ufuncs_simple_data(name):
    coord0 = Coord(data=np.linspace(4000, 1000, 10),
                   units='km',
                   mask=None,
                   title='something')

    f = getattr(np, name)
    r = f(coord0)
    assert isinstance(r, Coord)
Example #13
def test_coord_slicing():
    # slicing by index

    coord0 = Coord(data=np.linspace(4000, 1000, 10),
                   mask=None,
                   title="wavelength")

    assert coord0[0] == 4000.0

    coord1 = Coord(data=np.linspace(4000, 1000, 10),
                   units="cm^-1",
                   mask=None,
                   title="wavelength")
    c1 = coord1[0]
    assert isinstance(c1.values, Quantity)
    assert coord1[0].values == 4000.0 * (1.0 / ur.cm)

    # slicing with labels

    labs = list("abcdefghij")

    coord0 = Coord(
        data=np.linspace(4000, 1000, 10),
        labels=labs,
        units="cm^-1",
        mask=None,
        title="wavelength",
    )

    assert coord0[0].values == 4000.0 * (1.0 / ur.cm)
    assert isinstance(coord0[0].values, Quantity)

    assert coord0[2] == coord0["c"]
    assert coord0["c":"d"] == coord0[2:4]  # label included

    # slicing only-labels coordinates

    y = list("abcdefghij")
    a = Coord(labels=y, name="x")
    assert a.name == "x"
    assert isinstance(a.labels, np.ndarray)
    assert_array_equal(a.values, a.labels)
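Note the asymmetry demonstrated above: integer slices follow Python's half-open convention, while label slices include the stop label, so both expressions below cover exactly two points (`coord0` as defined with labels above):

assert coord0["c":"d"].size == 2  # labels 'c' and 'd' (stop label included)
assert coord0[2:4].size == 2      # indices 2 and 3 (index 4 excluded)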
Example #14
 def sv(self):
     """|NDDataset|, Singular values"""
     size = self.s.size
     sv = self.s.copy()
     sv.name = 'sv'
     sv.title = 'singular values'
     sv.set_coordset(
         Coord(None,
               labels=['#%d' % (i + 1) for i in range(size)],
               title='Components'))
     return sv
Example #15
 def ev(self):
     """|NDDataset|, Explained variance"""
     size = self.s.size
     ev = self.s**2 / (size - 1)
     ev.name = 'ev'
     ev.title = 'explained variance'
     ev.set_coordset(
         Coord(None,
               labels=['#%d' % (i + 1) for i in range(size)],
               title='Components'))
     return ev
Example #16
def test_coord_slicing():
    # slicing by index

    coord0 = Coord(data=np.linspace(4000, 1000, 10),
                   mask=None,
                   title='wavelength')

    assert coord0[0] == 4000.0

    coord1 = Coord(data=np.linspace(4000, 1000, 10),
                   units='cm^-1',
                   mask=None,
                   title='wavelength')
    c1 = coord1[0]
    assert isinstance(c1.values, Quantity)
    assert coord1[0].values == 4000.0 * (1. / ur.cm)

    # slicing with labels

    labs = list('abcdefghij')

    coord0 = Coord(data=np.linspace(4000, 1000, 10),
                   labels=labs,
                   units='cm^-1',
                   mask=None,
                   title='wavelength')

    assert coord0[0].values == 4000.0 * (1. / ur.cm)
    assert isinstance(coord0[0].values, Quantity)

    assert coord0[2] == coord0['c']
    assert coord0['c':'d'] == coord0[2:4]  # label included

    # slicing only-labels coordinates

    y = list('abcdefghij')
    a = Coord(labels=y, name='x')
    assert a.name == 'x'
    assert isinstance(a.labels, np.ndarray)
    assert_array_equal(a.values, a.labels)
Example #17
 def ev(self):
     """
     Explained variance (|NDDataset|).
     """
     size = self.s.size
     ev = self.s**2 / (size - 1)
     ev.name = "ev"
     ev.title = "explained variance"
     ev.set_coordset(
         Coord(None,
               labels=[f"#{(i + 1)}" for i in range(size)],
               title="Components"))
     return ev
Example #18
 def sv(self):
     """
     Singular values (|NDDataset|).
     """
     size = self.s.size
     sv = self.s.copy()
     sv.name = "sv"
     sv.title = "singular values"
     sv.set_coordset(
         Coord(None,
               labels=[f"#{(i + 1)}" for i in range(size)],
               title="Components"))
     return sv
Example #19
    def update(self, **kwargs):
        """
        Update specific coordinates in the CoordSet.

        Parameters
        ----------
        **kwargs
            Only keywords among the CoordSet.names are allowed; they denote
            the name of a dimension.
        """
        dims = kwargs.keys()
        for dim in list(dims)[:]:
            if dim in self.names:
                # we can replace the given coordinates
                idx = self.names.index(dim)
                self[idx] = Coord(kwargs.pop(dim), name=dim)
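A typical call, as used in the IRIS test (Example #10), replaces one coordinate while preserving its dimension name:

X.coordset.update(y=Coord(p, title='pressure', units='torr'))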
Example #20
def _add_omnic_info(dataset, **kwargs):
    # get the time and name
    name = desc = dataset.name

    # modify the dataset metadata
    dataset.units = 'absorbance'
    dataset.title = 'absorbance'
    dataset.name = name
    dataset.description = ('Dataset from .csv file: {}\n'.format(desc))
    dataset.history = str(datetime.now(
        timezone.utc)) + ':read from omnic exported csv file \n'
    dataset.origin = 'omnic'

    # Set the NDDataset date
    dataset._date = datetime.now(timezone.utc)
    dataset._modified = dataset.date

    # x axis
    dataset.x.units = 'cm^-1'

    # y axis ?
    if '_' in name:
        name, dat = name.split('_')
        # if needed convert weekday name to English
        dat = dat.replace('Lun', 'Mon')
        dat = dat[:3].replace('Mar', 'Tue') + dat[3:]
        dat = dat.replace('Mer', 'Wed')
        dat = dat.replace('Jeu', 'Thu')
        dat = dat.replace('Ven', 'Fri')
        dat = dat.replace('Sam', 'Sat')
        dat = dat.replace('Dim', 'Sun')
        # convert month name to English
        dat = dat.replace('Aout', 'Aug')

        # get the dates
        acqdate = datetime.strptime(dat, "%a %b %d %H-%M-%S %Y")

        # Transform back to timestamp for storage in the Coord object
        # use datetime.fromtimestamp(d, timezone.utc)
        # to transform back to a datetime object
        timestamp = acqdate.timestamp()

        dataset.y = Coord(np.array([timestamp]), name='y')
        dataset.set_coordtitles(y='acquisition timestamp (GMT)',
                                x='wavenumbers')
        dataset.y.labels = np.array([[acqdate], [name]])
        dataset.y.units = 's'

    return dataset
Example #21
def _add_omnic_info(dataset, **kwargs):
    # get the time and name
    name = desc = dataset.name

    # modify the dataset metadata
    dataset.units = "absorbance"
    dataset.title = "absorbance"
    dataset.name = name
    dataset.description = "Dataset from .csv file: {}\n".format(desc)
    dataset.history = (str(datetime.now(timezone.utc)) +
                       ":read from omnic exported csv file \n")
    dataset.origin = "omnic"

    # Set the NDDataset date
    dataset._date = datetime.now(timezone.utc)
    dataset._modified = dataset.date

    # x axis
    dataset.x.units = "cm^-1"

    # y axis ?
    if "_" in name:
        name, dat = name.split("_")
        # if needed convert weekday name to English
        dat = dat.replace("Lun", "Mon")
        dat = dat[:3].replace("Mar", "Tue") + dat[3:]
        dat = dat.replace("Mer", "Wed")
        dat = dat.replace("Jeu", "Thu")
        dat = dat.replace("Ven", "Fri")
        dat = dat.replace("Sam", "Sat")
        dat = dat.replace("Dim", "Sun")
        # convert month name to English
        dat = dat.replace("Aout", "Aug")

        # get the dates
        acqdate = datetime.strptime(dat, "%a %b %d %H-%M-%S %Y")

        # Transform back to timestamp for storage in the Coord object
        # use datetime.fromtimestamp(d, timezone.utc)
        # to transform back to a datetime object
        timestamp = acqdate.timestamp()

        dataset.y = Coord(np.array([timestamp]), name="y")
        dataset.set_coordtitles(y="acquisition timestamp (GMT)",
                                x="wavenumbers")
        dataset.y.labels = np.array([[acqdate], [name]])
        dataset.y.units = "s"

    return dataset
Example #22
def _make_concentrations_matrix(*profiles):
    from spectrochempy.core.dataset.coord import Coord
    from spectrochempy.core.dataset.nddataset import NDDataset

    t = Coord(np.linspace(0, 10, 50), units='hour', title='time')
    c = []
    for p in profiles:
        c.append(p(t.data))
    ct = np.vstack(c)
    ct = ct - ct.min()
    ct = ct / np.sum(ct, axis=0)
    ct = NDDataset(data=ct,
                   title='concentration',
                   coordset=[range(len(ct)), t])

    return ct
Example #23
def _make_spectra_matrix(pos, width, ampl):
    from spectrochempy.core.dataset.coord import Coord
    from spectrochempy.core.dataset.nddataset import NDDataset
    from spectrochempy.core.fitting.models import gaussianmodel

    x = Coord(np.linspace(6000.0, 1000.0, 4000),
              units='cm^-1',
              title='wavenumbers')
    s = []
    for args in zip(ampl, width, pos):
        s.append(gaussianmodel().f(x.data, *args))

    st = np.vstack(s)
    st = NDDataset(data=st,
                   units='absorbance',
                   title='absorbance',
                   coordset=[range(len(st)), x])

    return st
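Together, the two helpers above can build a synthetic bilinear dataset, e.g. for testing curve-resolution methods. A hedged sketch in which the profile functions and band parameters are purely illustrative, and where `dot` and the NDDataset transpose `C.T` are assumed available as in the PCA example below:

C = _make_concentrations_matrix(lambda t: np.exp(-t / 2), lambda t: 1 - np.exp(-t / 2))
S = _make_spectra_matrix([4000.0, 2500.0], [300.0, 500.0], [1.0, 0.8])
X = dot(C.T, S)  # (time x wavenumber) data matrix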
Example #24
    def get_conc(self, n_pc=None):
        """
        Computes abstract concentration profile (first in - first out).

        Parameters
        ----------
        n_pc : int, optional
            Number of pure species for which the concentration profile must be
            computed. If not given, all components are used.

        Returns
        -------
        concentrations
            Concentration profile.
        """
        M, K = self.f_ev.shape
        if n_pc is None:
            n_pc = K
        n_pc = min(K, n_pc)

        f = self.f_ev
        b = self.b_ev

        xcoord = Coord(range(n_pc), title="PS#")
        c = NDDataset(
            np.zeros((M, n_pc)),
            coordset=CoordSet(y=self._X.y, x=xcoord),
            name=f"C_EFA[{self._X.name}]",
            title="relative concentration",
            description="Concentration profile from EFA",
            history=f"{datetime.now(timezone.utc)}: created by spectrochempy",
        )
        if self._X.is_masked:
            masked_rows = np.all(self._X.mask, axis=-1)
        else:
            masked_rows = np.array([False] * M)

        for i in range(M):
            if masked_rows[i]:
                c[i] = MASKED
                continue
            c[i] = np.min((f.data[i, :n_pc], b.data[i, :n_pc][::-1]), axis=0)
        return c
Example #25
    def _valid_coordset(self, coords):
        # used in coords_validate and setattr
        if coords is None:
            return

        for k, coord in enumerate(coords):

            if (coord is not None and not isinstance(coord, CoordSet)
                    and coord.data is None):
                continue

            # For coord to be acceptable, we require at least an NDArray, an NDArray subclass or a CoordSet
            if not isinstance(coord, (LinearCoord, Coord, CoordSet)):
                if isinstance(coord, NDArray):
                    coord = coords[k] = Coord(coord)
                else:
                    raise TypeError(
                        "Coordinates must be an instance or a subclass of Coord class or NDArray, or of "
                        f" CoordSet class, but an instance of {type(coord)} has been passed"
                    )

            if self.dims and coord.name in self.dims:
                # check the validity of the given coordinates in terms of size (if it corresponds to one of the dims)
                size = coord.size

                if self.implements("NDDataset"):
                    idx = self._get_dims_index(
                        coord.name)[0]  # idx in self.dims
                    if size != self._data.shape[idx]:
                        raise ValueError(
                            f"the size of a coordinate array must be None or equal"
                            f" to that of the respective `{coord.name}`"
                            f" data dimension, but coordinate size={size} != data shape[{idx}]="
                            f"{self._data.shape[idx]}")
                else:
                    pass  # bypass this checking for any other derived type (should be done in the subclass)

        coords._parent = self
        return coords
Example #26
    def __setattr__(self, key, value):

        if key in DEFAULT_DIM_NAME:  # syntax such as ds.x, ds.y, etc...
            # Note the above test is important to avoid errors with traitlets
            # even if it looks redundant with the following
            if key in self.dims:
                if self._coordset is None:
                    # we need to create a coordset first
                    self.set_coordset(
                        dict((self.dims[i], None) for i in range(self.ndim)))
                idx = self._coordset.names.index(key)
                _coordset = self._coordset
                listcoord = False
                if isinstance(value, list):
                    listcoord = all(
                        [isinstance(item, Coord) for item in value])
                if listcoord:
                    _coordset[idx] = list(
                        CoordSet(value).to_dict().values())[0]
                    _coordset[idx].name = key
                    _coordset[idx]._is_same_dim = True
                elif isinstance(value, CoordSet):
                    if len(value) > 1:
                        value = CoordSet(value)
                    _coordset[idx] = list(value.to_dict().values())[0]
                    _coordset[idx].name = key
                    _coordset[idx]._is_same_dim = True
                elif isinstance(value, (Coord, LinearCoord)):
                    value.name = key
                    _coordset[idx] = value
                else:
                    _coordset[idx] = Coord(value, name=key)
                _coordset = self._valid_coordset(_coordset)
                self._coordset.set(_coordset)
            else:
                raise AttributeError(f"Coordinate `{key}` is not used.")
        else:
            super().__setattr__(key, value)
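The override above makes dimension names assignable as attributes. A short sketch for a dataset `d1` with dims ['y', 'x'] as in Example #1:

d1.x = Coord(np.linspace(1.0, 5.5, 5), title='pressure')  # replace the x coordinate
d1.y = np.arange(10.0)  # a bare array is wrapped into a Coord automatically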
Example #27
def _read_txt(*args, **kwargs):
    # read Labspec *txt files or series

    dataset, filename = args
    content = kwargs.get("content", False)

    if content:
        # reading from an in-memory content string is not implemented yet;
        # set `lines` empty so that the early return below is taken
        lines = []
        # fid = io.StringIO(content)
        # TODO: get the list of lines from the content string

    else:
        # read the file, falling back to latin-1 when it is not utf-8;
        # context managers ensure the file handle is closed in both cases
        try:
            with open(filename, "r", encoding="utf-8") as fid:
                lines = fid.readlines()
        except UnicodeDecodeError:
            with open(filename, "r", encoding="latin-1") as fid:
                lines = fid.readlines()

    if len(lines) == 0:
        return

    # Metadata
    meta = Meta()

    i = 0
    while lines[i].startswith("#"):
        key, val = lines[i].split("=")
        key = key[1:]
        if key in meta.keys():
            key = f"{key} {i}"
        meta[key] = val.strip()
        i += 1

    # the .txt extension is fairly common. We detect non-LabSpec files from
    # the absence of a few keys. Two types of files (1D or 2D) are considered:
    labspec_keys_1D = ["Acq. time (s)", "Dark correction"]
    labspec_keys_2D = ["Exposition", "Grating"]

    if all(keywd in meta.keys() for keywd in labspec_keys_1D):
        pass
    elif all(keywd in meta.keys() for keywd in labspec_keys_2D):
        pass
    else:
        # this is not a labspec txt file
        return

    # read spec
    rawdata = np.genfromtxt(lines[i:], delimiter="\t")

    # populate the dataset
    if rawdata.shape[1] == 2:
        data = rawdata[:, 1][np.newaxis]
        _x = Coord(rawdata[:, 0], title="Raman shift", units="1/cm")
        _y = Coord(None, title="Time", units="s")
        date_acq, _y = _transf_meta(_y, meta)

    else:
        data = rawdata[1:, 1:]
        _x = Coord(rawdata[0, 1:], title="Raman shift", units="1/cm")
        _y = Coord(rawdata[1:, 0], title="Time", units="s")
        date_acq, _y = _transf_meta(_y, meta)

    # try to transform to linear coord
    _x.linear = True

    # if success linear should still be True
    if _x.linear:
        _x = LinearCoord(_x)

    # set dataset metadata
    dataset.data = data
    dataset.set_coordset(y=_y, x=_x)
    dataset.title = "Counts"
    dataset.units = None
    dataset.name = filename.stem
    dataset.meta = meta

    # date_acq is Acquisition date at start (first moment of acquisition)
    dataset.description = "Spectrum acquisition : " + str(date_acq)

    # Set the NDDataset date
    dataset._date = datetime.datetime.now(datetime.timezone.utc)
    dataset._modified = dataset.date

    # Set origin, description and history
    dataset.history = f"{dataset.date}:imported from LabSpec6 text file {filename}"

    return dataset
Example #28
    def __init__(self, dataset, centered=True, standardized=False, scaled=False):
        """
        Parameters
        ----------
        dataset : |NDDataset| object
            The input dataset has shape (M, N). M is the number of
            observations (for example, a series of IR spectra) while N
            is the number of features (for example the wavenumbers measured
            in each IR spectrum).
        centered : bool, optional, default:True
            If True the data are centered around the mean values: :math:`X' = X - mean(X)`.
        standardized : bool, optional, default:False
            If True the data are scaled to unit standard deviation: :math:`X' = X / \\sigma`.
        scaled : bool, optional, default:False
            If True the data are scaled in the interval [0-1]: :math:`X' = (X - min(X)) / (max(X) - min(X))`.
        """
        self.prefs = dataset.preferences

        self._X = X = dataset

        Xsc = X.copy()

        # mean center the dataset
        # -----------------------
        self._centered = centered
        if centered:
            self._center = center = np.mean(X, axis=0)
            Xsc = X - center
            Xsc.title = "centered %s" % X.title

        # Standardization
        # ---------------
        self._standardized = standardized
        if standardized:
            self._std = np.std(Xsc, axis=0)
            Xsc /= self._std
            Xsc.title = "standardized %s" % Xsc.title

        # Scaling
        # -------
        self._scaled = scaled
        if scaled:
            self._min = np.min(Xsc, axis=0)
            self._ampl = np.ptp(Xsc, axis=0)
            Xsc -= self._min
            Xsc /= self._ampl
            Xsc.title = "scaled %s" % Xsc.title

        self._Xscaled = Xsc

        # perform SVD
        # -----------
        svd = SVD(Xsc)
        sigma = svd.s.diag()
        U = svd.U
        VT = svd.VT

        # select n_pc loadings & compute scores
        # --------------------------------------------------------------------

        # loadings

        LT = VT
        LT.title = 'loadings (L^T) of ' + X.name
        LT.history = 'Created by PCA'

        # scores

        S = dot(U, sigma)
        S.title = 'scores (S) of ' + X.name
        S.set_coordset(y=X.y,
                       x=Coord(None, labels=['#%d' % (i + 1) for i in range(svd.s.size)], title='principal component'))

        S.description = 'scores (S) of ' + X.name
        S.history = 'Created by PCA'

        self._LT = LT
        self._S = S

        # other attributes
        # ----------------

        self._sv = svd.sv
        self._sv.x.title = 'PC #'

        self._ev = svd.ev
        self._ev.x.title = 'PC #'

        self._ev_ratio = svd.ev_ratio
        self._ev_ratio.x.title = 'PC #'

        self._ev_cum = svd.ev_cum
        self._ev_cum.x.title = 'PC #'

        return
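A hedged usage sketch for the constructor above (the class is assumed to be exposed as `PCA`, and `X` is read as in the earlier examples); the constructor itself computes and stores the scores and loadings:

X = scp.read('irdata/nh4y-activation.spg')
pca = PCA(X, centered=True)
# attributes set by __init__ above: pca._S (scores), pca._LT (loadings),
# pca._ev (explained variance)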
Example #29
def align(dataset, *others, **kwargs):
    """
    Align individual |NDDataset| along given dimensions using various methods.

    Parameters
    ----------
    dataset : |NDDataset|
        Dataset on which we want to align other objects.
    *others : |NDDataset|
        Objects to align.
    dim : str, optional, default='x'
        Along which axis to perform the alignment.
    dims : list of str, optional, default=None
        Align along all dims defined in dims (if dim is also
        defined, then dims have higher priority).
    method : enum ['outer', 'inner', 'first', 'last', 'interpolate'], optional, default='outer'
        Which method to use for the alignment.

        The alignment methods are:

        * 'outer' means that a union of the different coordinates is
          achieved (missing values are masked);
        * 'inner' means that the intersection of the coordinates is used;
        * 'first' means that the first dataset is used as reference;
        * 'last' means that the last dataset is used as reference;
        * 'interpolate' means that interpolation is performed relative to
          the first dataset.
    interpolate_method : enum ['linear', 'pchip'], optional, default='linear'
        Method of interpolation to perform for the alignment.
    interpolate_sampling : 'auto', int or float, optional, default='auto'

        * 'auto': the sampling is determined automatically from the existing data;
        * int: if an integer value is specified, the sampling interval for
          the interpolated data will be split into this number of points;
        * float: if a float value is provided, it determines the interval
          between the interpolated data.
    coord : |Coord|, optional, default=None
        Coordinates to use for the alignment. Those corresponding to the
        dimensions to align are ignored.
    copy : bool, optional, default=True
        If False, the returned objects will share memory with the original
        objects whenever possible: in principle, only if reindexing is
        not necessary.

    Returns
    -------
    aligned_datasets : tuple of |NDDataset|
        Same objects as datasets with dimensions aligned.

    Raises
    ------
    ValueError
        Issued when the dimensions given in the `dim` or `dims` argument are
        not compatible (units, titles, etc.).
    """
    # DEVELOPER NOTE
    # There are probably better methods but, to simplify dealing with
    # LinearCoord, we transform them into Coord before treatment (going back
    # to linear if possible at the end of the process)

    # TODO: Perform an alignment along numeric labels
    # TODO: add example in docs

    # copy objects?
    copy = kwargs.pop('copy', True)

    # make a single list with dataset and the remaining object
    objects = [dataset] + list(others)

    # should we align on given external coordinates
    extern_coord = kwargs.pop('coord', None)
    if extern_coord and extern_coord.implements('LinearCoord'):
        extern_coord = Coord(extern_coord, linear=False, copy=True)

    # what's the method to use (by default='outer')
    method = kwargs.pop('method', 'outer')

    # trivial cases where alignment is not possible or unnecessary
    if not objects:
        warning_('No object provided for alignment!')
        return None

    if len(objects) == 1 and objects[0].implements(
            'NDDataset') and extern_coord is None:
        # no alignment necessary
        return objects

    # evaluate on which axis we align
    axis, dims = dataset.get_axis(only_first=False, **kwargs)

    # check compatibility of the dims and prepare the dimension for alignment
    for axis, dim in zip(axis, dims):

        # get all objects to align
        _objects = {}
        _nobj = 0

        for idx, object in enumerate(objects):

            if not object.implements('NDDataset'):
                error_(
                    f'Bad object(s) found: {object}. Note that only NDDataset '
                    f'objects are accepted '
                    f'for alignment')
                return None

            _objects[_nobj] = {
                'obj': object.copy(),
                'idx': idx,
            }
            _nobj += 1

        _last = _nobj - 1

        # get the reference object (by default the first, except if method
        # is set to 'last')
        ref_obj_index = 0
        if method == 'last':
            ref_obj_index = _last

        ref_obj = _objects[ref_obj_index]['obj']

        # as we will sort their coordinates at some point, we need to know
        # if the coordinates need to be reversed at
        # the end of the alignment process
        reversed = ref_obj.coordset[dim].reversed
        if reversed:
            ref_obj.sort(descend=False, dim=dim, inplace=True)

        # get the coordset corresponding to the reference object
        ref_obj_coordset = ref_obj.coordset

        # get the coordinate for the reference dimension
        ref_coord = ref_obj_coordset[dim]

        # as we will sort their coordinates at some point, we need to know
        # if the coordinates need to be reversed at
        # the end of the alignment process
        reversed = ref_coord.reversed

        # prepare a new Coord object to store the final new dimension
        new_coord = ref_coord.copy()

        ndec = get_n_decimals(new_coord.data.max(), 1.e-5)

        if new_coord.implements('LinearCoord'):
            new_coord = Coord(new_coord, linear=False, copy=True)

        # loop on all object
        for index, object in _objects.items():

            obj = object['obj']

            if obj is ref_obj:
                # not necessary to compare with itself!
                continue

            if reversed:
                obj.sort(descend=False, dim=dim, inplace=True)

            # get the current object's coordinates and check compatibility
            coord = obj.coordset[dim]
            if coord.implements('LinearCoord') or coord.linear:
                coord = Coord(coord, linear=False, copy=True)

            if not coord.is_units_compatible(ref_coord):
                # not compatible, stop everything
                raise UnitsCompatibilityError(
                    'NDDataset objects to align must have compatible units!')

            # do units transform if necessary so coords can be compared
            if coord.units != ref_coord.units:
                coord.ito(ref_coord)

            # adjust new_coord depending on the method of alignment

            new_coord_data = set(np.around(new_coord.data, ndec))
            coord_data = set(np.around(coord.data, ndec))

            if method in ['outer', 'interpolate']:
                # in this case we do a union of the coords (masking the
                # missing values)
                # For method=`interpolate`, the interpolation will be
                # performed in a second step
                new_coord._data = sorted(coord_data | new_coord_data)

            elif method == 'inner':
                # take only the intersection of the coordinates
                # and generate a warning if the result is empty
                new_coord._data = sorted(coord_data & new_coord_data)

            elif method in ['first', 'last']:
                # we take the reference coordinates already determined as
                # basis (masking the missing values)
                continue

            else:
                raise NotImplementedError(f'The method {method} is unknown!')

        # Now perform alignment of all objects on the new coordinates
        for index, object in _objects.items():

            obj = object['obj']

            # get the dim index for the given object
            dim_index = obj.dims.index(dim)

            # prepare slicing keys ; set slice(None) for the untouched
            # dimensions preceding the dimension of interest
            prepend_keys = [slice(None)] * dim_index

            # New objects for obj must be created with the new coordinates

            # change the data shape
            new_obj_shape = list(obj.shape)
            new_obj_shape[dim_index] = len(new_coord)
            new_obj_data = np.full(new_obj_shape, np.NaN)

            # create new dataset for obj and ref_objects
            if copy:
                new_obj = obj.copy()
            else:
                new_obj = obj

            # update the data and mask
            coord = obj.coordset[dim]
            coord_data = set(np.around(coord.data, ndec))

            dim_loc = new_coord._loc2index(sorted(coord_data))
            loc = tuple(prepend_keys + [dim_loc])

            new_obj._data = new_obj_data

            # mask all the data then unmask later the relevant data in
            # the next step

            if not new_obj.is_masked:
                new_obj.mask = MASKED
                new_obj.mask[loc] = False
            else:
                mask = new_obj.mask.copy()
                new_obj.mask = MASKED
                new_obj.mask[loc] = mask

            # set the data for the loc
            new_obj._data[loc] = obj.data

            # update the coordinates
            new_coordset = obj.coordset.copy()
            if coord.is_labeled:
                label_shape = list(coord.labels.shape)
                label_shape[0] = new_coord.size
                new_coord._labels = np.zeros(tuple(label_shape)).astype(
                    coord.labels.dtype)
                new_coord._labels[:] = '--'
                new_coord._labels[dim_loc] = coord.labels
            setattr(new_coordset, dim, new_coord)
            new_obj._coordset = new_coordset

            # reversed?
            if reversed:
                # we must reverse the given coordinates
                new_obj.sort(descend=reversed, dim=dim, inplace=True)

            # update the _objects
            _objects[index]['obj'] = new_obj

            if method == 'interpolate':
                warning_(
                    'Interpolation not yet implemented - for now equivalent '
                    'to `outer`')

        # the new transformed objects must be in the same order as the passed
        # objects, and the missing values must be masked (for the moment they
        # are set to NaN)

        for index, object in _objects.items():
            obj = object['obj']
            # obj[np.where(np.isnan(obj))] = MASKED  # mask NaN values
            obj[np.where(np.isnan(obj))] = 99999999999999.
            # replace NaN values (to simplify comparisons)
            idx = int(object['idx'])
            objects[idx] = obj

            # we also transform into linear coord if possible ?
            pass  # TODO:

    # Now return

    return tuple(objects)
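A hedged usage sketch for `align` (dataset read as in the doctest examples above); with method='inner' only the shared x values are kept, so the returned objects end up with the same shape:

A = scp.read('irdata/nh4y-activation.spg')
B = A[:, 10:]  # same data with the first x points dropped
A2, B2 = align(A, B, dim='x', method='inner')
assert A2.shape == B2.shape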
Example #30
def concatenate(*datasets, **kwargs):
    """
    Concatenation of |NDDataset| objects along a given axis.

    Any number of |NDDataset| objects can be concatenated (by default
    along the last dimension). For this operation
    to be defined, the following must be true:

        #. all inputs must be valid |NDDataset| objects;
        #. units of data must be compatible;
        #. concatenation is along the axis specified or the last one;
        #. along the non-concatenated dimensions, shapes must match.

    Parameters
    ----------
    *datasets : positional |NDDataset| arguments
        The dataset(s) to be concatenated to the current dataset. The datasets
        must have the same shape, except in the dimension corresponding to axis
        (the last, by default).
    **kwargs
        Optional keyword parameters (see Other Parameters).

    Returns
    -------
    out
        A |NDDataset| created from the concatenation of the |NDDataset| input objects.

    Other Parameters
    ----------------
    dims : str, optional, default='x'
        The dimension along which the operation is applied.

    axis : int, optional
        The axis along which the operation is applied.

    See Also
    --------
    stack : Stack of |NDDataset| objects along a new dimension.

    Examples
    --------
    >>> A = scp.read('irdata/nh4y-activation.spg', protocol='omnic')
    >>> B = scp.read('irdata/nh4y-activation.scp')
    >>> C = scp.concatenate(A[10:], B[3:5], A[:10], axis=0)
    >>> A[10:].shape, B[3:5].shape, A[:10].shape, C.shape
    ((45, 5549), (2, 5549), (10, 5549), (57, 5549))

    or

    >>> D = A.concatenate(B, B, axis=0)
    >>> A.shape, B.shape, D.shape
    ((55, 5549), (55, 5549), (165, 5549))

    >>> E = A.concatenate(B, axis=1)
    >>> A.shape, B.shape, E.shape
    ((55, 5549), (55, 5549), (55, 11098))
    """

    # check for deprecated usage
    if "force_stack" in kwargs:
        warn("force_stack is not used anymore, use stack() instead",
             DeprecationWarning)
        return stack(datasets)

    # get a copy of input datasets in order that input data are not modified
    datasets = _get_copy(datasets)

    # get axis from arguments
    axis, dim = datasets[0].get_axis(**kwargs)

    # check shapes, except for dim along which concatenation will be done
    shapes = {ds.shape[:axis] + ds.shape[axis + 1:] for ds in datasets}
    if len(shapes) != 1:
        raise DimensionsCompatibilityError(
            "all input arrays must have the same shape")

    # check units
    units = tuple(set(ds.units for ds in datasets))
    if len(units) == 1:
        units = datasets[0].units
    else:
        # check compatibility
        for i, u1 in enumerate(units[:-1]):
            for u2 in units[i + 1:]:
                if u1.dimensionality != u2.dimensionality:
                    raise UnitsCompatibilityError(
                        f"Units of the data are {[str(u) for u in units]}. The datasets can't be concatenated"
                    )
        # should be compatible, so convert
        units = datasets[0].units
        for ds in datasets[1:]:
            if ds.units != units:
                ds.ito(units)

    # concatenate or stack the data array + mask
    # --------------------------------------------

    sss = []
    for i, dataset in enumerate(datasets):
        d = dataset.masked_data
        sss.append(d)

    sconcat = np.ma.concatenate(sss, axis=axis)

    data = np.asarray(sconcat)
    mask = sconcat.mask

    # now manage coordinates and labels
    coords = datasets[0].coordset

    if coords is not None:

        if not coords[dim].is_empty:

            labels = []
            if coords[dim].is_labeled:
                for ds in datasets:
                    labels.append(ds.coordset[dim].labels)

            if coords[dim].implements() in ["Coord", "LinearCoord"]:
                coords[dim] = Coord(coords[dim], linear=False)
                if labels != []:
                    coords[dim]._labels = np.concatenate(labels)
            elif coords[dim].implements("CoordSet"):
                if labels != []:
                    labels = np.array(labels)
                    for i, coord in enumerate(coords[dim]):
                        if labels[:i].size != 0:
                            coord._labels = np.concatenate(
                                [label for label in labels[:, i]])

            coords[dim]._data = np.concatenate(
                tuple((ds.coordset[dim].data for ds in datasets)))

    out = dataset.copy()
    out._data = data
    if coords is not None:
        out._coordset[dim] = coords[dim]
    out._mask = mask
    out._units = units

    out.description = f"Concatenation of {len(datasets)} datasets:\n"
    out.description += "( {}".format(datasets[0].name)
    out.title = datasets[0].title
    authortuple = (datasets[0].author, )

    for dataset in datasets[1:]:

        if out.title != dataset.title:
            warn(
                "Different data title => the title is that of the 1st dataset")

        if not (dataset.author in authortuple):
            authortuple = authortuple + (dataset.author, )

        out.author = " & ".join([str(author) for author in authortuple])

        out.description += ", {}".format(dataset.name)

    out.description += " )"
    out._date = out._modified = datetime.datetime.now(datetime.timezone.utc)
    out._history = [str(out.date) + ": Created by concatenate"]

    return out