Example #1
def test_bug_243():
    import spectrochempy as scp

    D = scp.zeros((10, 100))

    x = scp.LinearCoord(offset=0.0, increment=1.0, size=100)
    y = scp.LinearCoord(offset=0.0, increment=1.0, size=10)

    D.set_coordset(x=x, y=y)
    D1 = D[:, 0.:10.]
    D2 = D[:, 20.:40.]

    D12 = scp.concatenate(D1, D2, axis=1)

    # Regression check for bug #243: D2.x.data[-1] is 40., as expected, and
    # D12.x.data[-1] must match it after concatenation:
    assert D12.x.data[-1] == D2.x.data[-1]
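
# Aside (added, not part of the original test): a minimal runnable sketch of
# the two slicing modes used above. Float slices select by coordinate value,
# with an inclusive upper bound (hence D2.x.data[-1] == 40. above), while
# integer slices select by index.
def _slicing_sketch():
    import spectrochempy as scp

    D = scp.zeros((10, 100))
    D.set_coordset(
        x=scp.LinearCoord(offset=0.0, increment=1.0, size=100),
        y=scp.LinearCoord(offset=0.0, increment=1.0, size=10),
    )
    by_value = D[:, 20.0:40.0]  # x values 20. ... 40. -> 21 columns
    by_index = D[:, 20:40]      # columns 20 ... 39    -> 20 columns
    assert by_value.x.size == 21
    assert by_index.x.size == 20
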
Example #2
import numpy as np
import pytest

import spectrochempy as scp
from spectrochempy import Coord, concatenate, stack, ur

# NOTE: assumed import paths for the exception classes and the testing helper;
# these may differ between spectrochempy versions.
from spectrochempy.utils.exceptions import (
    DimensionsCompatibilityError,
    UnitsCompatibilityError,
)
from spectrochempy.utils.testing import assert_dataset_almost_equal


# IR_dataset_2D is a pytest fixture providing a 2D IR dataset
def test_concatenate(IR_dataset_2D):
    dataset = IR_dataset_2D
    dim = "x"
    s1 = dataset[:, -10:]
    s2 = dataset[:, :-10]

    # explicitly specify the concatenation dimension
    s = concatenate(s1, s2, dims=dim)
    assert s.units == s1.units
    assert s.shape[-1] == (s1.shape[-1] + s2.shape[-1])
    assert s.x.size == (s1.x.size + s2.x.size)
    assert s.x != dataset.x
    s = s.sort(dims=dim, descend=True)
    assert_dataset_almost_equal(s.x, Coord(dataset.x, linear=False), decimal=3)

    # default concatenation in the last dimension
    s = concatenate(s1, s2)
    assert s.units == s1.units
    assert s.shape[-1] == (s1.shape[-1] + s2.shape[-1])
    assert s.x.size == (s1.x.size + s2.x.size)
    assert s.x != dataset.x
    s = s.sort(descend=True)
    assert_dataset_almost_equal(s.x, Coord(dataset.x, linear=False), decimal=3)

    s1 = dataset[:10]
    s2 = dataset[20:]
    # check with derived units
    s1 = s1.to(ur.m, force=True)
    s2 = s2.to(ur.dm, force=True)
    s = concatenate(s1, s2, dims=0)
    assert s.units == s1.units
    assert s.shape[0] == (s1.shape[0] + s2.shape[0])
    assert s.y.size == (s1.y.size + s2.y.size)
    s = s.sort(dim="y")

    # second syntax
    s = s1.concatenate(s2, dims=0)
    assert s.units == s1.units
    assert s.shape[0] == (s1.shape[0] + s2.shape[0])
    assert s.y.size == (s1.y.size + s2.y.size)

    # third syntax
    s = concatenate((s1, s2), dims=0)
    assert s.units == s1.units
    assert s.shape[0] == (s1.shape[0] + s2.shape[0])
    assert s.y.size == (s1.y.size + s2.y.size)

    # coordset
    coord_2 = Coord(np.log(s.y.data), title="log_time")
    s.set_coordset(y=[s.y, coord_2], x=s.x)
    s1 = s[:2]
    s2 = s[-5:]
    s12 = concatenate(s1, s2, axis=0)
    assert (s2["y"].labels[1] == s12["y"].labels[1][-5:]).all()

    # authors
    s0 = s[0]
    s1 = s[1]
    s0.author = "sdqe65g4rf"
    s2 = concatenate(s0, s1)
    assert "sdqe65g4rf" in s2.author and s1.author in s2.author

    # titles
    s0.title = "new_title"
    assert concatenate(s0, s1).title == "new_title"

    # incompatible dimensions
    s0 = scp.NDDataset(np.zeros((10, 100)))
    s1 = scp.NDDataset(np.zeros((10, 100)))
    with pytest.raises(DimensionsCompatibilityError):
        s0.concatenate(s1[0].squeeze())

    with pytest.raises(DimensionsCompatibilityError):
        s0.concatenate(s1[:, :50], axis=0)

    # incompatible units
    s0 = scp.NDDataset(np.zeros((10, 100)), units="V")
    s1 = scp.NDDataset(np.zeros((10, 100)), units="A")
    with pytest.raises(UnitsCompatibilityError):
        scp.concatenate(s0, s1)

    s1 = scp.NDDataset(np.ones((10, 100)), units="mV")
    s01 = scp.concatenate(s0, s1)
    assert s01.data[-1, -1] == 0.001
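
    # Note (added): concatenate converts the second operand to the units of
    # the first, so the 1 mV values from s1 appear as 0.001 V in the result,
    # which is what the assert above checks.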

    # -------------------
    # Stack

    # concatenation using stack
    s1 = dataset[:10]
    s2 = dataset[-10:]
    s = stack(s1, s2)
    assert s.units == s1.units
    assert s.shape == (2, s1.shape[0], s1.shape[1])
    assert s.y.size == s1.y.size
    assert s.x.size == s1.x.size

    with pytest.warns(DeprecationWarning):
        concatenate(s1, s2, force_stack=True)
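
    # Note (added): force_stack=True is the deprecated spelling of stack();
    # the DeprecationWarning above confirms it is still accepted.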

    # If one of the dimensions is of size one, then this dimension is NOT removed before stacking
    s0 = dataset[0]
    s1 = dataset[1]
    ss = stack(s0, s1)
    assert s0.shape == (1, 5549)
    assert ss.shape == (2, s1.shape[0], s1.shape[1])

    # stack squeezed (now 1D) datasets
    s0 = dataset[0].copy().squeeze()
    assert s0.shape == (5549,)
    s1 = dataset[1].squeeze()
    assert s1.shape == (5549,)
    s = stack(s0, s1)
    assert s.shape == (2, 5549)

    # stacking datasets with incompatible shapes must raise
    s2 = s1[0:100]
    with pytest.raises(DimensionsCompatibilityError):
        s = stack(s0, s2)
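
# Recap (added): the call styles exercised by test_concatenate above.
#
#     s = scp.concatenate(s1, s2, dims=0)    # function, varargs
#     s = s1.concatenate(s2, dims=0)         # method form
#     s = scp.concatenate((s1, s2), dims=0)  # function, sequence argument
#     s = scp.stack(s1, s2)                  # new leading dim: (2, *s1.shape)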
Example #3
import numpy as np

import spectrochempy as scp
from spectrochempy import show

# NOTE: assumed import path for the line-shape model; the fitting models
# module has moved between spectrochempy versions.
from spectrochempy.core.fitting.models import asymmetricvoigtmodel


def test_EFA(IR_dataset_2D):

    ########################################################################################################################
    # Generate a test dataset
    # ------------------------------------------------------------------
    # 1) simulated chromatogram
    # *************************

    ntimes = 250
    ncomponents = 2

    t = scp.LinearCoord.arange(ntimes, units="minutes",
                               title="time")  # time coordinates
    c = scp.Coord(range(ncomponents),
                  title="components")  # component coordinates

    data = np.zeros((ncomponents, ntimes), dtype=np.float64)

    data[0] = asymmetricvoigtmodel().f(t,
                                       ampl=4,
                                       width=10,
                                       ratio=0.5,
                                       asym=0.4,
                                       pos=50.0)  # compound 1
    data[1] = asymmetricvoigtmodel().f(t,
                                       ampl=5,
                                       width=20,
                                       ratio=0.2,
                                       asym=0.9,
                                       pos=120.0)  # compound 2
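
    # Note (added): each row of `data` is one elution profile, an asymmetric
    # (pseudo-)Voigt band evaluated on the time axis; presumably `ratio`
    # mixes the Gaussian and Lorentzian parts and `asym` skews the band.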

    dsc = scp.NDDataset(data=data, coords=[c, t])
    dsc.plot()
    show()

    ########################################################################################################################
    # 2) absorption spectra
    # **********************

    spec = np.array([[2.0, 3.0, 4.0, 2.0], [3.0, 4.0, 2.0, 1.0]])
    w = scp.Coord(np.arange(1, 5, 1), units="nm", title="wavelength")

    dss = scp.NDDataset(data=spec, coords=[c, w])
    dss.plot()

    ########################################################################################################################
    # 3) simulated data matrix
    # ************************

    dataset = scp.dot(dsc.T, dss)
    dataset.data = np.random.normal(dataset.data, 0.2)
    dataset.title = "intensity"
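    # Note (added): the bilinear model X = C.T @ S yields a matrix of shape
    # (ntimes, n_wavelengths) = (250, 4), to which Gaussian noise with a
    # standard deviation of 0.2 is added.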

    dataset.plot()
    show()

    ########################################################################################################################
    # 4) evolving factor analysis (EFA)
    # *********************************

    efa = scp.EFA(dataset)

    ########################################################################################################################
    # Plots of the log(EV) for the forward and backward analysis
    #

    efa.f_ev.T.plot(yscale="log", legend=efa.f_ev.y.labels)

    efa.b_ev.T.plot(yscale="log")
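
    # Note (added): per the standard EFA procedure, f_ev holds the
    # eigenvalues of the forward analysis (window growing from the first
    # row) and b_ev those of the backward analysis (window growing from the
    # last row).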

    ########################################################################################################################
    # Looking at these EFA curves, it is quite obvious that only two components
    # are really significant, which matches the two components used to build
    # the input data. The third EFA component is mainly due to noise, so we
    # can use it to set a cutoff value.

    n_pc = 2
    efa.cutoff = np.max(efa.f_ev[:, n_pc].data)
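
    # Note (added): presumably, eigenvalues at or below this cutoff are
    # treated as noise in the subsequent EFA plots and concentration
    # estimates; here it is set to the largest value of the third
    # (noise-dominated) forward eigenvalue.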

    f2 = efa.f_ev
    b2 = efa.b_ev

    # we concatenate the datasets to plot them in a single figure
    both = scp.concatenate(f2, b2)
    both.T.plot(yscale="log")

    # TODO: add "legend" keyword in NDDataset.plot()

    # ########################################################################################################################
    # # Get the abstract concentration profile based on the FIFO EFA analysis
    # #
    # efa.cutoff = None
    # c = efa.get_conc(n_pc)
    # c.T.plot()
    #
    # # scp.show()  # uncomment to show plot if needed (not necessary in jupyter notebook)
    #
    # ds = IR_dataset_2D.copy()
    #
    # # columns masking
    # ds[:, 1230.0:920.0] = MASKED  # do not forget to use float in slicing
    # ds[:, 5900.0:5890.0] = MASKED
    #
    # # difference spectra
    # ds -= ds[-1]
    #
    # # column masking for bad columns
    # ds[10:12] = MASKED
    #
    # efa = EFA(ds)
    #
    # n_pc = 4
    # c = efa.get_conc(n_pc)
    # c.T.plot()
    #

    show()