def test_read_matlab():
    """Read Matlab .mat files: a multi-matrix file and a DataSet Object (dso)."""
    # a .MAT file holding several matrices -> a list of datasets
    datasets = NDDataset.read_matlab(MATLABDATA / "als2004dataset.MAT")
    assert len(datasets) == 6
    assert datasets[3].shape == (4, 96)

    # a dso (DataSet Object) file -> a single named dataset
    dso = NDDataset.read_matlab(MATLABDATA / "dso.mat")
    assert dso.name == "Group sust_base line withoutEQU.SPG"
    assert dso.shape == (20, 426)
def test_read_opus():
    """Read Bruker OPUS files from a path, from byte contents, and a background file."""
    # single file
    ds = NDDataset.read_opus(OPUSDATA / "test.0000")
    assert ds.shape == (1, 2567)
    assert ds[0, 2303.8694].data == pytest.approx(2.72740, 0.00001)

    # read from byte contents passed as {filename: bytes}
    path = OPUSDATA / "test.0000"
    content = path.read_bytes()
    from_bytes = NDDataset.read_opus({path.name: content})
    assert from_bytes.name == path.name
    assert from_bytes.shape == (1, 2567)

    # a pure background file yields no dataset
    assert NDDataset.read_opus(OPUSDATA / "background.0") is None
def test_write():
    """Exercise NDDataset.write: explicit filename, then dialog-selected filename."""
    nd = scp.read_omnic("irdata/nh4y-activation.spg")

    # the API write function needs an NDDataset instance as its first argument
    with pytest.raises(TypeError):
        scp.write()

    # the simplest way to save a dataset: write with a filename argument
    filename = nd.write("essai.scp")
    assert filename == cwd / "essai.scp"
    nd2 = NDDataset.load(filename)
    testing.assert_dataset_equal(nd2, nd)
    filename.unlink()

    # if the filename is omitted, a dialog opens to select a name (and a protocol)
    filename = nd.write()
    assert filename is not None
    assert filename.stem == nd.name
    assert filename.suffix == ".scp"
    filename.unlink()

    # clean up files possibly left in the test data directory
    irdatadir = pathclean(prefs.datadir) / "irdata"
    for leftover in ("essai.scp", "nh4y-activation.scp"):
        candidate = irdatadir / leftover
        if candidate.is_file():
            candidate.unlink()
def test_notebook_basecor_bug():
    """Regression test for a baseline-correction bug observed in a notebook."""
    dataset = NDDataset.read_omnic(os.path.join("irdata", "nh4y-activation.spg"))

    # slicing with floats selects wavenumbers; integers would mean point indexes!
    s = dataset[:, 1260.0:5999.0]
    s = s - s[-1]

    basc = BaselineCorrection(s)

    # predefined baseline ranges
    ranges = [
        [1261.86, 1285.89],
        [1556.30, 1568.26],
        [1795.00, 1956.75],
        [3766.03, 3915.81],
        [4574.26, 4616.04],
        [4980.10, 4998.01],
        [5437.52, 5994.70],
    ]

    _ = basc.run(
        *ranges,
        method="multivariate",
        interpolation="pchip",
        npc=5,
        figsize=(6, 6),
        zoompreview=4,
    )

    # the regions used to set the baseline are accessible via the `ranges` attribute
    ranges = basc.ranges
    print(ranges)

    basc.corrected.plot_stack()
def test_issue_375():
    """Minimal reproduction of issue #375 (bar + twinned scatter plot)."""
    n_pc = 3
    color1, color2 = "b", "r"

    ratio = NDDataset([1, 2, 3])
    cum = ratio.cumsum()

    # bar plot: patches only, no line artists
    ax1 = ratio.plot_bar(color=color1, title="Scree plot")
    assert len(ax1.lines) == 0, "no lines"
    assert len(ax1.patches) == 3, "bar present"

    # scatter on a twinned axis: one line, no patches
    ax2 = cum.plot_scatter(color=color2, pen=True, markersize=7.0, twinx=ax1)
    assert len(ax2.lines) == 1, "1 lines"
    assert len(ax2.patches) == 0, "no bar present on the second plot"

    # TODO: Don't know yet how to get the marker present.
    ax1.set_title("Scree plot")
    show()
def make_samples(force_original=False):
    """Build the sample dict with IR and TGA datasets.

    Reads cached native .scp files when available, unless `force_original`
    is True, in which case the original zip/csv sources are re-read and a
    native copy is saved for next time.
    """
    _samples = {
        "P350": {"label": r"$\mathrm{M_P}\,(623\,K)$"},
        # 'A350': {'label': r'$\mathrm{M_A}\,(623\,K)$'},
        # 'B350': {'label': r'$\mathrm{M_B}\,(623\,K)$'}
    }

    # IR data (our data live in our test `datadir` directory)
    for key, sample in _samples.items():
        basename = os.path.join(prefs.datadir, f"agirdata/{key}/FTIR/FTIR")
        if os.path.exists(basename + ".scp") and not force_original:
            # the native .scp file has already been saved
            sample["IR"] = NDDataset.read(basename + ".scp")
        else:
            # otherwise read the original zip file ...
            sample["IR"] = NDDataset.read_zip(
                basename + ".zip", only=5, origin="omnic", merge=True
            )
            # ... and save a native copy
            sample["IR"].save()

    # TGA data
    for key, sample in _samples.items():
        basename = os.path.join(prefs.datadir, f"agirdata/{key}/TGA/tg")
        if os.path.exists(basename + ".scp") and not force_original:
            sample["TGA"] = NDDataset.read(basename + ".scp")
        else:
            # read the original csv file
            ss = sample["TGA"] = NDDataset.read_csv(basename + ".csv", origin="tga")
            ss.squeeze(inplace=True)
            # keep only data from something close to 0
            s = sample["TGA"] = ss[-0.5:35.0]
            s.save()

    return _samples
def test_generic_read():
    """Generic read with explicit extension, then save/load round trip."""
    # filename + extension specified
    ds = scp.read("wodger.spg")
    assert ds.name == "wodger"

    # save with no filename (should save wodger.scp)
    path = ds.save()
    assert isinstance(path, Path)
    assert path.stem == ds.name
    assert path.parent == ds.directory

    # read should be equivalent to load (but read is a more general function)
    dataset = NDDataset.load("wodger.scp")
    assert dataset.name == "wodger"
def test_plot2D_as_3D():
    """Surface (3D) plotting of a 2D dataset read from a .MAT file."""
    data = NDDataset.read_matlab(os.path.join("matlabdata", "als2004dataset.MAT"))
    X = data[0]

    # default surface plot
    X.plot_surface()

    # again, with explicit coordinates and a title, then a colorbar
    X.set_coordset(y=Coord(title="elution time"), x=Coord(title="wavenumbers"))
    X.title = "intensity"
    X.plot_surface()
    X.plot_surface(colorbar=True)
    show()
def test_plot2D():
    """2D plotting of an IR dataset: stack, image, and preference-driven styles."""
    A = NDDataset.read_omnic("irdata/nh4y-activation.spg")
    A.y -= A.y[0]
    A.y.to("hour", inplace=True)
    A.y.title = "Acquisition time"

    A.copy().plot_stack()
    A.copy().plot_stack(data_transposed=True)
    A.copy().plot_image(style=["sans", "paper"], fontsize=9)

    # use preferences to drive the plot appearance
    prefs = A.preferences
    prefs.reset()
    prefs.image.cmap = "magma"
    prefs.font.size = 10
    prefs.font.weight = "bold"
    prefs.axes.grid = True
    A.plot()
    A.plot(style=["sans", "paper", "grayscale"], colorbar=False)
    show()
def test_plot2D_as_3D():
    """Surface plotting with unit-carrying coordinates."""
    data = NDDataset.read_matlab(os.path.join("matlabdata", "als2004dataset.MAT"))
    X = data[0]

    # default surface plot
    X.plot_surface()

    # with explicit, unit-carrying coordinates and a title
    X.set_coordset(
        y=Coord(title="elution time", units="s"),
        x=Coord(title="wavenumbers", units="cm^-1"),
    )
    X.title = "intensity"
    X.plot_surface()
    X.plot_surface(colorbar=True)
    show()
def test_plot2D():
    """2D plotting of an IR dataset: stack, image, and preference-driven styles."""
    A = NDDataset.read_omnic('irdata/nh4y-activation.spg')
    A.y -= A.y[0]
    A.y.to('hour', inplace=True)
    # fixed typo in the displayed axis title: was 'Aquisition time'
    A.y.title = 'Acquisition time'

    A.copy().plot_stack()
    A.copy().plot_stack(data_transposed=True)
    A.copy().plot_image(style=['sans', 'paper'], fontsize=9)

    # use preferences to drive the plot appearance
    prefs = A.preferences
    prefs.reset()
    prefs.image.cmap = 'magma'
    prefs.font.size = 10
    prefs.font.weight = 'bold'
    prefs.axes.grid = True
    A.plot()
    A.plot(style=['sans', 'paper', 'grayscale'], colorbar=False)
    show()
def test_write():
    """NDDataset.write: explicit filename (no dialog), overwrite (dialog),
    and dialog-selected filename."""
    nd = scp.read_omnic("irdata/nh4y-activation.spg")

    # the API write function needs an NDDataset instance as its first argument
    with pytest.raises(TypeError):
        scp.write()

    # the simplest way to save a dataset: write with a filename argument
    if (cwd / "essai.scp").exists():
        (cwd / "essai.scp").unlink()

    filename = nd.write("essai.scp")  # should not open a DIALOG
    assert filename == cwd / "essai.scp"
    assert filename.exists()

    # writing it again should open a DIALOG to confirm
    filename = nd.write("essai.scp")
    nd2 = NDDataset.load(filename)
    testing.assert_dataset_equal(nd2, nd)
    filename.unlink()

    # if the filename is omitted, a dialog is opened to select a name (and a protocol)
    filename = nd.write()
    assert filename is not None
    assert filename.stem == nd.name
    assert filename.suffix == ".scp"
    filename.unlink()

    # clean up files possibly left in the test data directory
    irdatadir = pathclean(prefs.datadir) / "irdata"
    for leftover in ("essai.scp", "nh4y-activation.scp"):
        candidate = irdatadir / leftover
        if candidate.is_file():
            candidate.unlink()
def test_plot_1D():
    """1D plotting: generic plot, scatter variants, and preference-driven methods."""
    dataset = NDDataset.read_omnic(os.path.join("irdata", "nh4y-activation.spg"))

    # get first 1D spectrum (slice with wavenumbers)
    nd0 = dataset[0, 1550.0:1600.0]

    # plot generic 1D
    nd0.plot()
    nd0.plot_scatter(plottitle=True)
    nd0.plot_scatter(marker="^", markevery=10, title="scatter+marker")

    # drive the default 1D plot method through preferences
    prefs = nd0.preferences
    prefs.method_1D = "scatter+pen"
    nd0.plot(title="xxxx")
    prefs.method_1D = "pen"
    nd0.plot(marker="o", markevery=10, title="with marker")

    # plot a 1D column
    col = dataset[:, 3500.0]  # note the indexing using wavenumber!
    _ = col.plot_scatter()
    _ = col.plot_scatter(uselabel=True)
def test_read_carroucell(monkeypatch):
    """Read carroucell directories, with and without the folder-selection dialog."""
    # read_carroucell does not download data itself, so make sure the data
    # are available locally first using read_remote
    NDDataset.read_remote("irdata/carroucell_samp", replace_existing=False)

    nd = NDDataset.read_carroucell("irdata/carroucell_samp", spectra=(1, 2))
    for x in nd:
        info_(" " + x.name + ": " + str(x.shape))
    assert len(nd) == 11
    assert nd[3].shape == (2, 11098)

    nd = NDDataset.read_carroucell("irdata/carroucell_samp", spectra=(1, 1))
    assert isinstance(nd, NDDataset)

    # simulate the folder-selection dialog
    monkeypatch.setattr(spectrochempy.core, "open_dialog", dialog_carroucell)
    monkeypatch.setenv("KEEP_DIALOGS", "True")

    nd = NDDataset.read_carroucell(spectra=(1, 3))
    assert nd[3].shape == (3, 11098)

    nd = NDDataset.read_carroucell(spectra=(2, 3), discardbg=False)
    assert nd[3].shape == (2, 11098)
def test_plotly2D():
    """Prepare a 2D IR dataset with a time axis in hours (plotly plotting)."""
    A = NDDataset.read_omnic('irdata/nh4y-activation.spg',
                             directory=prefs.datadir)
    A.y -= A.y[0]
    A.y.to('hour', inplace=True)
    # fixed typo in the displayed axis title: was 'Aquisition time'
    A.y.title = 'Acquisition time'
def test_read_quadera():
    """Read a single Quadera MS ion-current file."""
    ds = NDDataset.read_quadera('msdata/ion_currents.asc')
    assert str(ds) == 'NDDataset: [float64] A (shape: (y:16975, x:10))'
def fake_dataset(*args, size=3, **kwargs):
    """Return a small stand-in NDDataset for importer tests.

    With no positional arguments, a 1 x `size` dataset is produced;
    otherwise a fixed 1 x 1 x 4 dataset (the args themselves are ignored).
    """
    if args:
        return NDDataset([[range(4)]])
    return NDDataset([range(size)])
def test_1D():
    """Basic 1D plotting of the first spectrum and of a wavenumber column."""
    dataset = NDDataset.read_omnic(
        os.path.join(prefs.datadir, 'irdata', 'nh4y-activation.spg'))

    # get first spectrum and plot it with the generic method
    nd0 = dataset[0]
    nd0.plot()

    # NOTE(review): a large batch of image-comparison checks (plot styles,
    # plot_pen / plot_scatter / plot_lines / plot_bar variants, and
    # plot_multiple with and without styles) was previously exercised here
    # but is currently disabled.

    # plot 1D column
    col = dataset[:, 3500.]  # note the indexing using wavenumber!
    _ = col.plot_scatter()
    show()
def test_read_quadera():
    """Read a single Quadera MS ion-current file (pathlib variant)."""
    ds = NDDataset.read_quadera(MSDATA / "ion_currents.asc")
    assert str(ds) == "NDDataset: [float64] A (shape: (y:16975, x:10))"
def test_read_opus():
    """Read OPUS files: single/multiple files, merge options, byte contents,
    dialogs, whole directories, and via the generic read function."""
    # single file
    A = NDDataset.read_opus(os.path.join('irdata', 'OPUS', 'test.0000'))
    assert A.shape == (1, 2567)
    assert A[0, 2303.8694].data == pytest.approx(2.72740, 0.00001)

    # using a windows path
    A1 = NDDataset.read_opus('irdata\\OPUS\\test.0000')
    assert A1.shape == (1, 2567)

    # single file specified with pathlib
    datadir = Path(prefs.datadir)
    p = datadir / 'irdata' / 'OPUS' / 'test.0000'
    A2 = NDDataset.read_opus(p)
    assert A2.shape == (1, 2567)

    # multiple files not merged
    B = NDDataset.read_opus(
        'test.0000', 'test.0001', 'test.0002',
        directory=os.path.join('irdata', 'OPUS'))
    assert isinstance(B, NDDataset)
    assert len(B) == 3

    # multiple files merged as the merge keyword is set to true
    C = scp.read_opus(
        'test.0000', 'test.0001', 'test.0002',
        directory=os.path.join('irdata', 'OPUS'), merge=True)
    assert C.shape == (3, 2567)

    # multiple files to merge: they are passed as a list
    D = NDDataset.read_opus(
        ['test.0000', 'test.0001', 'test.0002'],
        directory=os.path.join('irdata', 'OPUS'))
    assert D.shape == (3, 2567)

    # multiple files not merged: passed as a list but merge is set to false
    E = scp.read_opus(
        ['test.0000', 'test.0001', 'test.0002'],
        directory=os.path.join('irdata', 'OPUS'), merge=False)
    assert isinstance(E, list)
    assert len(E) == 3

    # read byte contents
    p = datadir / 'irdata' / 'OPUS' / 'test.0000'
    content = p.read_bytes()
    F = NDDataset.read_opus({p.name: content})
    assert F.name == p.name
    assert F.shape == (1, 2567)

    # read multiple contents
    lst = [datadir / 'irdata' / 'OPUS' / f'test.000{i}' for i in range(3)]
    G = NDDataset.read_opus({q.name: q.read_bytes() for q in lst})
    assert len(G) == 3

    # read multiple contents and merge them
    H = NDDataset.read_opus({q.name: q.read_bytes() for q in lst}, merge=True)
    assert H.shape == (3, 2567)

    # read without filename -> open a dialog
    K = NDDataset.read_opus()

    # read in a directory (assume homogeneous type of data -
    # else we must use the generic read function instead)
    K = NDDataset.read_opus(datadir / 'irdata' / 'OPUS')
    assert K.shape == (4, 2567)

    # again we can use merge=False to avoid stacking of all 4 spectra
    J = NDDataset.read_opus(datadir / 'irdata' / 'OPUS', merge=False)
    assert isinstance(J, list)
    assert len(J) == 4

    # single opus file using the generic read function:
    # if the protocol is given it is similar to the read_opus function
    F = NDDataset.read(os.path.join('irdata', 'OPUS', 'test.0000'),
                       protocol='opus')
    assert F.shape == (1, 2567)

    # no protocol? inferred from the filename
    G = NDDataset.read(os.path.join('irdata', 'OPUS', 'test.0000'))
    assert G.shape == (1, 2567)
def test_read():
    """Generic NDDataset.read: protocol inference, merging, byte contents.

    Fix: the original `isinstance(H, list)` was a bare no-op expression;
    it is now an actual assertion. String literals broken by formatting
    are reconstructed.
    """
    f = Path('irdata/OPUS/test.0000')

    A1 = NDDataset.read_opus(f)
    assert A1.shape == (1, 2567)

    # single file read with protocol specified
    A2 = NDDataset.read(f, protocol='opus')
    assert A2 == A1

    A3 = scp.read('irdata/nh4y-activation.spg', protocol='omnic')
    assert str(A3) == 'NDDataset: [float64] a.u. (shape: (y:55, x:5549))'

    # single file without protocol: inferred from filename
    A4 = NDDataset.read(f)
    assert A4 == A1

    A5 = scp.read('irdata/nh4y-activation.spg')
    assert str(A5) == 'NDDataset: [float64] a.u. (shape: (y:55, x:5549))'

    # native format
    f = A5.save_as('nh4y.scp')
    A6 = scp.read('irdata/nh4y.scp')
    assert str(A6) == 'NDDataset: [float64] a.u. (shape: (y:55, x:5549))'

    A7 = scp.read('nh4y', directory='irdata', protocol='scp')
    assert str(A7) == 'NDDataset: [float64] a.u. (shape: (y:55, x:5549))'

    A8 = scp.read('nh4y', directory='irdata')
    assert str(A8) == 'NDDataset: [float64] a.u. (shape: (y:55, x:5549))'
    f.unlink()

    # multiple compatible 1D files automatically merged
    B = NDDataset.read('test.0000', 'test.0001', 'test.0002',
                       directory=os.path.join('irdata', 'OPUS'))
    assert str(B) == 'NDDataset: [float64] a.u. (shape: (y:3, x:2567))'
    assert len(B) == 3

    # multiple compatible 1D files not merged if the merge keyword is False
    C = scp.read('test.0000', 'test.0001', 'test.0002',
                 directory=os.path.join('irdata', 'OPUS'), merge=False)
    assert isinstance(C, list)

    # multiple 1D files to merge
    D = NDDataset.read(['test.0000', 'test.0001', 'test.0002'],
                       directory=os.path.join('irdata', 'OPUS'))
    assert D.shape == (3, 2567)

    # multiple 1D files not merged: passed as a list but merge is set to false
    E = scp.read(['test.0000', 'test.0001', 'test.0002'],
                 directory=os.path.join('irdata', 'OPUS'), merge=False)
    assert isinstance(E, list)
    assert len(E) == 3

    # read byte contents
    datadir = Path(prefs.datadir)
    p = datadir / 'irdata' / 'OPUS' / 'test.0000'
    content = p.read_bytes()
    F = NDDataset.read({p.name: content})
    assert F.name == p.name
    assert F.shape == (1, 2567)

    # read multiple 1D contents and merge them
    lst = [datadir / 'irdata' / 'OPUS' / f'test.000{i}' for i in range(3)]
    G = NDDataset.read({p.name: p.read_bytes() for p in lst})
    assert G.shape == (3, 2567)
    assert len(G) == 3

    # read multiple 1D contents without merging
    H = NDDataset.read({p.name: p.read_bytes() for p in lst}, merge=False)
    assert isinstance(H, list)  # was a bare no-op expression before the fix
    assert len(H) == 3

    filename = datadir / 'wodger.spg'
    content = filename.read_bytes()

    # change the filename to be sure the file will be read from the passed content
    filename = 'try.spg'

    # the most direct way to pass the byte content information
    nd = NDDataset.read(filename, content=content)
    assert str(nd) == 'NDDataset: [float64] a.u. (shape: (y:2, x:5549))'

    # it can also be passed using a dictionary structure {filename: content, ...}
    nd = NDDataset.read({filename: content})
    assert str(nd) == 'NDDataset: [float64] a.u. (shape: (y:2, x:5549))'

    # case where the filename is not provided
    nd = NDDataset.read(content)
    assert str(nd) == 'NDDataset: [float64] a.u. (shape: (y:2, x:5549))'

    # try with an .spa file
    filename = datadir / 'irdata/subdir/7_CZ0-100 Pd_101.SPA'
    content = filename.read_bytes()
    filename = 'try.spa'

    filename2 = datadir / 'irdata/subdir/7_CZ0-100 Pd_102.SPA'
    content2 = filename2.read_bytes()
    # NOTE(review): 'try.spa' above is immediately overwritten — looks
    # unintentional but preserved here to keep behavior unchanged
    filename = 'try2.spa'

    nd = NDDataset.read({filename: content})
    assert str(nd) == 'NDDataset: [float64] a.u. (shape: (y:1, x:5549))'

    # try with only a .spa content
    nd = NDDataset.read(content)
    assert str(nd) == 'NDDataset: [float64] a.u. (shape: (y:1, x:5549))'

    # try with several .spa contents (should be stacked into a single nddataset)
    nd = NDDataset.read({filename: content, filename2: content2})
    assert str(nd) == 'NDDataset: [float64] a.u. (shape: (y:2, x:5549))'

    nd = NDDataset.read(content, content2)
    assert str(nd) == 'NDDataset: [float64] a.u. (shape: (y:2, x:5549))'
def test_plotly2D():
    """Prepare a 2D IR dataset with a time axis in hours (plotly plotting)."""
    ds = NDDataset.read_omnic("irdata/nh4y-activation.spg", directory=prefs.datadir)
    ds.y -= ds.y[0]
    ds.y.to("hour", inplace=True)
    ds.y.title = "Acquisition time"
# ----
# Let's change this name

# %%
proj.name = 'myNMRdata'
proj

# %% [markdown]
# Now we will add a dataset to the project.
#
# First we read the dataset (here some NMR data) and we give it some name (e.g. 'nmr n°1')

# %%
datadir = pathclean(prefs.datadir)
path = datadir / 'nmrdata' / 'bruker' / 'tests' / 'nmr'
nd1 = NDDataset.read_topspin(path / 'topspin_1d', expno=1,
                             remove_digital_filter=True, name="NMR_1D")
nd2 = NDDataset.read_topspin(path / 'topspin_2d', expno=1,
                             remove_digital_filter=True, name='NMR_2D')

# %% [markdown]
# To add it to the project, we use the `add_dataset` function for a single dataset:

# %%
proj.add_datasets(nd1)

# %% [markdown]
# or `add_datasets` for several datasets.

# %%
proj.add_datasets(nd1, nd2)

# %% [markdown]
# In the first syntax we load the library into a namespace called `scp`
# (we recommend this name, but you can choose whatever
# you want - except something already in use):

# %%
import spectrochempy as scp  # SYNTAX 1

nd = scp.NDDataset()

# %% [markdown]
# or in the second syntax, with a wild `*` import.

# %%
from spectrochempy import *  # SYNTAX 2

nd = NDDataset()

# %% [markdown]
# With the second syntax, as often in python, the access to objects/functions can be greatly simplified. For example,
# we can use directly `NDDataset` without a prefix instead of `scp.NDDataset`
# which is the first syntax but there is always a
# risk of overwriting some variables or functions already present in the namespace.
# Therefore, the first syntax is generally highly recommended.
#
# Alternatively, you can also load only the objects and function required by your application:
#

# %%
from spectrochempy import NDDataset  # SYNTAX 3

nd = NDDataset()
# * `mask`: Data can be partially masked at will
# * `units`: Data can have units, allowing units-aware operations
# * `coordset`: Data can have a set of coordinates, one or several by dimensions
#
# Additional metadata can also be added to the instances of this class through the `meta` properties.

# %% [markdown]
# ## 1D-Dataset (unidimensional dataset)

# %% [markdown]
# In the following example, a minimal 1D dataset is created from a simple list, to which we can add some metadata:

# %%
d1D = NDDataset(
    [10.0, 20.0, 30.0],
    name="Dataset N1",
    author="Blake and Mortimer",
    description="A dataset from scratch",
)
d1D

# %% [markdown]
# <div class='alert alert-info'>
# <b>Note</b>
#
# In the above code, run in a notebook, the output of d1D is in html for a nice display.
#
# To get the same effect, from a console script, one can use `print_` (with an underscore) and not the usual python
# function `print`. As you can see below, the `print` function only gives a short summary of the information,
# while the `print_` method gives more detailed output
#
# </div>
def test_importer(monkeypatch, fs):
    """Exercise the generic importer on a fake (pyfakefs) filesystem:
    protocol inference, dialogs, merging, directory reads and byte contents."""
    fs.create_file("/var/data/xx1.txt")
    assert os.path.exists("/var/data/xx1.txt")

    # mock filesystem
    fs.create_dir(DATADIR)

    # try to read an unexistent scp file
    f = DATADIR / "fakedir/fakescp.scp"
    with pytest.raises(FileNotFoundError):
        read(f)

    # make a fake file
    fs.create_file(f)
    monkeypatch.setattr(NDDataset, "load", fake_dataset)
    nd = read(f)
    assert nd == fake_dataset(f)
    nd = read(f.stem, directory=DATADIR / "fakedir/", protocol="scp")
    assert nd == fake_dataset(f)
    nd = read(f.stem, directory=DATADIR / "fakedir/")
    assert nd == fake_dataset(f)

    # generic read without parameters, and dialog cancel
    monkeypatch.setattr(spectrochempy.core, "open_dialog", dialog_cancel)
    # we ask to display dialogs as we will mock them
    monkeypatch.setenv("KEEP_DIALOGS", "True")
    nd = read()
    assert nd is None

    # read as a class method
    nd1 = NDDataset.read()
    assert nd1 is None

    # NDDataset instance as first argument
    nd = NDDataset()
    nd2 = nd.read()
    assert nd2 is None

    nd = read(default_filter="matlab")
    assert nd is None

    # check when the Filetype is not known
    f = DATADIR / "fakedir/not_exist_fake.fk"
    with pytest.raises(TypeError):
        read_fake(f)

    # make the fake type acceptable
    FILETYPES.append(("fake", "FAKE files (*.fk)"))
    ALIAS.append(("fk", "fake"))
    monkeypatch.setattr("spectrochempy.core.readers.importer.FILETYPES", FILETYPES)
    monkeypatch.setattr("spectrochempy.core.readers.importer.ALIAS", ALIAS)

    # check a non-existing filename
    f = DATADIR / "fakedir/not_exist_fake.fk"
    with pytest.raises(FileNotFoundError):
        read_fake(f)

    # generic read with a wrong protocol
    with pytest.raises(spectrochempy.utils.exceptions.ProtocolError):
        read(f, protocol="wrongfake")

    # generic read with a wrong file extension
    with pytest.raises(TypeError):
        g = DATADIR / "fakedir/otherfake.farfelu"
        read(g)

    # mock file
    f = DATADIR / "fakedir/fake.fk"
    fs.create_file(f)

    # specific read_(protocol) function
    nd = read_fk(f)
    assert nd == fake_dataset()

    # should also work as a class function
    nd = NDDataset.read_fk(f)
    assert nd == fake_dataset()

    # and as a NDDataset instance function
    nd = NDDataset().read_fk(f)
    assert nd == fake_dataset()

    # single file without protocol, inferred from filename
    nd = read(f)
    assert nd == fake_dataset()

    # single file read with protocol specified
    nd = read(f, protocol="fake")
    assert nd == fake_dataset()

    # attribute a new name
    nd = read(f, name="toto")
    assert nd.name == "toto"

    # mock some fake files and assume they exist
    f1 = DATADIR / "fakedir/fake1.fk"
    f2 = DATADIR / "fakedir/fake2.fk"
    f3 = DATADIR / "fakedir/fake3.fk"
    f4 = DATADIR / "fakedir/fake4.fk"
    f5 = DATADIR / "fakedir/otherdir/otherfake.fk"
    f6 = DATADIR / "fakedir/emptyfake.fk"  # return None when reader
    fs.create_file(f1)
    fs.create_file(f2)
    fs.create_file(f3)
    fs.create_file(f4)
    fs.create_file(f5)
    fs.create_file(f6)

    # multiple compatible 1D files automatically merged
    nd = read(f1, f2, f3)
    assert nd.shape == (3, 3)

    nd = read([f1, f2, f3], name="fake_merged")
    assert nd.shape == (3, 3)
    assert nd.name == "fake_merged"

    # multiple compatible 1D files not merged if merge is set to False
    nd = read([f1, f2, f3], names=["a", "c", "b"], merge=False)
    assert isinstance(nd, list)
    assert len(nd) == 3 and nd[0] == fake_dataset()
    assert nd[1].name == "c"

    # do not merge inhomogeneous datasets
    nd = read([f1, f2, f5])
    assert isinstance(nd, list)

    # too short list of names: not applied
    nd = read([f1, f2, f3], names=["a", "c"], merge=False)
    assert nd[0].name.startswith("NDDataset")

    monkeypatch.setattr(spectrochempy.core, "open_dialog", dialog_open)
    # should open a dialog to select individual filenames (only simulated here)
    nd = read()
    assert nd.shape == (2, 3)

    # read in a directory
    monkeypatch.setattr(pathlib.Path, "glob", directory_glob)

    # directory selection
    nd = read(protocol="fake", directory=DATADIR / "fakedir")
    assert nd.shape == (4, 3)

    nd = read(protocol="fake", directory=DATADIR / "fakedir", merge=False)
    assert len(nd) == 4
    assert isinstance(nd, list)

    nd = read(listdir=True, directory=DATADIR / "fakedir")
    assert len(nd) == 4
    assert not isinstance(nd, list)

    # if a directory is passed as a keyword, the behavior is different:
    # a dialog for file selection occurs except if listdir is set to True
    nd = read(directory=DATADIR / "fakedir", listdir=False)
    assert nd.shape == (2, 3)  # -> file selection dialog

    nd = read(directory=DATADIR / "fakedir", listdir=True)
    assert nd.shape == (4, 3)  # -> directory selection dialog

    # read_dir()
    nd = read_dir(DATADIR / "fakedir")
    assert nd.shape == (4, 3)

    nd1 = read_dir()
    assert nd1 == nd

    # open a dialog to eventually select a directory inside the specified one
    nd = read_dir(directory=DATADIR / "fakedir")
    assert nd.shape == (4, 3)

    fs.create_file(DATADIR / "fakedir/subdir/fakesub1.fk")
    nd = read_dir(directory=DATADIR / "fakedir", recursive=True)
    assert nd.shape == (5, 3)

    # no merging
    nd = read_dir(directory=DATADIR / "fakedir", recursive=True, merge=False)
    assert len(nd) == 5
    assert isinstance(nd, list)

    # simulate reading a content
    nd = read({"somename.fk": "a fake content"})
    assert nd == fake_dataset(content=True)

    nd = read_fake({"somename.fk": "a fake content"})
    assert nd == fake_dataset(content=True)

    # read multiple contents and merge them
    nd = read(
        {
            "somename.fk": "a fake content",
            "anothername.fk": "another fake content",
            "stillanothername.fk": "still another fake content",
        }
    )
    assert nd.shape == (3, 3)

    # do not merge
    nd = read(
        {
            "somename.fk": "a fake content",
            "anothername.fk": "another fake content",
        },
        merge=False,
    )
    assert isinstance(nd, list)
    assert len(nd) == 2
# To load the API, you must import it using one of the following syntax.
#
# In the first syntax we load the library into a namespace called `scp` (you can choose whatever you want - except
# something already in use):

# %%
import spectrochempy as scp  # SYNTAX 1

nd = scp.NDDataset()

# %% [markdown]
# or in the second syntax, with a wild `*` import.

# %%
from spectrochempy import *

nd = NDDataset()

# %% [markdown]
# With the second syntax, as often in python, the access to objects/functions can be greatly simplified. For example,
# we can use "NDDataset" without a prefix instead of `scp.NDDataset`, which is the first syntax, but there is always a
# risk of overwriting some variables or functions already present in the namespace.
# Therefore, the first syntax is generally highly recommended.
#
# Alternatively, you can also load only the objects and function required by your application:
#

# %%
from spectrochempy import NDDataset

nd = NDDataset()
# handling spectroscopic information, one of the major objectives of the SpectroChemPy package:
#
# * `mask`: Data can be partially masked at will
# * `units`: Data can have units, allowing units-aware operations
# * `coordset`: Data can have a set of coordinates, one or several by dimensions
#
# Additional metadata can also be added to the instances of this class through the `meta` properties.

# %% [markdown]
# ## 1D-Dataset (unidimensional dataset)

# %% [markdown]
# In the following example, a minimal 1D dataset is created from a simple list, to which we can add some metadata:

# %%
d1D = NDDataset([10., 20., 30.],
                name="Dataset N1",
                author='Blake and Mortimer',
                description='A dataset from scratch')
d1D

# %% [markdown]
# <div class='alert alert-info'>
# <b>Note</b>
#
# In the above code, run in a notebook, the output of d1D is in html for a nice display.
#
# To get the same effect, from a console script, one can use `print_` (with an underscore) and not the usual python
# function `print`. As you can see below, the `print` function only gives a short summary of the information,
# while the `print_` method gives more detailed output
#
# </div>

# %%