dataset """

import spectrochempy as scp

############################################################
# Load a dataset, print its summary and display it as a stack plot
dataset = scp.read_omnic("irdata/nh4y-activation.spg")
print(dataset)
dataset.plot_stack()

##############################################################
# Create a PCA object
pca = scp.PCA(dataset, centered=False)

##############################################################
# Reduce the dataset to a lower dimensionality (the number of
# components is automatically determined).
# NOTE(review): ``n_pc=0.99`` is presumably a threshold on the cumulative
# explained variance -- confirm against the ``PCA.reduce`` documentation.
S, LT = pca.reduce(n_pc=0.99)
print(LT)

###############################################################
# Finally, display the results graphically
# ScreePlot
_ = pca.screeplot()

########################################################################################################################
import sys

import spectrochempy as scp

############################################################
# Download a dataset from a distant server
try:
    dataset = scp.download_iris()
except OSError:
    # ``IOError`` is an alias of ``OSError`` in Python 3, so catching
    # ``OSError`` alone covers both names.  The example cannot continue
    # without the data: report it and stop the script with exit status 0.
    print("Could not load The `IRIS` dataset. Finishing here.")
    sys.exit(0)

##############################################################
# Create a PCA object
pca = scp.PCA(dataset, centered=True)

##############################################################
# Reduce the data to a lower dimensionality. Here, the number of
# components is automatically determined using `n_pc="auto"`. As
# indicated by the dimension of LT, 4 PCs are found.
S, LT = pca.reduce(n_pc="auto")
print(LT)

###############################################################
# The figures of merit (explained and cumulative variance) confirm that
# these 4 PCs explain 100% of the variance:
#
pca.printev()
# %% [markdown] # ### Guessing the concentration profile with PCA + EFA # # Generally, in MCR ALS, the initial guess cannot be obtained independently of the experimental data 'x'. # In such a case, one has to rely on 'X' to obtained (i) the number of pure species and (ii) their initial # concentrations or spectral profiles. The number of of pure species can be assessed by carrying out a PCA on the # data while the concentrations or spectral profiles can be estimated using procedures such EFA of SIMPLISMA. # The following will illustrate the use of PCA followed by EFA # # #### Use of PCA to assess the number of pure species # # Let's first analyse our dataset using PCA and plot a screeplot: # %% pca = scp.PCA(X) pca.printev(n_pc=10) _ = pca.screeplot(n_pc=8) # %% [markdown] # The number of significant PC's is clearly larger or equal to 2. It is, however, difficult tto determine whether # it should be set to 3 or 4... Let's look at the score and loading matrices: # # %% S, LT = pca.reduce(n_pc=8) _ = S.T.plot() _ = LT.plot() # %% [markdown] # Examination of the scores and loadings indicate that the 4th component has structured, non random scores and loadings.
# %% [markdown] # ### Guessing the concentration profile with PCA + EFA # # Generally, in MCR ALS, the initial guess cannot be obtained independently of the experimental data 'x'. # In such a case, one has to rely on 'X' to obtained (i) the number of pure species and (ii) their initial # concentrations or spectral profiles. The number of pure species can be assessed by carrying out a PCA on the # data while the concentrations or spectral profiles can be estimated using procedures such EFA of SIMPLISMA. # The following will illustrate the use of PCA followed by EFA # # #### Use of PCA to assess the number of pure species # # Let's first analyse our dataset using PCA and plot a screeplot: # %% pca = scp.PCA(X) pca.printev(n_pc=10) _ = pca.screeplot(n_pc=8) # %% [markdown] # The number of significant PC's is clearly larger or equal to 2. It is, however, difficult tto determine whether # it should be set to 3 or 4... Let's look at the score and loading matrices: # # %% S, LT = pca.reduce(n_pc=8) _ = S.T.plot() _ = LT.plot() # %% [markdown] # Examination of the scores and loadings indicate that the 4th component has structured, nonrandom scores and loadings.