from ValidationUtils import cv_benchmark_model

# %%
# Import data.
# Alternative input (spectra that are already cut):
#     specs = pd.read_csv('./luzrawSpectra/nirMatrix.csv')
specs = pd.read_csv("/Users/maxprem/nirPy/calData_full.csv")  # full spectra
lab = pd.read_excel("/Users/maxprem/nirGit/nirpy/luzrawSpectra/labData.xlsx")

# Cut the spectra to the wavenumber window given as input (4100 to 5500).
specs = cut_specs(specs, 4100, 5500)

# %%
# Build the modelling arrays from the spectra and the lab data.
# NOTE(review): the third argument (4) presumably selects which lab-data
# column is used as the reference value -- confirm against importLuzCol.
X, y, wave_number, ref = importLuzCol(specs, lab, 4)

# Split dataset in train and test data: 30 % is held out and the seed is
# fixed so the split is reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# %%
# Transformation pipeline.
# Scale y: the scaler is fitted on the training targets only and the fitted
# scaler is then applied unchanged to the test targets.
y_scaler = GlobalStandardScaler()
y_train = y_scaler.fit_transform(y_train)
y_test = y_scaler.transform(y_test)
# Fit and score a PLS model on the data prepared earlier in the file.
# NOTE(review): X, y and n_comp are defined before this excerpt; the helper
# semantics below are inferred from their names -- confirm.
opt_comp = optimal_n_comp(X, y, n_comp)
opt_model = pls_regression(X, y, opt_comp)
pls_scores(X, y, opt_model)

# Import data.
# Alternative input (spectra that are already cut):
#     specs = pd.read_csv("./luzrawSpectra/nirMatrix.csv")
specs = pd.read_csv("/Users/maxprem/nirPy/calData_full.csv")  # full spectra
lab = pd.read_excel("/Users/maxprem/nirGit/nirpy/luzrawSpectra/labData.xlsx")

# Cut the spectra to the wavenumber window given as input (4100 to 5500).
specs = cut_specs(specs, 4100, 5500)

# NOTE(review): the third argument (2) presumably selects which lab-data
# column is used as the reference value -- confirm against importLuzCol.
X, y, wl, ref = importLuzCol(specs, lab, 2)

# Splitting dataset: 20 % is held out and the seed is fixed so the split is
# reproducible between runs.
# TODO: to be continued with test set
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

#########################
# Scaling and transforming.
# NOTE(review): this import was fused onto the comment above in the mangled
# source; it is restored here because GlobalStandardScaler is needed below.
from ChemUtils import EmscScaler, GlobalStandardScaler, SavgolFilter

# Scale y: the scaler is fitted on the training targets only and the fitted
# scaler is then applied unchanged to the test targets.
y_scaler = GlobalStandardScaler()
y_train = y_scaler.fit_transform(y_train)
y_test = y_scaler.transform(y_test)