def as7262_outliers(data, scatter_correction=None):
    """Detect and visualize outlier spectra in AS7262 data.

    Uses a robust covariance estimate (MinCovDet) to compute Mahalanobis
    distances for every sample, z-scores those distances, and highlights
    the most extreme sample in the final plot.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns listed in the module-level
        ``as7262_wavelengths``.
    scatter_correction : {None, "SNV", "MSC"}, optional
        Scatter correction applied to the spectra before outlier detection.

    Side effects: prints diagnostics and opens three matplotlib figures.
    """
    data_columns = data[as7262_wavelengths]
    print(data_columns)
    plt.show()

    # Optional scatter correction before the covariance estimate.
    if scatter_correction == "SNV":
        data_columns = processing.snv(data_columns)
    elif scatter_correction == "MSC":
        data_columns, _ = processing.msc(data_columns)

    # Robust covariance -> Mahalanobis distance of every sample.
    robust_cov = MinCovDet().fit(data_columns)
    mahal_dist = robust_cov.mahalanobis(data_columns)
    print(mahal_dist)
    print('+++++')

    mean = np.mean(mahal_dist)
    std = 3 * np.std(mahal_dist)
    print(mean, std)
    print(mean - std, mean + std)

    # z-score the distances; values > 3 would flag outliers.
    zscore_mahal = (mahal_dist - mean) / np.std(mahal_dist)
    print('pppp')
    print(data_columns)
    print(zscore_mahal.argmax())

    # Highlight the single most extreme sample.
    # NOTE(review): an index set of all samples with z-score > 3 was
    # computed here but was immediately overwritten by the argmax lookup;
    # the dead assignment was removed, keeping the original final behavior.
    outliers = data_columns.iloc[zscore_mahal.argmax()].name
    rows = data_columns.loc[outliers]
    print(data_columns.shape)
    print(rows)

    # Distribution of the distance z-scores.
    n, bins, _ = plt.hist(zscore_mahal, bins=40)
    plt.show()

    # All spectra (black), the outlier (red), and the mean spectrum (blue).
    plt.plot(data_columns.T, c="black")
    plt.plot(rows.T, c="red")
    plt.plot(data_columns.mean(), c="blue", lw=4)
    plt.show()
# NOTE(review): a large block of commented-out curve_fit / r2_score
# experimentation was removed here; recover it from version control if needed.

# Build every preprocessing variant of the x data that will be scanned,
# together with a matching list of human-readable names.
x_msc, _ = processing.msc(x_data)
x_inv_msc = 1 / x_msc.copy()

data_sets = [
    x_data.copy(),
    1 / x_data,
    processing.snv(x_data),
    1 / processing.snv(x_data),
    x_msc,
    x_inv_msc,
    StandardScaler().fit_transform(x_data),
    StandardScaler().fit_transform(processing.snv(x_data)),
    StandardScaler().fit_transform(x_msc),
    RobustScaler().fit_transform(x_data),
    RobustScaler().fit_transform(processing.snv(x_data)),
    # BUG FIX: "Robust Scalar MSC" had a name below but no corresponding
    # data set, so the two lists had different lengths (11 vs 12).
    RobustScaler().fit_transform(x_msc),
]
data_set_names = [
    "raw", "inverse", "SNV", "Invert SNV", "MSC",
    "inverse msc", "standard scalar", "Standard Scalar SNV",
    "Standard Scalar MSC", "Robust Scalar", "Robust Scalar SNV",
    "Robust Scalar MSC",
]
except: pass with pd.ExcelWriter(filename, mode='a') as writer: results_df.to_excel(writer, sheet_name=sheet_name) ys = { "Normal Y": _y, "Inverse Y": 1 / _y, "Log Y": np.log(_y), "Inverse Log Y": np.log(1 / _y), "Exp Y": np.exp(-_y), } ys = {"Normal Y": _y} mcs_x, _ = processing.msc(x_data) print(mcs_x) Xs = {"Normal X": x_data, "SNV": processing.snv(x_data)} # Xs = {"MSC": mcs_x} with pd.ExcelWriter(filename, mode='w') as writer: results_df.to_excel(writer) for y_name, y_inner in ys.items(): for x_name, x_inner in Xs.items(): new_sheet = x_name + ' ' + y_name run_scan(x_inner, y_inner, new_sheet) print(regressors) print(training_scores) print(test_scores)
# Three-panel figure: raw AS7263 spectra, SNV-corrected, and MSC-corrected.
fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(7, 9),
                         constrained_layout=True)
fig.suptitle("AS7263 measurements of Cananga")
print(spectrum_data.shape)
print(data.columns)

# Panel A: raw sensor counts.
axes[0].plot(spectrum_data.T)
axes[0].set_title('Raw Data')
axes[0].set_ylabel("Raw Sensor Counts")
axes[0].annotate("A", xy=(.04, 0.80), xycoords='axes fraction', size=24)
# Recolor each line individually with its per-sample color.
# (Plain for-loops replace the original side-effect list comprehensions.)
for i, color in enumerate(colors):
    axes[0].lines[i].set_color(color)

# Panel B: standard normal variate (SNV) corrected spectra.
snv_data = processing.snv(spectrum_data)
axes[1].plot(wavelengths, snv_data.T, color=colors)
axes[1].set_title('Standard Normal Variate Data')
axes[1].annotate("B", xy=(.04, 0.80), xycoords='axes fraction', size=24)
for i, color in enumerate(colors):
    axes[1].lines[i].set_color(color)

# Panel C: multiplicative scatter correction (MSC) corrected spectra.
msc_data, _ = processing.msc(spectrum_data)
axes[2].plot(wavelengths, msc_data.T, color=colors)
axes[2].set_title("Multiplicative Scatter Correction Data")
axes[2].set_xlabel("Wavelength (nm)")
axes[2].annotate("C", xy=(.04, 0.80), xycoords='axes fraction', size=24)
for i, color in enumerate(colors):
    axes[2].lines[i].set_color(color)

plt.show()
# Background/reference counts for the 18 AS7265x channels, used to convert
# raw sensor counts to reflectance.
BACKGROUND = [
    687.65, 9453.7, 23218.35, 9845.05, 15496.7, 18118.55,
    7023.8, 7834.1, 28505.9, 4040.9, 5182.3, 1282.55,
    2098.85, 1176.1, 994.45, 496.45, 377.55, 389.75,
]

x, y = get_data.get_data("mango", "as7265x", int_time=150,
                         position=2, led="b'White'",
                         led_current="25 mA")
y = y['Avg Total Chlorophyll (µg/cm2)']

# Reflectance plus the usual scatter-correction variants.
x_reflect = x / BACKGROUND
x_snv = processing.snv(x_reflect)
x_msc, _ = processing.msc(x_reflect)
x_robust = RobustScaler().fit_transform(x_msc)

plt.style.use('dark_background')

pls = PLS(n_components=6)
pls.fit(x, y)
x_fit = pls.predict(x)

# Refit PLS on the MSC-corrected data for the comparison below.
pls.fit(x_msc, y)

svr = SVR()
svr.fit(x_msc, y)
# BUG FIX: the models are fit on x_msc but were scored on the raw x,
# which makes the reported R^2 meaningless — score on the same features.
print(svr.score(x_msc, y))

ridge = RidgeCV()
ridge.fit(x_msc, y)

print(pls.score(x_msc, y))
if __name__ == "__main__":
    # Run the regression scan for every sensor / leaf pairing.
    for sensor in sensors:
        for leaf in leafs:
            x, y = get_data.get_data(leaf, sensor, int_time=150,
                                     position=2, led_current="25 mA")
            print(y.columns)

            # Prefer the per-spot chlorophyll column; fall back to the
            # leaf-averaged column when that is all the data set has.
            y_column = 'Total Chlorophyll (µg/cm2)'
            if y_column not in y.columns:
                y_column = 'Avg Total Chlorophyll (µg/cm2)'
            y = y[y_column]
            print(x)

            # Preprocessing variants of x to scan, as (label, data) pairs.
            x_msc, _ = processing.msc(x)
            x_snv = processing.snv(x)
            variants = [
                ("Normal", x),
                ("MSC", x_msc),
                ("SNV", x_snv),
                ("Inv", 1 / x),
                ("Log", np.log(x)),
                ("Inv Log", np.log(1 / x)),
            ]

            name = f"{sensor}_{leaf}"
            for processor_name, new_x in variants:
                print(processor_name)
                print(new_x)
                run_scan(new_x, y, name, processor_name)