def as7262_outliers(data, scatter_correction=None):
    """Highlight the most extreme AS7262 spectrum by robust Mahalanobis distance.

    The AS7262 wavelength columns of ``data`` are optionally scatter-corrected,
    a Minimum Covariance Determinant estimator is fitted to them, and each
    row's distance from that robust fit is standardised to a z-score.  The
    row with the largest z-score is printed and drawn in red on top of all
    spectra (black) and the mean spectrum (blue); a histogram of the
    z-scores is shown first.

    Args:
        data: Table indexable by ``as7262_wavelengths``; the selected columns
            must support ``.iloc``/``.loc``/``.T``/``.mean()``
            (presumably a pandas DataFrame -- TODO confirm).
        scatter_correction: ``"SNV"`` to apply ``processing.snv``, ``"MSC"``
            to apply ``processing.msc``; any other value leaves the data
            uncorrected.

    Returns:
        None.  The function only prints diagnostics and shows figures.
    """
    data_columns = data[as7262_wavelengths]
    print(data_columns)
    # Nothing has been drawn by this function yet; this only flushes figures
    # that were already open when it was called.
    plt.show()
    if scatter_correction == "SNV":
        data_columns = processing.snv(data_columns)
    elif scatter_correction == "MSC":
        data_columns, _ = processing.msc(data_columns)

    # NOTE(review): scikit-learn documents MinCovDet.mahalanobis as returning
    # *squared* distances -- confirm that is what should be standardised.
    robust_cov = MinCovDet().fit(data_columns)
    mahal_dist = robust_cov.mahalanobis(data_columns)
    print(mahal_dist)

    print('+++++')
    mean = np.mean(mahal_dist)
    sigma = np.std(mahal_dist)
    three_sigma = 3 * sigma
    print(mean, three_sigma)
    print(mean - three_sigma, mean + three_sigma)
    zscore_mahal = (mahal_dist - mean) / sigma

    print('pppp')
    print(data_columns)
    worst_position = zscore_mahal.argmax()
    print(worst_position)
    # Only the single most extreme row is reported.  An earlier
    # ``zscore_mahal > 3`` selection was overwritten before use and has been
    # dropped; reinstate it here if every >3-sigma row should be highlighted.
    outliers = data_columns.iloc[worst_position].name
    rows = data_columns.loc[outliers]
    print(data_columns.shape)
    print(rows)

    plt.hist(zscore_mahal, bins=40)
    plt.show()

    plt.plot(data_columns.T, c="black")
    plt.plot(rows.T, c="red")
    plt.plot(data_columns.mean(), c="blue", lw=4)
    plt.show()
예제 #2
0
#                     #            wavelength=data_columns[l],
#                     #            invert_y=False)
#                     if invert_y:
#                         y = 1 / y
#                     fit_values, _ = curve_fit(model, x, y, maxfev=10 ** 6)
#                     y_fit = model(x, *fit_values)
#                     if invert_y:
#                         y = 1 / y
#                         y_fit = 1 / y_fit
#                     r2 = r2_score(y, y_fit)
#                     mae = mean_absolute_error(y, y_fit)
#                     print(r2, mae)
#                 except:
#                     pass

# Candidate pre-processings of x_data, paired position-by-position with
# data_set_names below: the two lists must stay the same length and order.
x_msc, _ = processing.msc(x_data)
x_inv_msc = 1 / x_msc.copy()
# Computed once and reused; assumes processing.snv is deterministic and
# returns a new object rather than mutating its input -- TODO confirm.
x_snv = processing.snv(x_data)
data_sets = [
    x_data.copy(), 1 / x_data,
    x_snv, 1 / x_snv, x_msc, x_inv_msc,
    StandardScaler().fit_transform(x_data),
    StandardScaler().fit_transform(x_snv),
    StandardScaler().fit_transform(x_msc),
    RobustScaler().fit_transform(x_data),
    RobustScaler().fit_transform(x_snv),
    # Previously missing: "Robust Scalar MSC" was named below but had no
    # matching entry here, leaving the two lists out of step.
    RobustScaler().fit_transform(x_msc),
]
data_set_names = [
    "raw", "inverse", "SNV", "Invert SNV", "MSC", "inverse msc",
    "standard scalar", "Standard Scalar SNV", "Standard Scalar MSC",
    "Robust Scalar", "Robust Scalar SNV", "Robust Scalar MSC"
]
예제 #3
0
            except:
                pass

    with pd.ExcelWriter(filename, mode='a') as writer:
        results_df.to_excel(writer, sheet_name=sheet_name)


# Target transformations that can be scanned.  They are stored as callables
# so that only the selected ones are evaluated; previously every variant was
# computed (including np.log / np.exp) and then immediately discarded by a
# second assignment to ``ys``.
y_transforms = {
    "Normal Y": lambda y: y,
    "Inverse Y": lambda y: 1 / y,
    "Log Y": lambda y: np.log(y),
    "Inverse Log Y": lambda y: np.log(1 / y),
    "Exp Y": lambda y: np.exp(-y),
}
# Add names from y_transforms here to include them in the scan.
active_y = ["Normal Y"]
ys = {y_name: y_transforms[y_name](_y) for y_name in active_y}

# The MSC-corrected data is only printed; it is not part of Xs.  Use
# ``Xs = {"MSC": mcs_x}`` to scan it instead.
mcs_x, _ = processing.msc(x_data)
print(mcs_x)

Xs = {"Normal X": x_data, "SNV": processing.snv(x_data)}

# Start from a fresh workbook (mode='w' overwrites any existing file) --
# presumably so later append-mode writes have a file to open; TODO confirm.
with pd.ExcelWriter(filename, mode='w') as writer:
    results_df.to_excel(writer)

# One run_scan call, and one sheet name, per (X, Y) combination.
for y_name, y_inner in ys.items():
    for x_name, x_inner in Xs.items():
        new_sheet = f"{x_name} {y_name}"
        run_scan(x_inner, y_inner, new_sheet)

print(regressors)
print(training_scores)
print(test_scores)
예제 #4
0
# Three stacked panels: raw counts (A), SNV-corrected (B), MSC-corrected (C).
fig, axes = plt.subplots(nrows=3,
                         ncols=1,
                         figsize=(7, 9),
                         constrained_layout=True)
fig.suptitle("AS7263 measurements of Cananga")
print(spectrum_data.shape)
print(data.columns)

# NOTE(review): panel A is plotted without ``wavelengths`` on the x axis,
# unlike panels B and C -- confirm this is intended.
axes[0].plot(spectrum_data.T)
axes[0].set_title('Raw Data')
axes[0].set_ylabel("Raw Sensor Counts")
axes[0].annotate("A", xy=(.04, 0.80), xycoords='axes fraction', size=24)

snv_data = processing.snv(spectrum_data)

# ``colors`` is a per-line list, which is not a valid value for plot()'s
# single ``color`` argument, so the lines are drawn with defaults here and
# recoloured one by one in the loop at the end.
axes[1].plot(wavelengths, snv_data.T)
axes[1].set_title('Standard Normal Variate Data')
axes[1].annotate("B", xy=(.04, 0.80), xycoords='axes fraction', size=24)

msc_data, _ = processing.msc(spectrum_data)
axes[2].plot(wavelengths, msc_data.T)
axes[2].set_title("Multiplicative Scatter Correction Data")
axes[2].set_xlabel("Wavelength (nm)")
axes[2].annotate("C", xy=(.04, 0.80), xycoords='axes fraction', size=24)

# Give the i-th line of every panel the i-th entry of ``colors``.  Pairing
# stops at the shorter of the two sequences, so surplus colours are ignored.
for panel in axes:
    for line, color in zip(panel.lines, colors):
        line.set_color(color)
plt.show()
예제 #5
0
# Divisor applied element-wise to the sensor readings below (18 values;
# presumably one background reading per as7265x channel -- TODO confirm).
BACKGROUND = [
    687.65, 9453.7, 23218.35, 9845.05, 15496.7, 18118.55, 7023.8, 7834.1,
    28505.9, 4040.9, 5182.3, 1282.55, 2098.85, 1176.1, 994.45, 496.45, 377.55,
    389.75
]
x, y = get_data.get_data("mango",
                         "as7265x",
                         int_time=150,
                         position=2,
                         led="b'White'",
                         led_current="25 mA")
y = y['Avg Total Chlorophyll (µg/cm2)']

# Pre-processing variants.  Only x_msc feeds the models below; x_snv and
# x_robust are kept because later code may use them.
x_reflect = x / BACKGROUND
x_snv = processing.snv(x_reflect)
x_msc, _ = processing.msc(x_reflect)
x_robust = RobustScaler().fit_transform(x_msc)
plt.style.use('dark_background')

pls = PLS(n_components=6)

# Predictions from a PLS fit on the raw readings, taken before the model is
# refitted on the MSC-corrected data.
pls.fit(x, y)
x_fit = pls.predict(x)
pls.fit(x_msc, y)

# A model must be scored on data pre-processed the same way as the data it
# was fitted on.  These scores previously used raw ``x`` although both
# models were fitted on ``x_msc``.
svr = SVR()
svr.fit(x_msc, y)
print(svr.score(x_msc, y))

# NOTE(review): ridge is fitted but never scored; the score printed below
# is the PLS one -- confirm whether a ridge score was intended.
ridge = RidgeCV()
ridge.fit(x_msc, y)
print(pls.score(x_msc, y))
예제 #6
0
if __name__ == "__main__":
    # Run run_scan once per sensor, leaf and pre-processing of the readings.
    for sensor in sensors:
        for leaf in leafs:
            x, y = get_data.get_data(
                leaf,
                sensor,
                int_time=150,
                position=2,
                led_current="25 mA",
            )
            print(y.columns)

            # Use the plain chlorophyll column when present, otherwise the
            # "Avg" one (the µg/mg variants can be substituted here).
            y_column = (
                'Total Chlorophyll (µg/cm2)'
                if 'Total Chlorophyll (µg/cm2)' in y.columns
                else 'Avg Total Chlorophyll (µg/cm2)'
            )
            y = y[y_column]
            print(x)

            x_msc, _ = processing.msc(x)
            x_snv = processing.snv(x)

            # (label, transformed readings) pairs, scanned in this order.
            xs = [("Normal", x), ("MSC", x_msc), ("SNV", x_snv)]
            xs += [
                ("Inv", 1 / x),
                ("Log", np.log(x)),
                ("Inv Log", np.log(1 / x)),
            ]

            name = f"{sensor}_{leaf}"
            for processor_name, new_x in xs:
                print(processor_name)
                print(new_x)

                run_scan(new_x, y, name, processor_name)