def as7262_outliers(data, scatter_correction=None):
    """Find the most outlying AS7262 spectrum in ``data`` and plot it.

    The spectra are optionally scatter corrected, a robust covariance
    estimate is fitted to them, and each sample's Mahalanobis distance is
    turned into a z-score.  The sample with the largest z-score is treated
    as the outlier and is drawn in red over all spectra (black) and the
    mean spectrum (blue).  A histogram of the z-scores is shown first.

    Args:
        data: table indexable with ``as7262_wavelengths`` to obtain the
            spectral columns (presumably a pandas DataFrame, since
            ``.iloc`` / ``.loc`` are used on the selection).
        scatter_correction: ``"SNV"`` to apply ``processing.snv``,
            ``"MSC"`` to apply ``processing.msc``; any other value leaves
            the spectra uncorrected.

    Returns:
        None.  Results are printed and plotted.
    """
    data_columns = data[as7262_wavelengths]
    print(data_columns)
    # NOTE(review): nothing has been drawn by this function yet, so this only
    # displays figures that were already open -- probably a leftover of a
    # disabled raw-data plot.  Kept so pre-existing figures are still shown
    # before the histogram is drawn.
    plt.show()
    if scatter_correction == "SNV":
        data_columns = processing.snv(data_columns)
    elif scatter_correction == "MSC":
        data_columns, _ = processing.msc(data_columns)

    # NOTE(review): if MinCovDet is scikit-learn's estimator, mahalanobis()
    # returns *squared* distances -- confirm that is what is wanted here.
    robust_cov = MinCovDet().fit(data_columns)
    mahal_dist = robust_cov.mahalanobis(data_columns)
    print(mahal_dist)

    print('+++++')
    mean = np.mean(mahal_dist)
    sigma = np.std(mahal_dist)
    std = 3 * sigma  # half-width of the 3-sigma band printed below
    print(mean, std)
    print(mean - std, mean + std)
    zscore_mahal = (mahal_dist - mean) / sigma

    print('pppp')
    print(data_columns)
    worst_position = zscore_mahal.argmax()
    print(worst_position)
    # Only the single worst sample is flagged.  A previous
    # "zscore_mahal > 3" selection was immediately overwritten by this
    # argmax-based one, so it never had any effect and has been removed.
    outliers = data_columns.iloc[worst_position].name
    rows = data_columns.loc[outliers]
    print(data_columns.shape)
    print(rows)

    plt.hist(zscore_mahal, bins=40)
    plt.show()

    # All spectra in black, the flagged sample in red, the mean in blue.
    plt.plot(data_columns.T, c="black")
    plt.plot(rows.T, c="red")
    plt.plot(data_columns.mean(), c="blue", lw=4)
    plt.show()
Example #2
0
# Labels for the regression models being compared (presumably parallel to a
# ``models`` list defined elsewhere).  For a quick single-model run, narrow
# this to ['Linear model'].
model_names = [
    'Linear model',
    "Logarithm model",
    "Exponential model",
    "Polynomial model",
]

# Column names of the chlorophyll reference values.
chloro_columns = [
    'Chlorophyll a (ug/ml)',
    'Chlorophyll b (ug/ml)',
    'Total Chlorophyll (ug/ml)',
]
# Target column(s) currently selected.
y_name = ['Chlorophyll a (ug/ml)']

# Single-letter labels "A".."F" (presumably sub-plot annotations).
letters = list("ABCDEF")

# Show the sample index, then replace the spectra with their SNV-processed
# version.
print(x_data.index)
x_data = processing.snv(x_data)

# Bookkeeping for the search: best score so far, the conditions that
# produced it, and every combination considered good.
best_score, best_conditions, good_sets = 0, None, []

invert_y = True
# for led in data['LED'].unique():
#     print(led)
#     for i, y_name in enumerate(chloro_columns):
#         for j, model in enumerate(models):
#
#             # figure, axes, = plt.subplots(3, 2, figsize=(7.5, 8.75), constrained_layout=True)
#             # axes = [axes[0][0], axes[0][1], axes[1][0],
#             #         axes[1][1], axes[2][0], axes[2][1]]
#             # figure.suptitle("{0} measured with AS7263\n and {1}".format(y_name, led), size=20,
Example #3
0
    with pd.ExcelWriter(filename, mode='a') as writer:
        results_df.to_excel(writer, sheet_name=sheet_name)


# Available target transforms, stored as callables so that only the selected
# ones are evaluated.  Previously every transform was computed eagerly and
# the resulting dict was then overwritten with just "Normal Y", which wasted
# work and could emit divide-by-zero / log warnings for unused transforms.
y_transforms = {
    "Normal Y": lambda y: y,
    "Inverse Y": lambda y: 1 / y,
    "Log Y": lambda y: np.log(y),
    "Inverse Log Y": lambda y: np.log(1 / y),
    "Exp Y": lambda y: np.exp(-y),
}
# Names of the transforms to scan; add keys of ``y_transforms`` to widen it.
selected_y = ["Normal Y"]
ys = {label: y_transforms[label](_y) for label in selected_y}

# MSC-corrected spectra are computed and printed for inspection only; they
# are not part of ``Xs`` below.
mcs_x, _ = processing.msc(x_data)
print(mcs_x)

Xs = {"Normal X": x_data, "SNV": processing.snv(x_data)}

# Create (or overwrite) the workbook first; run_scan is then called once per
# X/Y combination with the sheet name to use.
with pd.ExcelWriter(filename, mode='w') as writer:
    results_df.to_excel(writer)
for y_name, y_inner in ys.items():
    for x_name, x_inner in Xs.items():
        new_sheet = x_name + ' ' + y_name
        run_scan(x_inner, y_inner, new_sheet)

print(regressors)
print(training_scores)
print(test_scores)
print(results_df)
results_df.to_csv("as7262_betal_results.csv")
Example #4
0
# Three stacked panels: raw spectra, SNV-corrected and MSC-corrected.
fig, axes = plt.subplots(nrows=3,
                         ncols=1,
                         figsize=(7, 9),
                         constrained_layout=True)
fig.suptitle("AS7263 measurements of Cananga")
print(spectrum_data.shape)
print(data.columns)

# Panel A: raw data, plotted against the data's own index (not
# ``wavelengths``).
axes[0].plot(spectrum_data.T)
axes[0].set_title('Raw Data')
axes[0].set_ylabel("Raw Sensor Counts")
axes[0].annotate("A", xy=(.04, 0.80), xycoords='axes fraction', size=24)
# Colour each line individually.  zip() stops at the shorter sequence, so a
# ``colors`` list longer than the number of lines no longer raises
# IndexError.
for _line, _color in zip(axes[0].lines, colors):
    _line.set_color(_color)

# Panel B: standard normal variate.  ``color=colors`` is no longer passed to
# plot(): it takes a single colour per call, and the per-line colours are
# applied right below anyway.
snv_data = processing.snv(spectrum_data)

axes[1].plot(wavelengths, snv_data.T)
axes[1].set_title('Standard Normal Variate Data')
axes[1].annotate("B", xy=(.04, 0.80), xycoords='axes fraction', size=24)
for _line, _color in zip(axes[1].lines, colors):
    _line.set_color(_color)

# Panel C: multiplicative scatter correction.
msc_data, _ = processing.msc(spectrum_data)
axes[2].plot(wavelengths, msc_data.T)
axes[2].set_title("Multiplicative Scatter Correction Data")
axes[2].set_xlabel("Wavelength (nm)")
axes[2].annotate("C", xy=(.04, 0.80), xycoords='axes fraction', size=24)
for _line, _color in zip(axes[2].lines, colors):
    _line.set_color(_color)
plt.show()
Example #5
0
import processing

# One reference value per sensor channel (18 entries); the raw readings are
# divided by these below.  Presumably a background / white-reference
# measurement -- confirm against the acquisition notes.
BACKGROUND = [
    687.65, 9453.7, 23218.35, 9845.05, 15496.7, 18118.55, 7023.8, 7834.1,
    28505.9, 4040.9, 5182.3, 1282.55, 2098.85, 1176.1, 994.45, 496.45, 377.55,
    389.75
]
x, y = get_data.get_data("mango",
                         "as7265x",
                         int_time=150,
                         position=2,
                         led="b'White'",
                         led_current="25 mA")
y = y['Avg Total Chlorophyll (µg/cm2)']

# Pre-processing variants of the spectra.
x_reflect = x / BACKGROUND
x_snv = processing.snv(x_reflect)
x_msc, _ = processing.msc(x_reflect)
x_robust = RobustScaler().fit_transform(x_msc)
plt.style.use('dark_background')

pls = PLS(n_components=6)

# Fit and predict on the raw readings first, then refit the same estimator on
# the MSC-corrected data (the second fit replaces the first).
pls.fit(x, y)
x_fit = pls.predict(x)
pls.fit(x_msc, y)

svr = SVR()
svr.fit(x_msc, y)
# Score on the same representation the model was trained on.  This used to
# score on the raw ``x``, which is not what the SVR was fitted to and made
# the printed score meaningless.
print(svr.score(x_msc, y))

ridge = RidgeCV()
ridge.fit(x_msc, y)
Example #6
0
if __name__ == "__main__":
    # Scan every sensor / leaf combination with each X pre-processing variant.
    for sensor in sensors:
        for leaf in leafs:
            x, y = get_data.get_data(
                leaf,
                sensor,
                int_time=150,
                position=2,
                led_current="25 mA",
            )
            print(y.columns)

            # Use the total-chlorophyll column when present, otherwise fall
            # back to the averaged column.
            y_column = (
                'Total Chlorophyll (µg/cm2)'
                if 'Total Chlorophyll (µg/cm2)' in y.columns
                else 'Avg Total Chlorophyll (µg/cm2)'
            )
            y = y[y_column]
            print(x)

            x_msc, _ = processing.msc(x)
            x_snv = processing.snv(x)

            # Label -> transformed X, in the order they are scanned.
            xs = {
                "Normal": x,
                "MSC": x_msc,
                "SNV": x_snv,
                "Inv": 1 / x,
                "Log": np.log(x),
                "Inv Log": np.log(1 / x),
            }

            name = "{}_{}".format(sensor, leaf)
            for processor_name, new_x in xs.items():
                print(processor_name)
                print(new_x)

                run_scan(new_x, y, name, processor_name)