def manipulate_features(self, features: np.ndarray, file_data: np.ndarray) -> (np.ndarray, np.ndarray, np.ndarray):
    """
    Scale the features and split them into inliers and outliers so that the
    algorithm separating the data works on cleaner input.

    Rows whose ``motionState`` is not ``"MOVING"`` are dropped first when that
    column exists. When a ``pathNumber`` column exists every path is min-max
    scaled on its own (the output rows are then ordered path by path);
    otherwise one scaler is fitted on all rows. A ``OneClassSVM`` fitted on the
    scaled rows decides which of them are outliers.

    :param features: the features to use
    :param file_data: the log file's data
    :return: the scaled features without the outliers, the scaled outliers and
        the value returned by ``self.reverse_scalling`` for the outlier-free
        features (presumably those features mapped back to their original
        scale -- confirm against ``reverse_scalling``)
    """
    if contains_key(file_data, "motionState"):
        moving_mask = file_data["motionState"] == "MOVING"
        features = features[moving_mask]
        file_data = file_data[moving_mask]

    new_features = None
    scalers = {}  # fitted scaler -> boolean mask of the rows it was fitted on

    if contains_key(file_data, "pathNumber"):
        path_numbers = file_data["pathNumber"]
        scaled_paths = []
        for i in range(path_numbers.min(), path_numbers.max() + 1):
            path_number = path_numbers == i
            if not path_number.any():
                # A gap in the path numbering yields an empty slice, which
                # cannot be fitted by the scaler; there is nothing to scale.
                continue

            min_max_scaler = MinMaxScaler()
            scalers[min_max_scaler] = path_number

            # Boolean indexing copies, so the sign flip below does not touch
            # the caller's array.
            features_at_path = features[path_number]
            half = features_at_path.shape[0] // 2
            coefficient, _ = find_linear_best_fit_line(features_at_path[:half, 2], features_at_path[:half, 0])
            if coefficient < 0:
                # Negative coefficient on the first half of the path: mirror
                # column 0 before scaling.
                features_at_path[:, 0] *= -1

            scaled_paths.append(min_max_scaler.fit_transform(features_at_path))

        if scaled_paths:
            # Join once instead of re-allocating the result on every path.
            new_features = np.concatenate(scaled_paths, 0)
    else:
        min_max_scaler = MinMaxScaler()
        scalers[min_max_scaler] = np.full(features.shape[0], True)
        new_features = min_max_scaler.fit_transform(features)

    outlier_detector = OneClassSVM(gamma=10)  # Seems to work best
    outlier_detector.fit(new_features)
    outlier_prediction = outlier_detector.predict(new_features)

    # The detector's predictions are compared against -1 (outlier) and 1 (kept).
    outliers = new_features[outlier_prediction == -1]
    new_features = new_features[outlier_prediction == 1]
    features = self.reverse_scalling(new_features, scalers, outlier_prediction)

    if self.show_outliers:
        plot_hyperplane(outlier_detector, self.master_plot, interval=.04, colors="orange")

    return new_features, outliers, features
def find_constants(open_path):
    """
    Repeatedly ask the user for a csv log and report the gains found for it.

    The saved model is loaded from ``MODEL_FILE``. For every valid log the user
    picks, the 2d and 3d axes are redrawn and the values returned by
    ``find_gain`` are shown in a message box. The loop ends when the file
    dialog returns nothing.

    :param open_path: the default path/pattern offered by the file dialog
    :return: the ``*.csv`` pattern of the folder of the last chosen file (or
        ``open_path`` unchanged when no file was chosen); ``None`` when there
        is no saved model or the saved model has not been fitted
    """
    if not os.path.exists(MODEL_FILE):
        easygui.msgbox("There are no models to use to classify the data. Please train algorithm first.")
        return

    clf = joblib.load(MODEL_FILE)
    if is_empty_model(clf):
        easygui.msgbox("The model has not been fitted yet. Please add training data to the model.")
        return

    fig = plt.figure("Scaled 3d data")
    ax3d = Axes3D(fig)
    fig, ax2d = plt.subplots(1, 1, num="Fitted data")

    while True:
        file = easygui.fileopenbox('Please locate csv file', 'Specify File', default=open_path, filetypes='*.csv')
        if not file:
            break

        # Remember the folder so the next dialog opens where the user left off.
        # os.path.join avoids the invalid "\*" escape of a hand-built pattern
        # and uses the separator of the running platform.
        open_path = os.path.join(os.path.dirname(file), "*.csv")

        file_data = get_data(file)
        if not is_valid_log(file_data):
            easygui.msgbox(
                "The file {0:s} is not a valid file.".format(os.path.basename(file)))
            continue

        ax2d.cla()
        ax3d.cla()
        plot_hyperplane(clf, ax3d)
        k_v, k_k, k_acc = find_gain(clf, file_data, is_data=True, ax3d=ax3d, ax2d=ax2d)
        # TODO ask user to give the max acceleration of the current spline
        # TODO scale k_acc / ()
        plt.show()
        easygui.msgbox(""" The kV of this log is {0:f}. The kK of this log is {1:f}. The kAcc of this log is {2:f}.""".format(k_v, k_k, k_acc))

    plt.ioff()
    plt.close("all")
    return open_path
def plot_3d_plot(self, features, headers, labels):
    """
    Draw the features as a 3d scatter on the master plot and add the
    classifier's hyperplane to it.

    :param features: the points to plot; columns 0, 1 and 2 become x, y and z
    :param headers: the axis titles, in x, y, z order
    :param labels: the color of each data point
    """
    xs = features[:, 0]
    ys = features[:, 1]
    zs = features[:, 2]
    self.master_plot.scatter(xs, ys, zs, c=labels)

    # Name the axes after the first three headers.
    self.master_plot.set_xlabel(headers[0])
    self.master_plot.set_ylabel(headers[1])
    self.master_plot.set_zlabel(headers[2])

    plot_hyperplane(self.clf, self.master_plot, colors='orange')
def show(self):
    """
    Build the figure, draw every graph on it and display it.

    Does nothing when the figure is already marked as showing.
    """
    if self.showing:
        return

    self.fig = plt.figure("Scaled 3d data")
    fig_manager = plt.get_current_fig_manager()
    fig_manager.window.showMaximized()

    # 3x4 grid: the 3d plot takes the first three columns, the last column
    # holds the three 2d plots stacked on top of each other.
    self.gs = GridSpec(3, 4, self.fig)
    self.master_plot = self.fig.add_subplot(self.gs[:3, :3], projection='3d')
    self.time_velocity = self.fig.add_subplot(self.gs[0, -1])
    self.time_power = self.fig.add_subplot(self.gs[1, -1])
    self.power_velocity = self.fig.add_subplot(self.gs[2, -1])
    self.gs.tight_layout(self.fig)

    self.clear_graphs()

    side_plots = (self.time_velocity, self.time_power, self.power_velocity)
    plot_subplots(self.new_scaled_features, self.headers, side_plots, self.color_labels)
    self.plot_3d_plot(self.new_scaled_features, self.headers, self.color_labels)

    if self.show_outliers:
        # Outliers are drawn in black together with the detector's boundary.
        self.master_plot.scatter(
            self.outliers[:, 0],
            self.outliers[:, 1],
            self.outliers[:, 2],
            c="black",
        )
        plot_hyperplane(self.outlier_detector, self.master_plot, interval=.04, colors="orange")

    self.show_constants_graph(self.features, self.file_data, self.labels, c=self.color_labels)

    self.fig.show()
    plt.show()
    self.showing = True
def go_through_process(all_features: np.ndarray, all_data: np.ndarray):
    """
    Run the whole pipeline on the given data and show the fitted hyperplane.

    Outliers are removed, the features are manipulated and filtered a second
    time, the rows are labelled by ``select_accelerating_vs_decelerating`` and
    an ``SVC`` fitted on those labels is plotted.

    :param all_features: the features of every data point
    :param all_data: the data the features come from, row-aligned with them
    """
    # First pass: keep only the rows picked by the outlier selector.
    selector = remove_outliers(all_features)
    all_features, all_data = all_features[selector.indexes], all_data[selector.indexes]

    # Second pass: manipulate the features and filter once more.
    all_features, _unused_features, selector = manipulate_features_and_remove_outliers(
        all_features, all_data)
    all_features, all_data = all_features[selector.indexes], all_data[selector.indexes]

    # Label the remaining rows and fit the classifier on them.
    selector = select_accelerating_vs_decelerating(all_features)
    clf = SVC()
    clf.fit(all_features, selector.get_labels())

    graphs = Graphs(all_features, title="Hyperplane")
    plot_hyperplane(clf, graphs.all_features_axes)
    plt.show()
# NOTE(review): this is the tail of a plotting routine whose header is not part
# of this excerpt; clf, features, color_label, master_plot, gs and fig are
# expected to come from the enclosing scope.

# Split the points by the estimator's prediction: rows predicted -1 are drawn
# as outliers, rows predicted 1 are kept together with their colors.
outlier_prediction = clf.predict(features)
outliers = features[outlier_prediction == -1]
outlier_free = features[outlier_prediction == 1]
color_label = color_label[outlier_prediction == 1]
# Kept points use their own colors, outliers are drawn in black.
master_plot.scatter(outlier_free[:, 0], outlier_free[:, 1], outlier_free[:, 2], c=color_label)
master_plot.scatter(outliers[:, 0], outliers[:, 1], outliers[:, 2], c="black")
plot_hyperplane(clf, master_plot, interval=.05)
gs.tight_layout(fig)
# showMaximized() is called on the manager's window object, so this presumably
# expects a Qt backend -- TODO confirm.
figManager = plt.get_current_fig_manager()
figManager.window.showMaximized()
# plt.show()
fig.show()
plt.show()
# fig.clear()
# After the first show, a fresh 3d axes is created on the same figure and the
# figure is shown again.
Axes3D(fig)
fig.show()
# NOTE(review): the figure is passed positionally to plt.show() here -- confirm
# this is accepted by the Matplotlib version in use.
plt.show(fig)
import matplotlib.pyplot as plt
from sklearn.svm import OneClassSVM

from visualize import helper
from visualize.helper import get_features, plot_hyperplane

# Demo script: fit a one-class SVM on the features of one example log and draw
# its decision surface over a 3d scatter of those features.
data = helper.get_data(r"..\example_data\2018-03-21 08-29-04.csv")
features, col = get_features(data)

# Create the 3d axes through add_subplot: passing keyword arguments to
# plt.gca() was deprecated in Matplotlib 3.4 and is rejected by later releases.
ax3d = plt.gcf().add_subplot(111, projection='3d')
ax3d.scatter(features[:, 0], features[:, 1], features[:, 2])

# NOTE(review): ``degree`` only affects the polynomial kernel, so with the
# default kernel it changes nothing -- confirm whether ``gamma`` or
# ``kernel="poly"`` was intended.
clf = OneClassSVM(degree=10)
clf.fit(features)

plot_hyperplane(clf, ax3d, interval=.01)
plt.show()
def train_model(open_path):
    """
    Interactively train (or extend) the classifier with manually labelled logs.

    If a saved model exists the user may continue from it; its training data
    and the list of files already used are then loaded from
    ``MODEL_DATA_FILE``. For every valid csv log the user picks, outliers are
    detected with an ``IsolationForest``, the remaining points are min-max
    scaled, shown in a figure of their own and merged into the accumulated
    training data, on which the classifier is refitted. When anything changed
    and the classifier has been fitted, the model and its data are saved.

    :param open_path: the default path/pattern offered by the file dialog
    :return: the ``*.csv`` pattern of the folder of the last chosen file (or
        ``open_path`` unchanged when no file was chosen); ``None`` when the
        "use existing model" dialog is dismissed without an answer
    """
    # TODO add lasso selection of points for data that was not classified manually.
    # TODO Should be able to select outliers and what side is positive or not
    # TODO create 2d plots for every dimension and use lasso selection from there
    fig = plt.figure("Complete classifier")
    ax3d = Axes3D(fig)
    ax3d.set_xlabel('Average motor power')
    ax3d.set_ylabel('Velocity')
    ax3d.set_zlabel('Time')

    total_data = {}
    already_used_files = set()
    changed_anything = False
    hyperplane = None

    plt.ion()
    if os.path.exists(MODEL_FILE):
        answer = easygui.boolbox("A model already exists do you wish to use it?")
        if answer is None:
            return
        elif answer:
            clf = joblib.load(MODEL_FILE)
            hyperplane = plot_hyperplane(clf, ax3d)

            data = np.load(MODEL_DATA_FILE)
            total_data["features"] = data["features"]
            total_data["labels"] = data["labels"]

            accelerating = total_data["features"][total_data["labels"] == 0]
            decelerating = total_data["features"][total_data["labels"] == 1]
            ax3d.scatter(accelerating[:, 0], accelerating[:, 1], accelerating[:, 2], c="red", label="acceleration")
            ax3d.scatter(decelerating[:, 0], decelerating[:, 1], decelerating[:, 2], c="blue", label="deceleration")

            # set.add() accepts exactly one element; update() takes them all,
            # whether the archive holds zero, one or many file names.
            already_used_files.update(str(name) for name in np.atleast_1d(data["files"]))
            plt.show()
        else:
            # The user wants to start over: the saved model will be replaced.
            clf = create_blank_classifier()
            changed_anything = True
    else:
        clf = create_blank_classifier()

    while True:
        file = easygui.fileopenbox('Please locate csv file', 'Specify File', default=open_path, filetypes='*.csv')
        if not file:
            break

        # Remember the folder so the next dialog opens where the user left off.
        # os.path.join avoids the invalid "\*" escape of a hand-built pattern.
        open_path = os.path.join(os.path.dirname(file), "*.csv")
        file_data = get_data(file)

        # TODO make this loop thought the steps as many times as they are number of paths
        if not is_valid_log(file_data, visualize.LEGACY_COLUMNS):
            easygui.msgbox(
                "The file {0:s} is not a valid file.".format(os.path.basename(file)))
            continue

        x, _ = get_features(file_data)
        y = get_labels(file_data)

        # Fit the outlier detector on a *copy* of the labels in which every
        # non-outlier is 1. Building that copy with a chained assignment
        # (``temp_y = y[...] = 1``) would overwrite the real labels in ``y``.
        # (LocalOutlierFactor and EllipticEnvelope were tried here as well.)
        outlier = IsolationForest(n_jobs=-1, random_state=0)
        temp_y = np.array(y)
        temp_y[temp_y != OUTLIER] = 1
        outlier.fit(x, temp_y)
        prediction = outlier.predict(x)
        y[prediction == OUTLIER] = OUTLIER

        outliers = x[y == OUTLIER]
        accelerating = x[y == ACCELERATING]
        decelerating = x[y == DECELERATING]

        # First view: the raw data of this file, outliers included.
        temp_fig = plt.figure(os.path.basename(file).split(".")[0])
        temp_ax = Axes3D(temp_fig)
        temp_ax.set_xlabel('Average motor power')
        temp_ax.set_ylabel('Velocity')
        temp_ax.set_zlabel('Time')

        outlier_power, outlier_velocity, outlier_time = separate_feature(outliers)
        accelerating_power, accelerating_velocity, accelerating_time = separate_feature(accelerating)
        decelerating_power, decelerating_velocity, decelerating_time = separate_feature(decelerating)
        outlier_line = temp_ax.scatter(outlier_power, outlier_velocity, outlier_time,
                                       c="black", label="outliers")
        acceleration_line = temp_ax.scatter(accelerating_power, accelerating_velocity, accelerating_time,
                                            c="red", label="accelerating")
        deceleration_line = temp_ax.scatter(decelerating_power, decelerating_velocity, decelerating_time,
                                            c="blue", label="decelerating")
        plt.show()
        easygui.msgbox("Next without outliers and rescaled")

        # Second view: predicted outliers dropped, features rescaled.
        x = x[prediction != OUTLIER]
        y = y[prediction != OUTLIER]
        x = MinMaxScaler().fit_transform(x)

        outlier_line.remove()
        acceleration_line.remove()
        deceleration_line.remove()

        accelerating = x[y == ACCELERATING]
        decelerating = x[y == DECELERATING]
        accelerating_power, accelerating_velocity, accelerating_time = separate_feature(accelerating)
        decelerating_power, decelerating_velocity, decelerating_time = separate_feature(decelerating)
        temp_ax.scatter(accelerating_power, accelerating_velocity, accelerating_time,
                        c="red", label="accelerating")
        temp_ax.scatter(decelerating_power, decelerating_velocity, decelerating_time,
                        c="blue", label="decelerating")

        clf.fit(x, y)
        plot_hyperplane(clf, temp_ax)

        is_new_file = file not in already_used_files
        if not total_data:
            total_data = {"features": x, "labels": y}
            changed_anything = True
        elif is_new_file:
            # Append the new rows and drop exact duplicates; the labels ride
            # along as the last column so they stay aligned with their rows.
            merged_x = np.concatenate((total_data["features"], x))
            merged_y = np.concatenate((total_data["labels"], y))
            merged = np.unique(np.hstack((merged_x, merged_y.reshape((-1, 1)))), axis=0)
            total_data["features"] = merged[:, :-1]
            total_data["labels"] = merged[:, -1].ravel()
            clf.fit(total_data["features"], total_data["labels"])
            changed_anything = True

        if is_new_file:  # FIXME can this just be in a single if statement?
            ax3d.scatter(accelerating[:, 0], accelerating[:, 1], accelerating[:, 2], c="red", label="positive")
            ax3d.scatter(decelerating[:, 0], decelerating[:, 1], decelerating[:, 2], c="blue", label="negative")
            if hyperplane is not None:
                hyperplane.remove()
            hyperplane = plot_hyperplane(clf, ax3d)
            # The collection is a set, which has add() but no append().
            already_used_files.add(file)

    if changed_anything and not is_empty_model(clf):
        joblib.dump(clf, MODEL_FILE)
        # Store the file names as a plain sequence of strings rather than the
        # set object itself, so they come back as an array of names on load.
        np.savez(MODEL_DATA_FILE,
                 features=total_data["features"],
                 labels=total_data["labels"],
                 files=sorted(already_used_files))
        easygui.msgbox("Model saved.")

    plt.close("all")
    return open_path