def find_constants(open_path):
    """
    This is the main loop which runs until the user no longer selects any file.

    Retrieves the saved model for separating the data, then repeatedly asks for
    a csv log. Every log that passes ``is_valid_log`` (either against the legacy
    columns or against the default ones) is shown through a ``ConstantViewer``;
    anything else is reported in a message box.

    :param open_path: the default location to start your search
    :return: the ending location the folder search was looking at. When no
        usable model is available ``open_path`` is returned unchanged so the
        caller keeps its last search location.
    """
    if not os.path.exists(MODEL_FILE):
        easygui.msgbox(
            "There are no models to use to classify the data. Please train algorithm first."
        )
        return open_path

    # NOTE: joblib model files are pickle based, only load files you trust.
    clf = joblib.load(MODEL_FILE)
    if is_empty_model(clf):
        easygui.msgbox(
            "The model has not been fitted yet. Please fit data to the model.")
        return open_path

    while True:
        file = easygui.fileopenbox('Please locate csv file',
                                   'Specify File',
                                   default=open_path,
                                   filetypes='*.csv')
        if not file:
            # The dialog was closed without a selection: leave the loop.
            break

        # os.path.join uses the separator of the running platform instead of
        # a hard-coded (and invalidly escaped) backslash.
        open_path = os.path.join(os.path.dirname(file), "*.csv")
        file_data = get_data(file)

        legacy_log = is_valid_log(file_data, visualize.LEGACY_COLUMNS)
        current_log = is_valid_log(file_data)

        if not (legacy_log or current_log):
            easygui.msgbox("The file {0:s} is not a valid file.".format(
                os.path.basename(file)))
            continue

        if legacy_log and not current_log:
            # Only the legacy column set is present: warn before plotting.
            easygui.msgbox(
                "Because this log is missing information that makes it optimal "
                "for manipulating the data efficiently results may be inaccurate"
            )

        # TODO make it so that when closing the figure using the GUI it reopens normally
        plot = ConstantViewer(clf)
        plot.graph(file_data)
        plot.show()

    plt.close("all")
    return open_path
def main(open_path):
    """
    Repeatedly ask the user for csv logs and plot every valid one.

    The loop runs until the file dialog is closed without a selection. Each
    selected file is read with ``get_data`` and checked with ``is_valid_log``.
    Valid logs are collected in a dictionary and handed to ``plot_graphs``.
    The key is the timestamp parsed from the file name when the name matches
    ``%Y-%m-%d %H-%M-%S.csv``, otherwise the file name without its extension.

    :param open_path: the default location to start your search
    :return: the ending location the folder search was looking at
    """
    while True:
        files = easygui.fileopenbox('Please locate csv files',
                                    'Specify File',
                                    default=open_path,
                                    filetypes='*.csv',
                                    multiple=True)
        if not files:
            return open_path

        # os.path.join uses the separator of the running platform instead of
        # a hard-coded (and invalidly escaped) backslash.
        open_path = os.path.join(os.path.dirname(files[0]), "*.csv")

        csv_files = {}
        for file in files:
            base_name = os.path.basename(file)
            file_data = get_data(file)

            if not is_valid_log(file_data):
                easygui.msgbox(
                    "The file {0:s} is not a valid file it will be removed."
                    .format(base_name))
                continue

            try:
                name = datetime.strptime(base_name, "%Y-%m-%d %H-%M-%S.csv")
            except ValueError:
                # Strip only the extension: splitting on the first dot made
                # names such as "run.1.csv" and "run.2.csv" share one key.
                name = os.path.splitext(base_name)[0]
            csv_files[name] = file_data

        plot_graphs(csv_files)
def train_model(open_path):
    """
    Repeatedly ask the user for a csv log and open the outlier/scaling selection for it.

    The loop runs until the file dialog is closed without a selection. A log
    that passes ``is_valid_log`` against ``visualize.LEGACY_COLUMNS`` has its
    features extracted and is shown in an ``OutlierAndScalingSelection``.
    Other files are reported in a message box.

    :param open_path: the default location to start your search
    :return: the ending location the folder search was looking at
    """
    # TODO x = motor power, y velocity, z time
    while True:
        file = easygui.fileopenbox('Please locate csv file',
                                   'Specify File',
                                   default=open_path,
                                   filetypes='*.csv')
        if not file:
            break

        # os.path.join uses the separator of the running platform instead of
        # a hard-coded (and invalidly escaped) backslash.
        open_path = os.path.join(os.path.dirname(file), "*.csv")
        file_data = get_data(file)

        # TODO make this loop thought the steps as many times as they are number of paths
        if is_valid_log(file_data, visualize.LEGACY_COLUMNS):
            x, _ = get_features(file_data)
            outlier = OutlierAndScalingSelection(file_data, x)
            outlier.show()
            del outlier
        else:
            # Tell the user why the dialog simply comes back.
            easygui.msgbox(
                "The file {0:s} is not a valid file.".format(os.path.basename(file)))

    return open_path
def find_constants(open_path):
    """
    Repeatedly ask the user for a csv log and report the gains found for it.

    The model stored at ``MODEL_FILE`` is loaded once. For every valid log the
    hyperplane of the model is drawn on the 3d axes, ``find_gain`` is run on
    the log, and the resulting kV, kK and kAcc are shown in a message box.
    The loop runs until the file dialog is closed without a selection.

    :param open_path: the default location to start your search
    :return: the ending location the folder search was looking at. When no
        usable model is available ``open_path`` is returned unchanged so the
        caller keeps its last search location.
    """
    if not os.path.exists(MODEL_FILE):
        easygui.msgbox("There are no models to use to classify the data. Please train algorithm first.")
        return open_path

    # NOTE: joblib model files are pickle based, only load files you trust.
    clf = joblib.load(MODEL_FILE)
    if is_empty_model(clf):
        easygui.msgbox("The model has not been fitted yet. Please add training data to the model.")
        return open_path

    # Recent matplotlib releases no longer attach an ``Axes3D(fig)`` to the
    # figure automatically, so the 3d axes are created through the figure.
    # (The "3d" projection is registered by the mplot3d import of this file.)
    fig3d = plt.figure("Scaled 3d data")
    ax3d = fig3d.add_subplot(111, projection="3d")
    _, ax2d = plt.subplots(1, 1, num="Fitted data")

    while True:
        file = easygui.fileopenbox('Please locate csv file',
                                   'Specify File',
                                   default=open_path,
                                   filetypes='*.csv')
        if not file:
            break

        # os.path.join uses the separator of the running platform instead of
        # a hard-coded (and invalidly escaped) backslash.
        open_path = os.path.join(os.path.dirname(file), "*.csv")
        file_data = get_data(file)

        if not is_valid_log(file_data):
            easygui.msgbox(
                "The file {0:s} is not a valid file.".format(os.path.basename(file)))
            continue

        # Start from clean axes for every log.
        ax2d.cla()
        ax3d.cla()

        plot_hyperplane(clf, ax3d)
        k_v, k_k, k_acc = find_gain(clf, file_data, is_data=True, ax3d=ax3d, ax2d=ax2d)

        # TODO ask user to give the max acceleration of the current spline
        # TODO scale k_acc / ()

        plt.show()

        easygui.msgbox("""
            The kV of this log is {0:f}.
            The kK of this log is {1:f}.
            The kAcc of this log is {2:f}.""".format(k_v, k_k, k_acc))

    plt.ioff()
    plt.close("all")
    return open_path
def train_model(open_path):
    """
    Repeatedly ask the user for csv logs and run the training process on them.

    For every selection, each file that passes ``is_valid_log`` against
    ``LEGACY_COLUMNS`` contributes its features (third feature column rescaled
    per file with a ``MinMaxScaler``) and its raw data. The ``pathNumber``
    values of every additional file are shifted past the ones already
    collected so they stay distinct after concatenation. The combined arrays
    are handed to ``go_through_process``. The loop runs until the file dialog
    is closed without a selection.

    :param open_path: the default location to start your search
    :return: the ending location the folder search was looking at
    """
    # TODO x = motor power, y velocity, z time
    while True:
        files = easygui.fileopenbox('Please locate csv file',
                                    'Specify File',
                                    default=open_path,
                                    filetypes='*.csv',
                                    multiple=True)
        if not files:
            break

        # os.path.join uses the separator of the running platform instead of
        # a hard-coded (and invalidly escaped) backslash.
        open_path = os.path.join(os.path.dirname(files[0]), "*.csv")

        all_features = None
        all_data = None

        for file in files:
            file_data = get_data(file)

            if not is_valid_log(file_data, LEGACY_COLUMNS):
                easygui.msgbox(
                    "The file {0:s} is not a valid file, it will not be plotted."
                    .format(os.path.basename(file)))
                continue

            features, _ = get_features(file_data)

            # Rescale the third feature column of this file on its own.
            time = MinMaxScaler().fit_transform(features[:, 2].reshape(-1, 1))
            features[:, 2] = time.reshape(1, -1)

            if all_features is None:
                all_features = features
                all_data = file_data
            else:
                all_features = np.concatenate((all_features, features))
                # Offset the path numbers so they do not clash with the
                # paths that were collected from the previous files.
                file_data['pathNumber'] += (all_data["pathNumber"].max() + 1)
                all_data = np.concatenate((all_data, file_data))

        # Nothing to process when every selected file was rejected.
        if all_features is not None:
            go_through_process(all_features, all_data)

    return open_path
def main(open_path):
    """
    This is the main loop which runs until the user no longer selects any file.

    Each selected file is read with ``get_data`` and checked with
    ``is_valid_log`` against ``NEEDED_KEYS``. Valid logs are collected in a
    dictionary and shown through a ``Plot``. The key is the timestamp parsed
    from the file name when the name matches ``%Y-%m-%d %H-%M-%S.csv``,
    otherwise the file name without its extension.

    :param open_path: the default location to start your search
    :return: the ending location the folder search was looking at
    """
    while True:
        files = easygui.fileopenbox('Please locate csv files',
                                    'Specify File',
                                    default=open_path,
                                    filetypes='*.csv',
                                    multiple=True)
        if not files:
            return open_path

        # os.path.join uses the separator of the running platform instead of
        # a hard-coded (and invalidly escaped) backslash.
        open_path = os.path.join(os.path.dirname(files[0]), "*.csv")

        csv_files = {}
        for file in files:
            base_name = os.path.basename(file)
            file_data = get_data(file)

            if not is_valid_log(file_data, NEEDED_KEYS):
                easygui.msgbox(
                    "The file {0:s} is not a valid file, it will not be plotted."
                    .format(base_name))
                continue

            try:
                name = datetime.strptime(base_name, "%Y-%m-%d %H-%M-%S.csv")
            except ValueError:
                # Strip only the extension: splitting on the first dot made
                # names such as "run.1.csv" and "run.2.csv" share one key.
                name = os.path.splitext(base_name)[0]
            csv_files[name] = file_data

        plots = Plot(csv_files)
        plots.show()
        plots.close_all()
def train_model(open_path):
    """
    Interactively train the classifier from csv logs and save it.

    When a model file already exists the user is asked whether to reuse it.
    Reusing loads the classifier together with its stored features, labels and
    the list of files it was trained on; declining starts from a blank
    classifier. Cancelling the question leaves the function immediately.

    Afterwards the user repeatedly selects a csv log until the file dialog is
    closed without a selection. For every valid log:

    1. outliers are detected with an ``IsolationForest`` and plotted together
       with the labelled points,
    2. the predicted outliers are dropped, the features are rescaled with a
       ``MinMaxScaler`` and plotted again,
    3. the classifier is fitted and, for files that were not used before, the
       data is merged (duplicates removed) into the complete training set.

    If anything changed and the classifier is fitted, the model and its
    training data are written to ``MODEL_FILE`` and ``MODEL_DATA_FILE``.

    :param open_path: the default location to start your search
    :return: the ending location the folder search was looking at
    """
    # TODO add lasso selection of points for data that was not classified manually.
    # TODO Should be able to select outliers and what side is positive or not
    # TODO create 2d plots for every dimension and use lasso selection from there

    # Recent matplotlib releases no longer attach an ``Axes3D(fig)`` to the
    # figure automatically, so the 3d axes are created through the figure.
    # (The "3d" projection is registered by the mplot3d import of this file.)
    fig = plt.figure("Complete classifier")
    ax3d = fig.add_subplot(111, projection="3d")
    ax3d.set_xlabel('Average motor power')
    ax3d.set_ylabel('Velocity')
    ax3d.set_zlabel('Time')

    total_data = {}
    already_used_files = set()
    changed_anything = False
    hyperplane = None

    plt.ion()

    if os.path.exists(MODEL_FILE):
        answer = easygui.boolbox("A model already exists do you wish to use it?")

        if answer is None:
            # Cancelled: do not leave the empty figure behind and keep the
            # caller's search location.
            plt.close("all")
            return open_path
        elif answer:
            # NOTE: joblib model files are pickle based, only load files you trust.
            clf = joblib.load(MODEL_FILE)
            hyperplane = plot_hyperplane(clf, ax3d)

            data = np.load(MODEL_DATA_FILE)
            total_data["features"] = data["features"]
            total_data["labels"] = data["labels"]

            accelerating = total_data["features"][total_data["labels"] == 0]
            decelerating = total_data["features"][total_data["labels"] == 1]

            ax3d.scatter(accelerating[:, 0], accelerating[:, 1], accelerating[:, 2], c="red", label="acceleration")
            ax3d.scatter(decelerating[:, 0], decelerating[:, 1], decelerating[:, 2], c="blue", label="deceleration")

            # ``set.add`` accepts exactly one element; ``update`` takes the
            # whole collection of stored file names.
            already_used_files.update(data["files"].tolist())
            plt.show()
        else:
            clf = create_blank_classifier()
            changed_anything = True
    else:
        clf = create_blank_classifier()

    while True:
        file = easygui.fileopenbox('Please locate csv file',
                                   'Specify File',
                                   default=open_path,
                                   filetypes='*.csv')
        if not file:
            break

        # os.path.join uses the separator of the running platform instead of
        # a hard-coded (and invalidly escaped) backslash.
        open_path = os.path.join(os.path.dirname(file), "*.csv")
        file_data = get_data(file)

        # TODO make this loop thought the steps as many times as they are number of paths
        if not is_valid_log(file_data, visualize.LEGACY_COLUMNS):
            easygui.msgbox(
                "The file {0:s} is not a valid file.".format(os.path.basename(file)))
            continue

        x, _ = get_features(file_data)
        y = get_labels(file_data)

        outlier = IsolationForest(n_jobs=-1, random_state=0)
        # Build the inlier/outlier target separately. The former chained
        # assignment ``temp_y = y[y != OUTLIER] = 1`` overwrote every
        # non-outlier label in ``y`` and bound ``temp_y`` to the integer 1.
        temp_y = np.where(y == OUTLIER, OUTLIER, 1)
        outlier.fit(x, temp_y)
        prediction = outlier.predict(x)

        y[prediction == OUTLIER] = OUTLIER

        outliers = x[y == OUTLIER]
        accelerating = x[y == ACCELERATING]
        decelerating = x[y == DECELERATING]

        outlier_power, outlier_velocity, outlier_time = separate_feature(outliers)
        accelerating_power, accelerating_velocity, accelerating_time = separate_feature(accelerating)
        decelerating_power, decelerating_velocity, decelerating_time = separate_feature(decelerating)

        temp_fig = plt.figure(os.path.basename(file).split(".")[0])
        temp_ax = temp_fig.add_subplot(111, projection="3d")
        temp_ax.set_xlabel('Average motor power')
        temp_ax.set_ylabel('Velocity')
        temp_ax.set_zlabel('Time')

        outlier_line = temp_ax.scatter(outlier_power, outlier_velocity, outlier_time, c="black", label="outliers")
        acceleration_line = temp_ax.scatter(accelerating_power, accelerating_velocity, accelerating_time,
                                            c="red", label="accelerating")
        deceleration_line = temp_ax.scatter(decelerating_power, decelerating_velocity, decelerating_time,
                                            c="blue", label="decelerating")

        plt.show()
        easygui.msgbox("Next without outliers and rescaled")

        # NOTE(review): only the *predicted* outliers are dropped here; rows
        # that get_labels already marked as OUTLIER but that were predicted
        # as inliers stay in the training data - confirm this is intended.
        x = x[prediction != OUTLIER]
        y = y[prediction != OUTLIER]

        x = MinMaxScaler().fit_transform(x)

        # Replace the first scatter plots with the cleaned, rescaled data.
        outlier_line.remove()
        acceleration_line.remove()
        deceleration_line.remove()

        accelerating = x[y == ACCELERATING]
        decelerating = x[y == DECELERATING]

        accelerating_power, accelerating_velocity, accelerating_time = separate_feature(accelerating)
        decelerating_power, decelerating_velocity, decelerating_time = separate_feature(decelerating)

        temp_ax.scatter(accelerating_power, accelerating_velocity, accelerating_time,
                        c="red", label="accelerating")
        temp_ax.scatter(decelerating_power, decelerating_velocity, decelerating_time,
                        c="blue", label="decelerating")

        clf.fit(x, y)
        plot_hyperplane(clf, temp_ax)

        if not total_data:
            total_data = {"features": x, "labels": y}
            changed_anything = True
        elif file not in already_used_files:
            new_x = np.concatenate((total_data["features"], x))
            new_y = np.concatenate((total_data["labels"], y))

            # Glue the labels on as a last column so duplicated
            # (features, label) rows can be dropped in one pass.
            temp_x = np.hstack((new_x, new_y.reshape((-1, 1))))
            temp_x = np.unique(temp_x, axis=0)

            total_data["features"] = temp_x[:, :-1]
            total_data["labels"] = temp_x[:, -1].ravel()

            clf.fit(total_data["features"], total_data["labels"])
            changed_anything = True

        if file not in already_used_files:  # FIXME can this just be in a single if statement?
            ax3d.scatter(accelerating[:, 0], accelerating[:, 1], accelerating[:, 2], c="red", label="positive")
            ax3d.scatter(decelerating[:, 0], decelerating[:, 1], decelerating[:, 2], c="blue", label="negative")

            if hyperplane is not None:
                hyperplane.remove()

            hyperplane = plot_hyperplane(clf, ax3d)
            # ``already_used_files`` is a set, which has no ``append``.
            already_used_files.add(file)

    if changed_anything and not is_empty_model(clf):
        joblib.dump(clf, MODEL_FILE)
        # Store the file names as a plain list: a ``set`` would be saved as a
        # pickled 0-d object array that cannot be iterated when loaded again.
        np.savez(MODEL_DATA_FILE,
                 features=total_data["features"],
                 labels=total_data["labels"],
                 files=sorted(already_used_files))
        easygui.msgbox("Model saved.")

    plt.close("all")
    return open_path