def manipulate_features(self, features: np.ndarray, file_data: np.ndarray) -> (np.ndarray, np.ndarray, np.ndarray):
        """
        Scale the features path by path and split them into inliers and outliers.

        Rows whose "motionState" is not "MOVING" are dropped first (when that column exists). When a
        "pathNumber" column exists every path is scaled with its own MinMaxScaler, otherwise a single
        scaler is fitted on all rows. A OneClassSVM fitted on the scaled rows then flags the outliers.

        :param features: the features to use
        :param file_data: the log file's data
        :return: the scaled features without outliers, the scaled outliers and the value returned by
                 ``self.reverse_scalling`` for the outlier-free features
        """

        if contains_key(file_data, "motionState"):
            moving_mask = file_data["motionState"] == "MOVING"
            features = features[moving_mask]
            file_data = file_data[moving_mask]

        # Maps every fitted scaler to the boolean row mask of the rows it was fitted on
        scalers = {}
        if contains_key(file_data, "pathNumber"):
            path_numbers = file_data["pathNumber"]
            scaled_paths = []

            # Only visit path numbers that actually occur: a gap in the numbering would hand an empty
            # array to the scaler, which raises.
            for i in np.unique(path_numbers):
                path_mask = path_numbers == i

                # Boolean indexing returns a copy, so the sign flip below never touches ``features``
                features_at_path = features[path_mask]

                half = features_at_path.shape[0] // 2
                coefficient, _ = find_linear_best_fit_line(features_at_path[:half, 2], features_at_path[:half, 0])

                # A negative coefficient over the first half of the path flips the sign of column 0
                if coefficient < 0:
                    features_at_path[:, 0] *= -1

                min_max_scaler = MinMaxScaler()
                scalers[min_max_scaler] = path_mask
                scaled_paths.append(min_max_scaler.fit_transform(features_at_path))

            # NOTE(review): the rows are now grouped by path number, which differs from the input order
            # when paths are interleaved - confirm reverse_scalling expects that ordering.
            new_features = np.concatenate(scaled_paths, 0)
        else:
            min_max_scaler = MinMaxScaler()
            scalers[min_max_scaler] = np.full(features.shape[0], True)
            new_features = min_max_scaler.fit_transform(features)

        outlier_detector = OneClassSVM(gamma=10)  # Seems to work best

        outlier_detector.fit(new_features)
        outlier_prediction = outlier_detector.predict(new_features)
        outliers = new_features[outlier_prediction == -1]
        new_features = new_features[outlier_prediction == 1]

        features = self.reverse_scalling(new_features, scalers, outlier_prediction)

        if self.show_outliers:
            plot_hyperplane(outlier_detector, self.master_plot, interval=.04, colors="orange")

        return new_features, outliers, features
def find_constants(open_path):
    """
    Repeatedly ask for a csv log and report the kV, kK and kAcc found for it with the saved model.

    The loop ends when the file dialog is dismissed without choosing a file.

    :param open_path: the default path handed to the file dialog
    :return: the "<folder>/*.csv" pattern of the last opened file (``open_path`` unchanged when no file
             was opened), or None when there is no saved model or the saved model is not fitted
    """
    if not os.path.exists(MODEL_FILE):
        easygui.msgbox("There are no models to use to classify the data. Please train algorithm first.")
        return

    clf = joblib.load(MODEL_FILE)

    if is_empty_model(clf):
        easygui.msgbox("The model has not been fitted yet. Please add training data to the model.")
        return

    # add_subplot attaches the 3d axes to the figure; recent Matplotlib releases no longer do that
    # automatically for a bare Axes3D(fig).
    ax3d = plt.figure("Scaled 3d data").add_subplot(111, projection='3d')
    _, ax2d = plt.subplots(1, 1, num="Fitted data")

    while True:
        file = easygui.fileopenbox('Please locate csv file', 'Specify File', default=open_path, filetypes='*.csv')

        if not file:
            break

        # Next time the dialog opens in the folder of the file that was just chosen
        open_path = os.path.join(os.path.dirname(file), "*.csv")

        file_data = get_data(file)

        if not is_valid_log(file_data):
            easygui.msgbox(
                "The file {0:s} is not a valid file.".format(os.path.basename(file)))
            continue

        ax2d.cla()
        ax3d.cla()

        plot_hyperplane(clf, ax3d)

        k_v, k_k, k_acc = find_gain(clf, file_data, is_data=True, ax3d=ax3d, ax2d=ax2d)

        # TODO ask user to give the max acceleration of the current spline
        # TODO scale k_acc / ()
        plt.show()

        easygui.msgbox("""
                The kV of this log is {0:f}.
                The kK of this log is {1:f}.
                The kAcc of this log is {2:f}.""".format(k_v, k_k, k_acc))

    plt.ioff()
    plt.close("all")
    return open_path
    def plot_3d_plot(self, features, headers, labels):
        """
        Scatter the features on the master 3d plot and draw the classifier's hyperplane over them.

        :param features: the features to plot; columns 0, 1 and 2 are used as x, y and z
        :param headers: the axis titles, in x, y, z order
        :param labels: the color of each data point
        """
        axes = self.master_plot

        xs, ys, zs = features[:, 0], features[:, 1], features[:, 2]
        axes.scatter(xs, ys, zs, c=labels)

        x_title, y_title, z_title = headers[0], headers[1], headers[2]
        axes.set_xlabel(x_title)
        axes.set_ylabel(y_title)
        axes.set_zlabel(z_title)

        plot_hyperplane(self.clf, axes, colors='orange')
    def show(self):
        """
        Build and display the figure, unless it is already showing.

        The figure holds the 3d master plot on the left three columns of a 3x4 grid and three 2d
        plots stacked in the last column.
        """
        if self.showing:
            return

        self.fig = plt.figure("Scaled 3d  data")

        window_manager = plt.get_current_fig_manager()
        window_manager.window.showMaximized()

        layout = GridSpec(3, 4, self.fig)
        self.gs = layout

        self.master_plot = self.fig.add_subplot(layout[:3, :3], projection='3d')
        self.time_velocity = self.fig.add_subplot(layout[0, -1])
        self.time_power = self.fig.add_subplot(layout[1, -1])
        self.power_velocity = self.fig.add_subplot(layout[2, -1])

        layout.tight_layout(self.fig)
        self.clear_graphs()

        # Gathered after clear_graphs() so the tuple holds whatever axes are current at that point
        side_plots = (self.time_velocity, self.time_power, self.power_velocity)
        plot_subplots(self.new_scaled_features, self.headers, side_plots, self.color_labels)

        self.plot_3d_plot(self.new_scaled_features, self.headers, self.color_labels)

        if self.show_outliers:
            rejected = self.outliers
            self.master_plot.scatter(rejected[:, 0], rejected[:, 1], rejected[:, 2], c="black")
            plot_hyperplane(self.outlier_detector, self.master_plot, interval=.04, colors="orange")

        self.show_constants_graph(self.features, self.file_data, self.labels, c=self.color_labels)

        self.fig.show()
        plt.show()
        self.showing = True
def go_through_process(all_features: np.ndarray, all_data: np.ndarray):
    """
    Filter the features in two passes, fit an SVC on the remainder and plot the result.

    NOTE(review): everything after the first ``plt.show()`` uses ``color_label``, ``master_plot``,
    ``gs`` and ``fig``, none of which are defined inside this function. ``color_label`` is assigned
    below, which makes it a local name, so reading it on the right-hand side of that same statement
    raises UnboundLocalError. That tail looks like it was pasted in from another function - confirm
    before relying on it.

    :param all_features: the features of every data point
    :param all_data: the log data; indexed with the same selections as ``all_features``
    """
    # First pass: keep only the rows picked by the outlier selector
    selector = remove_outliers(all_features)

    all_features = all_features[selector.indexes]
    all_data = all_data[selector.indexes]

    # Second pass: returns the manipulated features, a second feature array and a new selector
    all_features, features, selector = manipulate_features_and_remove_outliers(
        all_features, all_data)

    all_features = all_features[selector.indexes]
    all_data = all_data[selector.indexes]

    # The labels used for training come from this selector
    selector = select_accelerating_vs_decelerating(all_features)

    clf = SVC()
    clf.fit(all_features, selector.get_labels())

    graphs = Graphs(all_features, title="Hyperplane")
    plot_hyperplane(clf, graphs.all_features_axes)

    plt.show()
    # NOTE(review): the predictions are compared against -1 and 1 below; whether the SVC can output
    # those values depends on what selector.get_labels() returned - confirm.
    outlier_prediction = clf.predict(features)

    outliers = features[outlier_prediction == -1]
    outlier_free = features[outlier_prediction == 1]
    # Raises UnboundLocalError as written: color_label is read before it is assigned in this scope
    color_label = color_label[outlier_prediction == 1]

    # master_plot, gs and fig are not assigned in this function, so they have to be module-level
    # names - TODO confirm they exist
    master_plot.scatter(outlier_free[:, 0],
                        outlier_free[:, 1],
                        outlier_free[:, 2],
                        c=color_label)
    master_plot.scatter(outliers[:, 0],
                        outliers[:, 1],
                        outliers[:, 2],
                        c="black")

    plot_hyperplane(clf, master_plot, interval=.05)

    gs.tight_layout(fig)

    # NOTE(review): window.showMaximized() presumably requires a Qt backend - confirm
    figManager = plt.get_current_fig_manager()
    figManager.window.showMaximized()

    # plt.show()
    fig.show()
    plt.show()

    # fig.clear()
    # The new Axes3D is not kept in a variable
    Axes3D(fig)
    fig.show()
    # NOTE(review): the figure is passed positionally to plt.show() - confirm this is intended
    plt.show(fig)
import matplotlib.pyplot as plt
from sklearn.svm import OneClassSVM

from visualize import helper
from visualize.helper import get_features, plot_hyperplane

# Demo script: fit a one-class SVM on the features of one example log and draw its decision surface
data = helper.get_data(r"..\example_data\2018-03-21 08-29-04.csv")
features, col = get_features(data)

# pyplot.gca() no longer accepts keyword arguments on current Matplotlib; a subplot with the
# 3d projection gives the same full-figure 3d axes.
ax3d = plt.subplot(111, projection='3d')
ax3d.scatter(features[:, 0], features[:, 1], features[:, 2])

# NOTE(review): ``degree`` only affects the polynomial kernel and the default kernel is RBF, so it
# has no effect here - confirm whether ``gamma`` was meant instead.
clf = OneClassSVM(degree=10)

clf.fit(features)

plot_hyperplane(clf, ax3d, interval=.01)

plt.show()
def train_model(open_path):
    """
    Interactively train the accelerating/decelerating classifier from manually labelled csv logs.

    An existing model (and the data it was trained on) can be reused as the starting point. Every
    valid log that is opened is cleaned from outliers, rescaled, previewed in its own figure and,
    when it was not used before, merged into the training data on which the classifier is refitted.
    The model, its training data and the list of used files are saved when anything changed.

    :param open_path: the default path handed to the file dialog
    :return: the "<folder>/*.csv" pattern of the last opened file (``open_path`` unchanged when no file
             was opened), or None when the "use existing model" dialog was dismissed
    """
    # TODO add lasso selection of points for data that was not classified manually.
    # TODO Should be able to select outliers and what side is positive or not

    # TODO create 2d plots for every dimension and use lasso selection from there
    fig = plt.figure("Complete classifier")
    # add_subplot attaches the 3d axes to the figure; recent Matplotlib releases no longer do that
    # automatically for a bare Axes3D(fig).
    ax3d = fig.add_subplot(111, projection='3d')
    ax3d.set_xlabel('Average motor power')
    ax3d.set_ylabel('Velocity')
    ax3d.set_zlabel('Time')

    total_data = {}
    already_used_files = set()
    changed_anything = False
    hyperplane = None

    plt.ion()
    if os.path.exists(MODEL_FILE):
        answer = easygui.boolbox("A model already exists do you wish to use it?")

        if answer is None:
            return

        elif answer:
            clf = joblib.load(MODEL_FILE)
            hyperplane = plot_hyperplane(clf, ax3d)

            # The archive keeps a file handle open until it is closed
            with np.load(MODEL_DATA_FILE) as data:
                total_data["features"] = data["features"]
                total_data["labels"] = data["labels"]
                # set.add() takes a single element; update() accepts any number of files
                already_used_files.update(np.atleast_1d(data["files"]).tolist())

            accelerating = total_data["features"][total_data["labels"] == 0]
            decelerating = total_data["features"][total_data["labels"] == 1]

            ax3d.scatter(accelerating[:, 0], accelerating[:, 1], accelerating[:, 2], c="red",
                         label="acceleration")
            ax3d.scatter(decelerating[:, 0], decelerating[:, 1], decelerating[:, 2], c="blue",
                         label="deceleration")

            plt.show()
        else:
            clf = create_blank_classifier()
            changed_anything = True
    else:
        clf = create_blank_classifier()

    while True:
        file = easygui.fileopenbox('Please locate csv file', 'Specify File', default=open_path, filetypes='*.csv')

        if not file:
            break

        # Next time the dialog opens in the folder of the file that was just chosen
        open_path = os.path.join(os.path.dirname(file), "*.csv")

        file_data = get_data(file)

        # TODO make this loop thought the steps as many times as they are number of paths
        if not is_valid_log(file_data, visualize.LEGACY_COLUMNS):
            easygui.msgbox(
                "The file {0:s} is not a valid file.".format(os.path.basename(file)))
            continue

        x, _ = get_features(file_data)
        y = get_labels(file_data)

        # IsolationForest is unsupervised, so no labels are passed to fit(). The manual labels in
        # ``y`` must stay untouched here: they are what separates accelerating from decelerating.
        outlier = IsolationForest(n_jobs=-1, random_state=0)
        outlier.fit(x)
        prediction = outlier.predict(x)

        # Detected outliers are added to the ones that were already labelled as such
        y[prediction == OUTLIER] = OUTLIER

        outliers = x[y == OUTLIER]
        accelerating = x[y == ACCELERATING]
        decelerating = x[y == DECELERATING]
        outlier_power, outlier_velocity, outlier_time = separate_feature(outliers)
        accelerating_power, accelerating_velocity, accelerating_time = separate_feature(accelerating)
        decelerating_power, decelerating_velocity, decelerating_time = separate_feature(decelerating)

        # splitext keeps dots inside the file name, so different logs never share a figure
        temp_fig = plt.figure(os.path.splitext(os.path.basename(file))[0])
        temp_ax = temp_fig.add_subplot(111, projection='3d')
        temp_ax.set_xlabel('Average motor power')
        temp_ax.set_ylabel('Velocity')
        temp_ax.set_zlabel('Time')

        outlier_line = temp_ax.scatter(outlier_power, outlier_velocity, outlier_time, c="black",
                                       label="outliers")
        acceleration_line = temp_ax.scatter(accelerating_power, accelerating_velocity, accelerating_time,
                                            c="red",
                                            label="accelerating")
        deceleration_line = temp_ax.scatter(decelerating_power, decelerating_velocity, decelerating_time,
                                            c="blue",
                                            label="decelerating")
        plt.show()

        easygui.msgbox("Next without outliers and rescaled")

        # Drop every row plotted as an outlier above (detected or manually labelled) so that only
        # accelerating and decelerating rows reach the scaler and the classifier.
        keep = y != OUTLIER
        x = MinMaxScaler().fit_transform(x[keep])
        y = y[keep]

        outlier_line.remove()
        acceleration_line.remove()
        deceleration_line.remove()

        accelerating = x[y == ACCELERATING]
        decelerating = x[y == DECELERATING]
        accelerating_power, accelerating_velocity, accelerating_time = separate_feature(accelerating)
        decelerating_power, decelerating_velocity, decelerating_time = separate_feature(decelerating)

        temp_ax.scatter(accelerating_power, accelerating_velocity, accelerating_time,
                        c="red",
                        label="accelerating")
        temp_ax.scatter(decelerating_power, decelerating_velocity, decelerating_time,
                        c="blue",
                        label="decelerating")

        # Preview: the hyperplane for this file on its own
        clf.fit(x, y)
        plot_hyperplane(clf, temp_ax)

        is_new_file = file not in already_used_files

        if not total_data:
            total_data = {"features": x, "labels": y}
            changed_anything = True
        elif is_new_file:
            new_x = np.concatenate((total_data["features"], x))
            new_y = np.concatenate((total_data["labels"], y))

            # Remove duplicated (features, label) rows before refitting
            merged = np.unique(np.hstack((new_x, new_y.reshape((-1, 1)))), axis=0)

            total_data["features"] = merged[:, :-1]
            total_data["labels"] = merged[:, -1].ravel()

            clf.fit(total_data["features"], total_data["labels"])
            changed_anything = True
        else:
            # The preview fit above replaced the combined model; restore it so the classifier that
            # may be saved below always matches ``total_data``.
            clf.fit(total_data["features"], total_data["labels"])

        if is_new_file:
            ax3d.scatter(accelerating[:, 0], accelerating[:, 1], accelerating[:, 2], c="red",
                         label="positive")
            ax3d.scatter(decelerating[:, 0], decelerating[:, 1], decelerating[:, 2], c="blue",
                         label="negative")

            if hyperplane is not None:
                hyperplane.remove()

            hyperplane = plot_hyperplane(clf, ax3d)

            already_used_files.add(file)

    if changed_anything and not is_empty_model(clf):
        joblib.dump(clf, MODEL_FILE)
        # Store the file names as a plain string array: a raw set would be pickled into a 0-d object
        # array that can neither be unpacked nor loaded without allow_pickle.
        np.savez(MODEL_DATA_FILE, features=total_data["features"], labels=total_data["labels"],
                 files=np.array(sorted(already_used_files), dtype=str))
        easygui.msgbox("Model saved.")

    plt.close("all")
    return open_path