def graph(self, file_data):
        """
        Prepare the data of a log file for graphing.

        Stores the log on the instance, extracts and manipulates its features, optionally removes outliers
        (automatically and/or by manual selection) and classifies every remaining sample with ``self.clf``.
        The results are kept as attributes: ``features``, ``headers``, ``new_scaled_features``, ``labels`` and
        ``color_labels`` (plus ``outliers`` and ``outlier_detector`` when ``self.show_outliers`` is set).

        :param file_data: the log file to use to extract the data from
        """
        self.file_data = file_data

        extracted, headers = get_features(file_data)
        self.features = extracted
        self.headers = headers

        # FIXME make it so that the outliers can be visualized as well
        scaled, manipulated = manipulate_features(self.features, file_data)
        self.new_scaled_features = scaled
        self.features = manipulated

        if self.show_outliers:
            remaining, outliers, detector = find_and_remove_outliers(
                self.new_scaled_features)
            self.new_scaled_features = remaining
            self.outliers = outliers
            self.outlier_detector = detector

        if self.manually_find_remove_outliers:
            selector = remove_outliers(self.new_scaled_features)

            # Apply the same selection to both arrays
            self.new_scaled_features = self.new_scaled_features[selector.indexes]
            self.features = self.features[selector.indexes]

        self.labels = self.clf.predict(self.new_scaled_features)
        # Label 0 is drawn in red, every other label in blue
        self.color_labels = ['r' if label == 0 else 'b' for label in self.labels]
    def graph(self, file_data):
        """
        Redraw every graph from the data of a log file.

        Clears the existing graphs, classifies the manipulated features with ``self.clf`` and then draws the
        3D plot, the optional outliers (in black), the constants graph and the individual 2D subplots.

        :param file_data: the log file to use to extract the data from
        """
        self.clear_graphs()

        raw_features, headers = get_features(file_data)
        scaled, outliers, features = self.manipulate_features(raw_features, file_data)

        labels = self.clf.predict(scaled)
        # Label 0 is drawn in red, every other label in blue
        colors = ['r' if label == 0 else 'b' for label in labels]

        self.plot_3d_plot(scaled, headers, colors)

        if self.show_outliers:
            xs, ys, zs = outliers[:, 0], outliers[:, 1], outliers[:, 2]
            self.master_plot.scatter(xs, ys, zs, c="black")

        self.show_constants_graph(features, file_data, labels, c=colors)

        subplot_axes = (self.time_velocity, self.time_power, self.power_velocity)
        plot_subplots(scaled, headers, subplot_axes, colors)

        plt.draw()
def train_model(open_path):
    """
    Repeatedly ask for a csv log file and open the outlier and scaling selection for it.

    The file dialog is shown again after every file until the user cancels it. Files that are not valid
    logs are silently skipped.

    :param open_path: the path (with wildcard) the file dialog initially opens at
    :return: None
    """
    # TODO x = motor power, y velocity, z time

    while True:
        file = easygui.fileopenbox('Please locate csv file',
                                   'Specify File',
                                   default=open_path,
                                   filetypes='*.csv')

        if not file:
            # The dialog was cancelled
            break

        # The next dialog opens in the directory of the file that was just picked. os.path.join is used
        # instead of a hand-written "\*.csv" suffix, which contained an invalid escape sequence and only
        # produced a usable path on Windows.
        open_path = os.path.join(os.path.dirname(file), "*.csv")

        file_data = get_data(file)

        # TODO make this loop thought the steps as many times as they are number of paths
        if is_valid_log(file_data, visualize.LEGACY_COLUMNS):
            x, _ = get_features(file_data)

            outlier = OutlierAndScalingSelection(file_data, x)
            outlier.show()

            del outlier
def train_model(open_path):
    """
    Repeatedly ask for one or more csv log files and run the training process on their combined data.

    Every valid file has its time column scaled on its own before the features and the raw data of all
    selected files are concatenated. Invalid files are reported to the user and left out. The file dialog
    is shown again after every batch until the user cancels it.

    :param open_path: the path (with wildcard) the file dialog initially opens at
    :return: None
    """
    # TODO x = motor power, y velocity, z time

    while True:
        files = easygui.fileopenbox('Please locate csv file',
                                    'Specify File',
                                    default=open_path,
                                    filetypes='*.csv',
                                    multiple=True)

        if not files:
            # The dialog was cancelled
            break

        # The next dialog opens in the directory of the first selected file. os.path.join is used instead
        # of a hand-written "\*.csv" suffix, which contained an invalid escape sequence and only produced
        # a usable path on Windows.
        open_path = os.path.join(os.path.dirname(files[0]), "*.csv")

        all_features = None
        all_data = None

        for file in files:
            file_data = get_data(file)

            if not is_valid_log(file_data, LEGACY_COLUMNS):
                easygui.msgbox(
                    "The file {0:s} is not a valid file, it will not be plotted."
                    .format(os.path.basename(file)))
                continue

            features, _ = get_features(file_data)

            # Scale the time column (index 2) per file so that logs of different length are comparable
            scaled_time = MinMaxScaler().fit_transform(features[:, 2].reshape(-1, 1))
            features[:, 2] = scaled_time.ravel()

            if all_features is None:
                all_features = features
                all_data = file_data
            else:
                all_features = np.concatenate((all_features, features))

                # Offset the path numbers so they do not collide with those of the files already merged
                file_data['pathNumber'] += (all_data["pathNumber"].max() + 1)

                all_data = np.concatenate((all_data, file_data))

        # Nothing to process when none of the selected files was a valid log
        if all_features is not None:
            go_through_process(all_features, all_data)

def reverse_scalling(features, scalers, outlier_prediction):
    """
    Undo the scaling applied by each scaler on the rows it was responsible for.

    :param features: the scaled features; they are copied, the argument is left untouched
    :param scalers: mapping of a fitted scaler to the index (or mask) of the rows it scaled
    :param outlier_prediction: the outlier prediction per original sample; only the entries equal to 1 are
        kept in each index before it is applied to the features
    :return: a copy of the features with ``inverse_transform`` applied per scaler
    """
    restored = np.copy(features)
    kept = outlier_prediction == 1

    for scaler, rows in scalers.items():
        # Drop the entries of the samples that were removed as outliers
        rows = rows[kept]
        restored[rows] = scaler.inverse_transform(restored[rows])

    return restored


# Load an example log and extract its features.
data = helper.get_data(r"..\example_data\2018-03-21 08-29-04.csv")

features, col = get_features(data)
# Keep only the samples logged while the motion state was 'MOVING'. The same mask is applied to the data and
# to the features so that both keep matching rows.
filters = data['motionState'] == 'MOVING'
data = data[filters]
features = features[filters]
# f = helper.get_features(data)

# Error columns of the log.
# NOTE(review): presumably cross track, lag and angle error - confirm against the logger.
XTE = data['XTE']
lagE = data['lagE']
angleE = data['angleE']

# Target pose columns of the log.
targetX = data["xTarget"]
targetY = data["yTarget"]
targetAngle = data["angleTarget"]

# Actual position columns of the log.
actualX = data["xActual"]
actualY = data["yActual"]
def find_gain(clf, file_data, is_data=False, ax3d=None, ax2d=None):
    """
    Estimate the gains from a log by fitting lines through its accelerating and decelerating samples.

    Only the samples whose motion state is 'MOVING' are used. Outliers are removed with an isolation forest,
    the remaining samples are min-max scaled and classified with ``clf`` (0 is treated as accelerating, 1 as
    decelerating). A best fit line of average power against velocity is then found for each class on the
    unscaled samples.

    :param clf: the fitted classifier used to split the samples into accelerating and decelerating
    :param file_data: the log data, or the path of the csv file to read it from when ``is_data`` is False
    :param is_data: True if ``file_data`` already is the loaded log data
    :param ax3d: optional 3D axes to scatter the scaled, classified samples on
    :param ax2d: optional 2D axes to scatter the unscaled samples and draw the fitted lines on
    :return: a tuple ``(k_v, k_k, k_acc)``: the mean slope and the mean intercept of the two fitted lines and
        the acceleration gain derived from them
    """
    if not is_data:
        file_data = np.genfromtxt(file_data, delimiter=',', dtype=DTYPE, names=True)

    x, _ = get_features(file_data)
    x = x[file_data["motionState"] == 'MOVING']

    out = IsolationForest(n_jobs=-1, random_state=0)
    out.fit(x)
    predicted = out.predict(x)
    # The isolation forest marks the inliers with 1
    x = x[predicted == 1]
    x_scaled = MinMaxScaler().fit_transform(x)
    predicted = clf.predict(x_scaled)

    # Column 0 holds the average power and column 1 the velocity
    acceleration = x[predicted == 0]
    average_power_accelerating = acceleration[:, 0]
    velocity_accelerating = acceleration[:, 1]

    deceleration = x[predicted == 1]
    average_power_decelerating = deceleration[:, 0]
    velocity_decelerating = deceleration[:, 1]

    accelerating_coefficient, accelerating_intercept = find_linear_best_fit_line(velocity_accelerating,
                                                                                 average_power_accelerating)
    decelerating_coefficient, decelerating_intercept = find_linear_best_fit_line(velocity_decelerating,
                                                                                 average_power_decelerating)
    # The line halfway between the accelerating and the decelerating fit
    k_v = (accelerating_coefficient + decelerating_coefficient) / 2
    k_k = (accelerating_intercept + decelerating_intercept) / 2

    # Offset of the accelerating line from the halfway line, evaluated at half the velocity range
    acceleration_coefficient = (accelerating_coefficient - k_v)
    acceleration_intercept = (accelerating_intercept - k_k)
    k_acc = ((x[:, 1].max() - x[:, 1].min()) / 2) * acceleration_coefficient + acceleration_intercept

    if ax3d or ax2d:
        colors = ["red" if i == 0 else "blue" for i in predicted]

        if ax3d:
            # The axes are labelled in column order (power, velocity, time). Previously the x and y labels
            # were swapped relative to the data that was actually plotted on them.
            ax3d.set_xlabel('Average motor power')
            ax3d.set_ylabel('Velocity')
            ax3d.set_zlabel('Scaled Time')

            scaled_average_power = x_scaled[:, 0]
            scaled_velocity = x_scaled[:, 1]
            scaled_time = x_scaled[:, 2]
            ax3d.scatter(scaled_average_power, scaled_velocity, scaled_time, c=colors)

        if ax2d:
            ax2d.set_xlabel('Velocity')
            ax2d.set_ylabel('Average motor power')
            velocity = x[:, 1]
            average_power = x[:, 0]
            ax2d.scatter(velocity, average_power, c=colors)

            y_lim = np.array(ax2d.get_ylim())
            # TODO make the lines not exceed the x limit as well

            # Draw each line between the current y limits by solving y = c * x + i for x
            for c, i in zip([k_v, accelerating_coefficient, decelerating_coefficient],
                            [k_k, accelerating_intercept, decelerating_intercept]):
                ax2d.plot((y_lim - i) / c, y_lim)

    return k_v, k_k, k_acc
def train_model(open_path):
    """
    Interactively train, or extend, the acceleration/deceleration classifier from csv log files.

    If a saved model exists the user is asked whether to continue from it. Afterwards csv files are requested
    in a loop until the file dialog is cancelled. For every valid file the outliers are detected and shown,
    removed, the remaining samples are rescaled and the classifier is fitted on them. Samples of files that
    were not used before are merged into the total training data, on which the classifier is refitted. The
    model and its data are saved at the end if anything changed.

    :param open_path: the path (with wildcard) the file dialog initially opens at
    :return: the wildcard path of the directory of the last opened file, or None if the user closed the
        "use existing model" question
    """
    # TODO add lasso selection of points for data that was not classified manually.
    # TODO Should be able to select outliers and what side is positive or not

    # TODO create 2d plots for every dimension and use lasso selection from there
    fig = plt.figure("Complete classifier")
    ax3d = Axes3D(fig)
    ax3d.set_xlabel('Average motor power')
    ax3d.set_ylabel('Velocity')
    ax3d.set_zlabel('Time')

    total_data = {}
    already_used_files = set()
    changed_anything = False
    hyperplane = None

    plt.ion()
    if os.path.exists(MODEL_FILE):
        answer = easygui.boolbox("A model already exists do you wish to use it?")

        if answer is None:
            return

        elif answer:
            clf = joblib.load(MODEL_FILE)
            hyperplane = plot_hyperplane(clf, ax3d)
            data = np.load(MODEL_DATA_FILE)
            total_data["features"] = data["features"]
            total_data["labels"] = data["labels"]

            accelerating = total_data["features"][total_data["labels"] == 0]
            decelerating = total_data["features"][total_data["labels"] == 1]

            ax3d.scatter(accelerating[:, 0], accelerating[:, 1], accelerating[:, 2], c="red",
                         label="acceleration")
            ax3d.scatter(decelerating[:, 0], decelerating[:, 1], decelerating[:, 2], c="blue",
                         label="deceleration")

            # set.add only accepts a single element, so unpacking the stored files into it failed for any
            # number of files other than one. update takes the whole collection.
            already_used_files.update(np.atleast_1d(data["files"]).tolist())

            plt.show()
        else:
            clf = create_blank_classifier()
            changed_anything = True
    else:
        clf = create_blank_classifier()

    while True:
        file = easygui.fileopenbox('Please locate csv file', 'Specify File', default=open_path, filetypes='*.csv')

        if not file:
            # The dialog was cancelled
            break

        # The next dialog opens in the directory of the file that was just picked. os.path.join is used
        # instead of a hand-written "\*.csv" suffix, which contained an invalid escape sequence and only
        # produced a usable path on Windows.
        open_path = os.path.join(os.path.dirname(file), "*.csv")

        file_data = get_data(file)

        # TODO make this loop thought the steps as many times as they are number of paths
        if not is_valid_log(file_data, visualize.LEGACY_COLUMNS):
            easygui.msgbox(
                "The file {0:s} is not a valid file.".format(os.path.basename(file)))
            continue

        x, _ = get_features(file_data)
        y = get_labels(file_data)

        # x = x[file_data["motionState"] == 'MOVING']
        # y = y[file_data["motionState"] == 'MOVING']

        outlier = IsolationForest(n_jobs=-1, random_state=0)

        # The isolation forest is unsupervised and ignores labels, so only the features are passed. The
        # previous chained assignment "temp_y = y[y != OUTLIER] = 1" overwrote every non-outlier label in y
        # with 1, which destroyed the accelerating/decelerating labels used below.
        outlier.fit(x)
        prediction = outlier.predict(x)
        # outlier = LocalOutlierFactor(n_jobs=-1, )
        # outlier = EllipticEnvelope(random_state=0)
        # prediction = outlier.fit_predict(x)

        y[prediction == OUTLIER] = OUTLIER

        outliers = x[y == OUTLIER]
        accelerating = x[y == ACCELERATING]
        decelerating = x[y == DECELERATING]
        outlier_power, outlier_velocity, outlier_time = separate_feature(outliers)
        accelerating_power, accelerating_velocity, accelerating_time = separate_feature(accelerating)
        decelerating_power, decelerating_velocity, decelerating_time = separate_feature(decelerating)

        temp_fig = plt.figure(os.path.basename(file).split(".")[0])
        temp_ax = Axes3D(temp_fig)
        temp_ax.set_xlabel('Average motor power')
        temp_ax.set_ylabel('Velocity')
        temp_ax.set_zlabel('Time')

        outlier_line = temp_ax.scatter(outlier_power, outlier_velocity, outlier_time, c="black",
                                       label="outliers")
        acceleration_line = temp_ax.scatter(accelerating_power, accelerating_velocity, accelerating_time,
                                            c="red",
                                            label="accelerating")
        deceleration_line = temp_ax.scatter(decelerating_power, decelerating_velocity, decelerating_time,
                                            c="blue",
                                            label="decelerating")
        plt.show()

        easygui.msgbox("Next without outliers and rescaled")

        x = x[prediction != OUTLIER]
        y = y[prediction != OUTLIER]
        x = MinMaxScaler().fit_transform(x)

        # Replace the raw scatter plots by the rescaled samples without the outliers
        outlier_line.remove()
        acceleration_line.remove()
        deceleration_line.remove()

        accelerating = x[y == ACCELERATING]
        decelerating = x[y == DECELERATING]
        accelerating_power, accelerating_velocity, accelerating_time = separate_feature(accelerating)
        decelerating_power, decelerating_velocity, decelerating_time = separate_feature(decelerating)

        temp_ax.scatter(accelerating_power, accelerating_velocity, accelerating_time,
                        c="red",
                        label="accelerating")
        temp_ax.scatter(decelerating_power, decelerating_velocity, decelerating_time,
                        c="blue",
                        label="decelerating")

        # train, test, train_L, test_L = train_test_split(x, y, train_size=.8, test_size=.2, random_state=0,
        #                                                 shuffle=True)
        # clf.fit(train, train_L)

        clf.fit(x, y)
        plot_hyperplane(clf, temp_ax)

        is_new_file = file not in already_used_files

        if not total_data:
            total_data = {"features": x, "labels": y}
            changed_anything = True
        elif is_new_file:
            new_x = np.concatenate((total_data["features"], x))
            new_y = np.concatenate((total_data["labels"], y))
            # Append the labels as a last column so that duplicated (features, label) rows can be dropped
            temp_x = np.hstack((new_x, new_y.reshape((-1, 1))))
            temp_x = np.unique(temp_x, axis=0)
            new_x = temp_x[:, :-1]
            new_y = temp_x[:, -1]

            total_data["features"] = new_x
            total_data["labels"] = new_y.ravel()

            clf.fit(total_data["features"], total_data["labels"])
            changed_anything = True

        if is_new_file:  # FIXME can this just be in a single if statement?
            ax3d.scatter(accelerating[:, 0], accelerating[:, 1], accelerating[:, 2], c="red",
                         label="positive")
            ax3d.scatter(decelerating[:, 0], decelerating[:, 1], decelerating[:, 2], c="blue",
                         label="negative")

            if hyperplane is not None:
                hyperplane.remove()

            hyperplane = plot_hyperplane(clf, ax3d)

        # already_used_files is a set, which has no append method
        already_used_files.add(file)

    if changed_anything and not is_empty_model(clf):
        joblib.dump(clf, MODEL_FILE)
        # The files are stored as a sorted list: passing the set itself would be saved as a single pickled
        # object that cannot be iterated when the data is loaded again.
        np.savez(MODEL_DATA_FILE, features=total_data["features"], labels=total_data["labels"],
                 files=sorted(already_used_files))
        easygui.msgbox("Model saved.")

    plt.close("all")
    return open_path