def train(training_data_file_path):
    """Fit the enabled classifiers on all users' windows and pickle them.

    Windows for every user known to the dataset loader are turned into
    feature vectors with ``prepare_features``, shuffled together with their
    labels, and filtered so that only windows whose features pass the
    finite check remain. Each classifier is then fitted on the filtered
    data and written to ``<classifier name>.pickle`` (a path relative to
    the current working directory).

    Args:
        training_data_file_path: Path handed to ``AccelerometerDatasetLoader``.
    """
    data_loader = AccelerometerDatasetLoader(
        training_data_file_path, WINDOW_SIZE, STEP_SIZE, True)

    # Only the ensemble models are enabled here. Other candidates that were
    # previously listed (decision tree, SVC, AdaBoost, radius / k-nearest
    # neighbours with uniform or distance weights) are switched off.
    classifiers = {
        'random_forrest': RandomForestClassifier(),
        'gradient_boosting': GradientBoostingClassifier(),
        'extra_trees': ExtraTreesClassifier()
    }

    # Collect (features, label) pairs across all users.
    samples = []
    for user in data_loader.users:
        print(f'Preparing training data for user {user}')
        for window_data, label in data_loader.get_user_data_windows(user):
            samples.append((prepare_features(window_data), label))
    print('Finished train data preparation')

    shuffle(samples)

    # Keep only the samples whose feature values are all finite.
    finite_windows = []
    finite_labels = []
    for features, label in samples:
        if sum(np.isfinite(features)) == len(features):
            finite_windows.append(features)
            finite_labels.append(label)

    for name, model in classifiers.items():
        print(f'Training classifier {name}')
        model.fit(finite_windows, finite_labels)
        with open(f'{name}.pickle', 'wb') as model_file:
            pickle.dump(model, model_file)
    print('Finished training')
def leave_one_user_out_training(training_data_file_path):
    """Evaluate classifiers leave-one-user-out on FFT-of-magnitude features.

    For every window size in a fixed list, each user in turn is held out as
    the test user while a fresh set of classifiers is fitted on the windows
    of all remaining users. Each window is reduced to the real part of the
    FFT of its acceleration magnitude. For every test user and classifier
    the raw accuracy, the accuracy after ``pwctvdrobust.pwc_tvdrobust``
    smoothing and the accuracy after majority-vote smoothing are printed,
    and one row is written to a per-window-size CSV file (path relative to
    the current working directory).

    NOTE(review): another function with this exact name is defined further
    down in this file. That later definition replaces this one when the
    module is loaded, so this version is only reachable if one of the two
    is renamed.

    Args:
        training_data_file_path: Path handed to ``AccelerometerDatasetLoader``.
    """

    def magnitude_spectrum(window):
        # Real part of the FFT of the per-sample acceleration magnitude.
        magnitude = np.sqrt(window.x**2 + window.y**2 + window.z**2)
        return np.fft.fft(magnitude).real

    for window_size in [15, 30, 45, 60, 90, 120, 150, 180, 210, 240, 270, 300]:
        print()
        print(
            '###############################################################')
        print(f'Running evaluation for window size {window_size}')
        print()

        step_size = 10
        majority_smoother = MajorityVoteSmoother(10, 15)
        data_loader = AccelerometerDatasetLoader(training_data_file_path,
                                                 window_size, step_size, True)

        results_file_name = \
            f'w{window_size}_s{step_size}_unibo_magnitude_fft_' \
            f'w_maj_vote_w_interpolation.csv'

        # newline='' lets the csv writer control line endings itself;
        # without it, platforms that translate '\n' get blank rows.
        with open(results_file_name, 'w', newline='') as res_file:
            csv_writer = csv.writer(res_file)
            csv_header = [
                'test_user', 'num_samples', 'algorithm', 'matched', 'missed',
                'acc', 'smoothed_matched', 'smoothed_missed', 'smoothed_acc',
                'maj_vote_smoothed_acc'
            ]
            csv_writer.writerow(csv_header)

            for user in data_loader.users:
                # Fresh, unfitted classifiers for every held-out user.
                classifiers = {
                    'decision_tree':
                    DecisionTreeClassifier(),
                    'random_forrest':
                    RandomForestClassifier(),
                    'svm':
                    SVC(gamma='scale',
                        decision_function_shape='ovo',
                        probability=True),
                    'adaboost':
                    AdaBoostClassifier(),
                    'gradient_boosting':
                    GradientBoostingClassifier(),
                    'extra_trees':
                    ExtraTreesClassifier()
                }
                train_users = data_loader.users.copy()
                train_users.remove(user)
                test_user = user

                train_windows = []
                train_labels = []

                for train_user in train_users:
                    print(f'Preparing training data for user {train_user}')
                    for window, label in \
                            data_loader.get_user_data_windows(train_user):
                        train_windows.append(magnitude_spectrum(window))
                        train_labels.append(label)
                print('Finished train data preparation')

                for classifier_name, classifier in classifiers.items():
                    print(f'Training classifier {classifier_name}')
                    classifier.fit(train_windows, train_labels)
                print('Finished training')

                test_windows = []
                test_labels = []

                print(f'Preparing test data for user {test_user}')
                for window, label in \
                        data_loader.get_user_data_windows(test_user):
                    test_windows.append(magnitude_spectrum(window))
                    test_labels.append(label)
                print('Finished test data preparation')

                if not test_windows:
                    print(f'No data for test user {test_user}. Skipping...')
                    continue

                for classifier_name, classifier in classifiers.items():
                    print(f'Running prediction for classifier '
                          f'{classifier_name}')
                    predictions = classifier.predict(test_windows)

                    # Raw (unsmoothed) predictions.
                    same = sum(predictions == test_labels)
                    not_same = sum(predictions != test_labels)
                    acc = same / (same + not_same) * 100
                    print(f'o_len: {len(predictions)}')
                    print(f'o_==:  {same}')
                    print(f'o_!=:  {not_same}')
                    print(f'o_acc: {acc}')

                    # The TVD smoother works on integers, so map the
                    # predicted modes to ints and back afterwards.
                    prediction_ints = [MODE_TO_INT[m] for m in predictions]
                    print('Running smoothing...')
                    smoothed_ints = pwctvdrobust.pwc_tvdrobust(prediction_ints)
                    majority_smoothed = \
                        np.array(majority_smoother.smooth(predictions))
                    print('Smoothing done...')

                    smoothed_predictions = np.array(
                        [INT_TO_MODE[int(round(f))] for f in smoothed_ints])
                    smoothed_same = sum(smoothed_predictions == test_labels)
                    smoothed_not_same = sum(
                        smoothed_predictions != test_labels)
                    smoothed_acc = smoothed_same / len(test_labels) * 100

                    print(f's_len: {len(predictions)}')
                    print(f's_==:  {smoothed_same}')
                    print(f's_!=:  {smoothed_not_same}')
                    print(f's_acc: {smoothed_acc}')

                    majority_smoothed_same = \
                        sum(majority_smoothed == test_labels)
                    majority_smoothed_not_same = \
                        sum(majority_smoothed != test_labels)
                    majority_smoothed_acc = \
                        majority_smoothed_same / len(test_labels) * 100
                    print(f'm_len: {len(predictions)}')
                    print(f'm_==:  {majority_smoothed_same}')
                    print(f'm_!=:  {majority_smoothed_not_same}')
                    print(f'm_acc: {majority_smoothed_acc}')

                    # test_user, num_samples, algorithm, matched, missed, acc,
                    # smoothed_matched, smoothed_missed, smoothed_acc,
                    # maj_vote_smoothed_acc
                    csv_writer.writerow([
                        test_user,
                        len(predictions), classifier_name, same, not_same, acc,
                        smoothed_same, smoothed_not_same, smoothed_acc,
                        majority_smoothed_acc
                    ])
def leave_one_user_out_training(training_data_file_path):
    """Evaluate classifiers leave-one-user-out on ``prepare_features`` output.

    For every window size in a fixed list, each user in turn is held out as
    the test user while a fresh set of classifiers is fitted on the
    shuffled windows of all remaining users. Windows whose features are not
    all finite are dropped from both the training and the test data. For
    every test user and classifier the raw accuracy, the accuracy after
    ``pwctvdrobust.pwc_tvdrobust`` smoothing and the accuracy after
    majority-vote smoothing are printed, and one row is written to a
    per-window-size CSV file (path relative to the current working
    directory).

    A held-out user is skipped (no CSV rows) when no training data survives
    cleaning, when the user has no windows, or when none of the user's
    windows survive cleaning.

    Args:
        training_data_file_path: Path handed to ``AccelerometerDatasetLoader``.
    """

    def finite_only(samples):
        # Split (features, label) pairs into two parallel lists, keeping
        # only the pairs whose feature values are all finite.
        kept_windows = []
        kept_labels = []
        for features, label in samples:
            if np.all(np.isfinite(features)):
                kept_windows.append(features)
                kept_labels.append(label)
        return kept_windows, kept_labels

    for window_size in [
            1, 2, 5, 10, 15, 20, 30, 45, 60, 90, 120, 150, 180, 210, 240, 270,
            300
    ]:
        print()
        print(
            '###############################################################')
        print(f'Running evaluation for window size {window_size}')
        print()

        step_size = 10
        majority_smoother = MajorityVoteSmoother(10, 15)
        data_loader = AccelerometerDatasetLoader(training_data_file_path,
                                                 window_size, step_size, True)

        results_file_name = \
            f'w{window_size}_s{step_size}__all_features__w_interpolation_' \
            f'w_maj_vote__w_pvcrobust__shuffled__1st_2nd_3rd.csv'

        # newline='' lets the csv writer control line endings itself;
        # without it, platforms that translate '\n' get blank rows.
        with open(results_file_name, 'w', newline='') as res_file:
            csv_writer = csv.writer(res_file)
            # The header lists exactly the values written per row below.
            # An 'avg_window_entropy' column used to be declared here, but
            # no such value is ever written, which shifted every later
            # column name by one.
            csv_header = [
                'test_user', 'num_samples', 'algorithm', 'matched', 'missed',
                'acc', 'smoothed_matched', 'smoothed_missed', 'smoothed_acc',
                'maj_vote_smoothed_acc'
            ]
            csv_writer.writerow(csv_header)

            for user in data_loader.users:
                # Fresh, unfitted classifiers for every held-out user.
                classifiers = {
                    'decision_tree':
                    DecisionTreeClassifier(),
                    'random_forrest':
                    RandomForestClassifier(),
                    'svm':
                    SVC(gamma='scale',
                        decision_function_shape='ovo',
                        probability=True),
                    'adaboost':
                    AdaBoostClassifier(),
                    'gradient_boosting':
                    GradientBoostingClassifier(),
                    'extra_trees':
                    ExtraTreesClassifier()
                }
                train_users = data_loader.users.copy()
                train_users.remove(user)
                test_user = user

                train_windows = []
                train_labels = []

                for train_user in train_users:
                    print(f'Preparing training data for user {train_user}')
                    for window_data, label in \
                            data_loader.get_user_data_windows(train_user):
                        train_windows.append(prepare_features(window_data))
                        train_labels.append(label)
                print('Finished train data preparation')

                train_data = list(zip(train_windows, train_labels))
                shuffle(train_data)

                cleaned_train_windows, cleaned_train_labels = \
                    finite_only(train_data)

                if not cleaned_train_windows:
                    # Nothing to fit on: skip this user instead of stopping
                    # the whole run in an interactive debugger.
                    print('No train data left after cleaning...')
                    continue

                for classifier_name, classifier in classifiers.items():
                    print(f'Training classifier {classifier_name}')
                    classifier.fit(cleaned_train_windows, cleaned_train_labels)
                print('Finished training')

                test_windows = []
                test_labels = []

                print(f'Preparing test data for user {test_user}')
                for window_data, label in \
                        data_loader.get_user_data_windows(test_user):
                    test_windows.append(prepare_features(window_data))
                    test_labels.append(label)
                print('Finished test data preparation')

                if not test_windows:
                    print(f'No data for test user {test_user}. Skipping...')
                    continue

                cleaned_test_windows, cleaned_test_labels = \
                    finite_only(zip(test_windows, test_labels))

                if not cleaned_test_windows:
                    print('No test data left after cleaning...')
                    continue

                for classifier_name, classifier in classifiers.items():
                    print(f'Running prediction for classifier '
                          f'{classifier_name}')
                    predictions = classifier.predict(cleaned_test_windows)

                    # Raw (unsmoothed) predictions.
                    same = sum(predictions == cleaned_test_labels)
                    not_same = sum(predictions != cleaned_test_labels)
                    acc = same / (same + not_same) * 100
                    print(f'o_len: {len(predictions)}')
                    print(f'o_==:  {same}')
                    print(f'o_!=:  {not_same}')
                    print(f'o_acc: {acc}')

                    # The TVD smoother works on integers, so map the
                    # predicted modes to ints and back afterwards.
                    prediction_ints = [MODE_TO_INT[m] for m in predictions]
                    print('Running smoothing...')
                    smoothed_ints = pwctvdrobust.pwc_tvdrobust(prediction_ints)
                    majority_smoothed = \
                        np.array(majority_smoother.smooth(predictions))
                    print('Smoothing done...')

                    smoothed_predictions = np.array(
                        [INT_TO_MODE[int(round(f))] for f in smoothed_ints])
                    smoothed_same = \
                        sum(smoothed_predictions == cleaned_test_labels)
                    smoothed_not_same = \
                        sum(smoothed_predictions != cleaned_test_labels)
                    smoothed_acc = \
                        smoothed_same / len(cleaned_test_labels) * 100

                    print(f's_len: {len(predictions)}')
                    print(f's_==:  {smoothed_same}')
                    print(f's_!=:  {smoothed_not_same}')
                    print(f's_acc: {smoothed_acc}')

                    majority_smoothed_same = \
                        sum(majority_smoothed == cleaned_test_labels)
                    majority_smoothed_not_same = \
                        sum(majority_smoothed != cleaned_test_labels)
                    majority_smoothed_acc = \
                        majority_smoothed_same / len(cleaned_test_labels) * 100
                    print(f'm_len: {len(predictions)}')
                    print(f'm_==:  {majority_smoothed_same}')
                    print(f'm_!=:  {majority_smoothed_not_same}')
                    print(f'm_acc: {majority_smoothed_acc}')

                    # test_user, num_samples, algorithm, matched, missed, acc,
                    # smoothed_matched, smoothed_missed, smoothed_acc,
                    # maj_vote_smoothed_acc
                    csv_writer.writerow([
                        test_user,
                        len(predictions), classifier_name, same, not_same, acc,
                        smoothed_same, smoothed_not_same, smoothed_acc,
                        majority_smoothed_acc
                    ])