Example 1
def main():
    pm = path_manager()
    selected_dbs = select_db(pm.find_folders(pm.get_databases_dir()))

    for database in selected_dbs:

        # TODO: replace this temporary workaround for the 'all' option
        if database[0] != 'all':
            db = prepare_db(database[0], pm)
        else:
            print("Selecting 'all' databases is not supported yet.")
            sys.exit()

        # BEGIN classification FFNN
        if db.get_dataset_type() == 'classification':

            # BEGIN preprocessing
            process_data.FFNN_encoding(db)

            # (1) First layer (input layer) has 1 node per attribute.
            # (2) Hidden layers have an arbitrary number of nodes.
            # (3) Output layer has 1 node per possible classification.
            layer_sizes = [
                len(db.get_attr()) - 1, 50,
                len(db.get_class_list())
            ]

            # This number is arbitrary.
            # TODO Tune this per dataset
            learning_rate = .3

            ffnn = FFNN(layer_sizes, db.get_data(), db.get_dataset_type(),
                        learning_rate)
            sys.exit()

        # BEGIN regression FFNN
        elif db.get_dataset_type() == 'regression':

            process_data.FFNN_encoding(db)

            # (1) First layer (input layer) has 1 node per attribute.
            # (2) Hidden layers have an arbitrary number of nodes.
            # (3) Output layer has 1 node, just some real number.
            layer_sizes = [len(db.get_attr()) - 1, 50, 1]

            # machine's learning rate
            # learning_rate = .60

            # Forest fire's learning rate
            learning_rate = .01

            ffnn = FFNN(layer_sizes, db.get_data(), db.get_dataset_type(),
                        learning_rate)
            sys.exit()

        else:
            print('Database type invalid. Type = ' + db.get_dataset_type())
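
In both branches above the network shape is driven entirely by the layer_sizes list: one input node per attribute (the class column is excluded, hence the -1), an arbitrary hidden layer, and one output node per class (or a single node for regression). Below is a minimal sketch of how such a list typically maps to per-layer weight shapes; it is a generic illustration, not the FFNN class used in this project, and init_weights is a hypothetical helper.

import numpy as np

def init_weights(layer_sizes, seed=0):
    # One weight matrix and one bias vector per pair of consecutive layers.
    rng = np.random.default_rng(seed)
    weights, biases = [], []
    for n_in, n_out in zip(layer_sizes[:-1], layer_sizes[1:]):
        weights.append(rng.normal(scale=0.1, size=(n_in, n_out)))
        biases.append(np.zeros(n_out))
    return weights, biases

# e.g. 4 input attributes, 50 hidden nodes, 3 output classes
ws, bs = init_weights([4, 50, 3])
print([w.shape for w in ws])  # [(4, 50), (50, 3)]
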
Example 2
""" -------------------------------------------------------------
@file        pso_driver.py
@brief       A file for testing our PSO implementation
"""

import process_data
import Cost_Functions as cf
from FFNN import FFNN
from path_manager import pathManager as path_manager
import numpy as np
import os.path
import prepare_data
import shared_functions as sf
import pso

pm = path_manager()
selected_dbs = prepare_data.select_db(pm.find_folders(pm.get_databases_dir()))

db = prepare_data.prepare_db(selected_dbs[0], pm)
process_data.shuffle_all(db.get_data(), 1)

process_data.FFNN_encoding(db)

half_idx = int(len(db.get_data()) * .5)

# FFNN stuff

# BEGIN classification FFNN
if db.get_dataset_type() == 'classification':
    # (1) First layer (input layer) has 1 node per attribute.
    # (2) Hidden layers have an arbitrary number of nodes.
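
The excerpt is cut off before half_idx is used, but int(len(db.get_data()) * .5) is presumably there to split the shuffled rows in half for training and testing. A hedged sketch of that split; train_set and test_set are illustrative names, not from the file.

data = db.get_data()
train_set = data[:half_idx]   # first half of the shuffled rows
test_set = data[half_idx:]    # second half held out for testing
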
Example 3
def main_execution():
    # Lists to store all the loss function averages
    k_nn_classification_avgs = []
    k_nn_regress_avgs = []
    enn_avgs = []
    cnn_avgs = []
    k_means_classification_avgs = []
    k_means_regress_avgs = []
    k_medoid_classification_avgs = []
    k_medoid_regress_avgs = []

    reduction_funcs = [
        # 'edited_nn',
        # 'condensed_nn',
        'k_means',
        'k_medoids'
    ]

    # Initializes path manager with default directory as databases.
    pm = path_manager()

    # Loads in a list of database folders
    # for the user to select as the current database.
    selected_dbs = select_db(pm.find_folders(pm.get_databases_dir()))
    # TODO: change to get dataset type from db
    for database in selected_dbs:
        db = prepare_db(database, pm)
        k_nearest = knn(5, db.get_dataset_type(),
                        db.get_classifier_col(),
                        db.get_classifier_attr_cols())
        # Start k-fold cross validation
        print("RUNNING K-FOLD CROSS VALIDATION")
        # Prepare data for k-fold
        binned_data, bin_lengths = process_data.separate_data(
            db.get_attr(), db.get_data())
        # Extract validation set
        bin_lengths, validate_data, binned_data = validate.get_validate(
            bin_lengths, binned_data)
        debug_file.write('\n\nVALIDATION DATA: \n')
        for row in validate_data:
            debug_file.write(str(row) + '\n')
            # NOTE: binned_data still needs to be shuffled somewhere above this point

        # Run k-fold with plain k-NN first (no reduction function)
        k_fold_results = validate.k_fold(9, binned_data,
                                         validate_data, bin_lengths,
                                         db, False, db.get_dataset_type(),
                                         k_nearest, debug_file, output_file)

        if db.get_dataset_type() == 'classification':
            k_nn_classification_avgs.append(
                sum(k_fold_results) / len(k_fold_results))
        elif db.get_dataset_type() == 'regression':
            k_nn_regress_avgs.append(sum(k_fold_results) / len(k_fold_results))

        output_file.write('\n\n\n')

        # Tuning
        if True:

            # Attributes to be removed
            removal_queue = []
            removed_attr_idx = None

            norm_sum = sum(k_fold_results) / len(k_fold_results)
            for attr_idx in db.get_classifier_attr_cols():

                # Stores the full classifier attribute list so it can be restored.
                temp_db = deepcopy(k_nearest.get_class_cols())

                # Temporarily sets the classifier attribute index list to the
                # shorter version and recomputes k-fold cross validation.
                tmp = k_nearest.get_class_cols()
                tmp.remove(attr_idx)
                k_nearest.set_class_cols(tmp)

                # recomputes the k fold results for comparison
                # Prepare data for k-fold
                binned_data, bin_lengths = process_data.separate_data(
                    db.get_attr(), db.get_data())
                # Extract validation set
                bin_lengths, validate_data, binned_data = validate.get_validate(
                    bin_lengths, binned_data)

                # Re-run k-fold with the reduced attribute set
                k_fold_results = validate.k_fold(9, binned_data,
                                                 validate_data, bin_lengths,
                                                 db, False, db.get_dataset_type(),
                                                 k_nearest, debug_file, output_file)

                if db.get_dataset_type() == 'classification':
                    k_nn_classification_avgs.append(
                        sum(k_fold_results) / len(k_fold_results))
                elif db.get_dataset_type() == 'regression':
                    k_nn_regress_avgs.append(
                        sum(k_fold_results) / len(k_fold_results))

                attr_removed_sum = sum(k_fold_results) / len(k_fold_results)

                # Resets the database column set.
                print(k_nearest.get_class_cols())
                k_nearest.set_class_cols(temp_db)

                # Queue attr_idx for removal only if dropping it improved the score.
                print("COMPARISON FOR:", attr_idx, ", VALUES:", norm_sum, ">",
                      attr_removed_sum)
                print(k_nearest.get_class_cols())

                if db.get_dataset_type() == 'classification':
                    if norm_sum < attr_removed_sum:
                        # Add to removal queue.
                        removal_queue.append(attr_idx)
                elif db.get_dataset_type() == 'regression':
                    if norm_sum > attr_removed_sum:
                        # Add to removal queue.
                        removal_queue.append(attr_idx)

            final_attrs = k_nearest.get_class_cols()

            print("ATTRINBUTE IDX:", removal_queue)

            # Removes the attribute idx values from the database's list of attribute columns to be used.
            for attr_idx in removal_queue:
                final_attrs.remove(attr_idx)

            db.set_classifier_attr_cols(final_attrs)

            print(database, " is using attribute columns: ",
                  db.get_classifier_attr_cols())
            print(db.get_dataset_type())
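
The tuning block above amounts to one pass of backward feature elimination: drop one attribute column at a time, re-run k-fold cross validation, and queue the column for removal only when the score improves (higher accuracy for classification, lower loss for regression, which is why the comparison flips). A stripped-down sketch of the pattern; score_with is a hypothetical stand-in for the k-fold run.

def backward_eliminate(attr_cols, score_with, higher_is_better=True):
    # score_with(cols) returns the mean k-fold score for that column subset.
    baseline = score_with(attr_cols)
    to_remove = []
    for col in attr_cols:
        trial_score = score_with([c for c in attr_cols if c != col])
        improved = (trial_score > baseline) if higher_is_better else (trial_score < baseline)
        if improved:
            to_remove.append(col)
    return [c for c in attr_cols if c not in to_remove]
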
Example 4
from data_viewer import data_viewer
from wind_simulation import wind_simulation
from autopilot import autopilot
from observer import observer
from tools.signals import signals
from waypoint_viewer import waypoint_viewer
from path_follower import path_follower
from path_manager import path_manager
import numpy as np

# SIM, PLAN, Dynamics and msg_waypoints come from the project's parameter and
# message-type modules; their imports are not shown in this excerpt.

# initialize dynamics object
dyn = Dynamics(SIM.ts_sim)
wind = wind_simulation(SIM.ts_sim)
ctrl = autopilot(SIM.ts_sim)
obsv = observer(SIM.ts_sim)
path_follow = path_follower()
path_manage = path_manager()

# waypoint definition
waypoints = msg_waypoints()
# only the last of these assignments takes effect; keep the path type you want
waypoints.type = 'straight_line'
waypoints.type = 'fillet'
waypoints.type = 'dubins'
waypoints.num_waypoints = 4
Va = PLAN.Va0
waypoints.ned[:, 0:waypoints.num_waypoints] = np.array([[0, 0, -100],
                                                        [1000, 0, -100],
                                                        [0, 1000, -100],
                                                        [1000, 1000, -100]]).T
waypoints.airspeed[:, 0:waypoints.num_waypoints] = np.array([[Va, Va, Va, Va]])
waypoints.course[:, 0:waypoints.num_waypoints] = np.array(
    [[0, np.radians(45), np.radians(45),
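
The course array is cut off above, but the ned block shows the pattern: each waypoint is written as a north/east/down row in metres and the whole block is transposed so the message stores one 3-element column per waypoint (down is negative for altitude above ground). A quick standalone shape check with plain numpy, independent of the simulator classes:

import numpy as np

pts = np.array([[0, 0, -100],
                [1000, 0, -100],
                [0, 1000, -100],
                [1000, 1000, -100]]).T
print(pts.shape)   # (3, 4): one NED column per waypoint
print(pts[:, 1])   # [1000    0 -100], the second waypoint
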
Example 5
def main():
    pm = path_manager()
    selected_dbs = select_db(pm.find_folders(pm.get_databases_dir()))

    for database in selected_dbs:
        # NOTE: output is written to the file defined below. Create the
        # ../output/kmedoids/ directory before running; the generated file
        # can be shown as the output in the video.
        filename = "../output/kmedoids/" + database + "_output.txt"
        output_file = open(filename, "w+")

        db = prepare_db(database, pm)
        k_nn = knn(5, db.get_dataset_type(), db.get_classifier_col(),
                   db.get_classifier_attr_cols())
        classes = (db.get_class_list()
                   if db.get_dataset_type() == 'classification' else [])
        class_count = (len(classes)
                       if db.get_dataset_type() == 'classification' else 1)
        X = process_data.shuffle_all(db.get_data(), 1)
        y = np.array(db.get_data())[:, db.get_classifier_col()]

        # RUN K-MEDOIDS ------------------------------------------------------------
        print("RUNNING K-MEDOIDS")
        kc = kcluster(10, 10, db.get_data(), db.get_classifier_attr_cols(),
                      'k-medoids')
        indices = kc.get_medoids()
        centers = [db.get_data()[i] for i in indices]
        rbf = RBF(len(centers), class_count, output_file, 25)
        rbf.fit(X, centers, y, db.get_dataset_type(), classes)
        print("INITIAL WEIGHTS: ", rbf.weights)
        output_file.write("INITIAL WEIGHTS: \n")
        output_file.write(str(rbf.weights) + "\n")
        print("CENTERS: ", centers)
        output_file.write("FINAL WEIGHTS: \n")
        output_file.write(str(rbf.weights) + "\n")
        output_file.write("FINAL TESTS: \n")
        rbf.test(X, db.get_dataset_type(), y, centers, classes)
        print("FINALS WEIGHTS:")
        print(rbf.weights)
        # ----------------------------------------------------------------------------

        # BEGIN classification FFNN
        if db.get_dataset_type() == 'classification':

            # BEGIN preprocessing
            process_data.FFNN_encoding(db)

            # (1) First layer (input layer) has 1 node per attribute.
            # (2) Hidden layers have an arbitrary number of nodes.
            # (3) Output layer has 1 node per possible classification.

            layer_sizes = [len(db.get_attr()), 10,
                           len(db.get_class_list())]  # (3)

            # This number is arbitrary.
            # NOTICE: Tune this per dataset
            learning_rate = .5

            ffnn = FFNN(layer_sizes, db.get_dataset_type(), database,
                        db.get_data(), learning_rate)

        # BEGIN regression FFNN
        elif db.get_dataset_type() == 'regression':

            process_data.FFNN_encoding(db)

            # (1) First layer (input layer) has 1 node per attribute.
            # (2) Hidden layers have an arbitrary number of nodes.
            # (3) Output layer has 1 node, just some real number.
            layer_sizes = [len(db.get_attr()) - 1, 5, 5, 1]

            learning_rate = .0001

            ffnn = FFNN(layer_sizes, db.get_dataset_type(), database,
                        db.get_data(), learning_rate)

        else:
            print('Database type invalid. Type = ' + db.get_dataset_type())
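
In the K-medoids section above, the clustering step only supplies the centers; the RBF network then builds one basis function per center and fits output weights on top. The RBF class itself is not shown in this snippet, so the following is a generic illustration of a Gaussian RBF hidden layer rather than the project's implementation; sigma is an assumed spread hyperparameter.

import numpy as np

def rbf_activations(x, centers, sigma=1.0):
    # One Gaussian basis per center: exp(-||x - c||^2 / (2 * sigma^2))
    x = np.asarray(x, dtype=float)
    cs = np.asarray(centers, dtype=float)
    dists = np.linalg.norm(cs - x, axis=1)
    return np.exp(-(dists ** 2) / (2 * sigma ** 2))

# The output layer is then a linear (or softmax) combination of these
# activations: one output per class for classification, a single real
# value for regression.
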
Example 6
def main_execution():
    # Lists to store all the loss function averages
    k_nn_classification_avgs = []
    k_nn_regress_avgs = []
    enn_avgs = []
    cnn_avgs = []
    k_means_classification_avgs = []
    k_means_regress_avgs = []
    k_medoid_classification_avgs = []
    k_medoid_regress_avgs = []

    reduction_funcs = ['edited_nn', 'condensed_nn', 'k_means', 'k_medoids']

    # Initializes path manager with default directory as databases.
    pm = path_manager()

    # Loads in a list of database folders
    # for the user to select as the current database.
    selected_dbs = select_db(pm.find_folders(pm.get_databases_dir()))
    for database in selected_dbs:
        db = prepare_db(database, pm)
        k_nearest = knn(15, db.get_dataset_type(),
                        db.get_classifier_col(),
                        db.get_classifier_attr_cols())
        # Start k-fold cross validation
        print("RUNNING K-FOLD CROSS VALIDATION")
        # Prepare data for k-fold
        binned_data, bin_lengths = process_data.separate_data(
            db.get_attr(), db.get_data())
        # Extract validation set
        bin_lengths, validate_data, binned_data = validate.get_validate(
            bin_lengths, binned_data)
        debug_file.write('\n\nVALIDATION DATA: \n')
        for row in validate_data:
            debug_file.write(str(row) + '\n')

        # Run k-fold with plain k-NN first (no reduction function)
        k_fold_results = validate.k_fold(9, binned_data,
                                         validate_data, bin_lengths,
                                         db, True, db.get_dataset_type(),
                                         k_nearest, debug_file, output_file)

        if db.get_dataset_type() == 'classification':
            k_nn_classification_avgs.append(
                sum(k_fold_results) / len(k_fold_results))
        elif db.get_dataset_type() == 'regression':
            k_nn_regress_avgs.append(sum(k_fold_results) / len(k_fold_results))

        output_file.write('\n\n\n')

        # Loop thru all reduction functions
        for func in reduction_funcs:
            print('RUNNING ', func)
            # we are removing a bin from bin_lengths

            if db.get_dataset_type() == 'classification':
                k_fold_results = validate.k_fold(9, binned_data,
                                                 validate_data, bin_lengths, db,
                                                 True, db.get_dataset_type(),
                                                 k_nearest, debug_file, output_file, func)

                if func == 'edited_nn':
                    enn_avgs.append(sum(k_fold_results) / len(k_fold_results))
                elif func == 'condensed_nn':
                    cnn_avgs.append(sum(k_fold_results) / len(k_fold_results))
                elif func == 'k_means':
                    k_means_classification_avgs.append(
                        sum(k_fold_results) / len(k_fold_results))
                elif func == 'k_medoids':
                    k_medoid_classification_avgs.append(
                        sum(k_fold_results) / len(k_fold_results))

            elif db.get_dataset_type() == 'regression':
                if func == 'edited_nn' or func == 'condensed_nn':
                    continue

                # Shrink data to quarter of the size
                db_small = process_data.random_data_from(db.get_data(), 0.25)
                # Re-bin it after shrinking
                binned_data, bin_lengths = process_data.separate_data(
                    db.get_attr(), db_small)

                k_fold_results = validate.k_fold(9, binned_data,
                                                 validate_data, bin_lengths, db,
                                                 True, db.get_dataset_type(),
                                                 k_nearest, debug_file, output_file, func)

                if func == 'k_means':
                    k_means_regress_avgs.append(
                        sum(k_fold_results) / len(k_fold_results))
                elif func == 'k_medoids':
                    k_medoid_regress_avgs.append(
                        sum(k_fold_results) / len(k_fold_results))

            output_file.write('K FOLD RESULTS: ' + str(k_fold_results))

            output_file.write('\n\n\n')

        print("KNN CLASSIFICATION AVGS: ", k_nn_classification_avgs)
        print("KNN REGRESSION AVGS: ", k_nn_regress_avgs)
        print("ENN AVGS: ", enn_avgs)
        print("CNN AVGS: ", cnn_avgs)
        print("K MEANS CLASSIFICATION AVGS: ", k_means_classification_avgs)
        print("K MEANS REGRESSION AVGS: ", k_means_regress_avgs)
        print("K MEDOID CLASSIFICATION AVGS: ", k_medoid_classification_avgs)
        print("K MEDOID REGRESSION AVGS", k_medoid_regress_avgs)