def main():
    pm = path_manager()
    selected_dbs = select_db(pm.find_folders(pm.get_databases_dir()))

    for database in selected_dbs:
        # TODO: replace this workaround with proper handling of the 'all' option.
        if database[0] != 'all':
            db = prepare_db(database[0], pm)
        else:
            print("Running every database at once is not supported yet.")
            sys.exit()

        # BEGIN classification FFNN
        if db.get_dataset_type() == 'classification':
            # BEGIN preprocessing
            process_data.FFNN_encoding(db)

            # (1) First layer (input layer) has 1 node per attribute.
            # (2) Hidden layers have an arbitrary number of nodes.
            # (3) Output layer has 1 node per possible classification.
            layer_sizes = [
                len(db.get_attr()) - 1,
                50,
                len(db.get_class_list())
            ]

            # The hidden-layer size is arbitrary.
            # TODO: tune the learning rate per dataset.
            learning_rate = .3

            ffnn = FFNN(layer_sizes, db.get_data(), db.get_dataset_type(),
                        learning_rate)
            sys.exit()

        # BEGIN regression FFNN
        elif db.get_dataset_type() == 'regression':
            process_data.FFNN_encoding(db)

            # (1) First layer (input layer) has 1 node per attribute.
            # (2) Hidden layers have an arbitrary number of nodes.
            # (3) Output layer has 1 node, just some real number.
            layer_sizes = [len(db.get_attr()) - 1, 50, 1]

            # Machine dataset's learning rate
            # learning_rate = .60
            # Forest fire dataset's learning rate
            learning_rate = .01

            ffnn = FFNN(layer_sizes, db.get_data(), db.get_dataset_type(),
                        learning_rate)
            sys.exit()

        else:
            print('Database type invalid. Type = ' + db.get_dataset_type())
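
# A minimal worked example of the layer-sizing convention above (an editor's
# sketch, not part of the driver): assuming a hypothetical classification
# dataset with 5 columns (4 attributes plus 1 class column) and 3 possible
# classes, the network would get 4 input nodes, one arbitrary-width hidden
# layer, and 3 output nodes.
def example_layer_sizes(num_columns=5, num_classes=3, hidden_nodes=50):
    # Input layer excludes the class column; output layer has one node per class.
    return [num_columns - 1, hidden_nodes, num_classes]

assert example_layer_sizes() == [4, 50, 3]
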
""" ------------------------------------------------------------- @file pso_driver.py @brief A file for testing our pso implementation """ import process_data import Cost_Functions as cf from FFNN import FFNN from path_manager import pathManager as path_manager import numpy as np import os.path import prepare_data import shared_functions as sf import pso pm = path_manager() selected_dbs = prepare_data.select_db(pm.find_folders(pm.get_databases_dir())) db = prepare_data.prepare_db(selected_dbs[0], pm) process_data.shuffle_all(db.get_data(), 1) process_data.FFNN_encoding(db) half_idx = int(len(db.get_data()) * .5) # FFNN stuff # BEGIN classification FFNN if db.get_dataset_type() == 'classification': # (1) First layer (input layer) has 1 node per attribute. # (2) Hidden layers has arbitrary number of nodes.
def main_execution():
    # Lists to store all the loss-function averages
    k_nn_classification_avgs = []
    k_nn_regress_avgs = []
    enn_avgs = []
    cnn_avgs = []
    k_means_classification_avgs = []
    k_means_regress_avgs = []
    k_medoid_classification_avgs = []
    k_medoid_regress_avgs = []

    reduction_funcs = [
        # 'edited_nn',
        # 'condensed_nn',
        'k_means',
        'k_medoids'
    ]

    # Initializes path manager with default directory as databases.
    pm = path_manager()

    # Loads in a list of database folders
    # for the user to select as the current database.
    selected_dbs = select_db(pm.find_folders(pm.get_databases_dir()))

    # TODO: change to get dataset type from db
    for database in selected_dbs:
        db = prepare_db(database, pm)
        k_nearest = knn(5, db.get_dataset_type(),
                        db.get_classifier_col(),
                        db.get_classifier_attr_cols())

        # Start k-fold cross validation
        print("RUNNING K-FOLD CROSS VALIDATION")

        # Prepare data for k-fold
        binned_data, bin_lengths = process_data.separate_data(
            db.get_attr(), db.get_data())

        # Extract validation set
        bin_lengths, validate_data, binned_data = validate.get_validate(
            bin_lengths, binned_data)

        debug_file.write('\n\nVALIDATION DATA: \n')
        for row in validate_data:
            debug_file.write(str(row) + '\n')

        # NOTE: binned_data still needs to be shuffled somewhere above here.

        # Run k-fold with plain k-nearest neighbors first
        k_fold_results = validate.k_fold(9, binned_data,
                                         validate_data, bin_lengths,
                                         db, False, db.get_dataset_type(),
                                         k_nearest, debug_file, output_file)

        if db.get_dataset_type() == 'classification':
            k_nn_classification_avgs.append(
                sum(k_fold_results) / len(k_fold_results))
        elif db.get_dataset_type() == 'regression':
            k_nn_regress_avgs.append(sum(k_fold_results) / len(k_fold_results))

        output_file.write('\n\n\n')

        # Tuning: backward attribute elimination.
        if True:
            # Attributes queued to be removed
            removal_queue = []
            removed_attr_idx = None
            # Baseline k-fold average with the full attribute set.
            norm_sum = sum(k_fold_results) / len(k_fold_results)

            for attr_idx in db.get_classifier_attr_cols():
                # Stores the full classifier attribute list.
                temp_db = deepcopy(k_nearest.get_class_cols())[:]

                # Temporarily sets the classifier attribute index list to the
                # shorter version (without attr_idx), then recomputes k-fold
                # cross validation for comparison.
                tmp = k_nearest.get_class_cols()
                tmp.remove(attr_idx)
                k_nearest.set_class_cols(tmp)

                # Prepare data for k-fold
                binned_data, bin_lengths = process_data.separate_data(
                    db.get_attr(), db.get_data())

                # Extract validation set
                bin_lengths, validate_data, binned_data = validate.get_validate(
                    bin_lengths, binned_data)

                # Run k-fold with the reduced attribute set
                k_fold_results = validate.k_fold(9, binned_data,
                                                 validate_data, bin_lengths,
                                                 db, False, db.get_dataset_type(),
                                                 k_nearest, debug_file,
                                                 output_file)

                if db.get_dataset_type() == 'classification':
                    k_nn_classification_avgs.append(
                        sum(k_fold_results) / len(k_fold_results))
                elif db.get_dataset_type() == 'regression':
                    k_nn_regress_avgs.append(
                        sum(k_fold_results) / len(k_fold_results))

                attr_removed_sum = sum(k_fold_results) / len(k_fold_results)

                # Resets the classifier column set.
                print(k_nearest.get_class_cols())
                k_nearest.set_class_cols(temp_db)

                # Queue attr_idx for removal if results are better without it.
                print("COMPARISON FOR:", attr_idx, ", VALUES:", norm_sum, ">",
                      attr_removed_sum)
                print(k_nearest.get_class_cols())
                if db.get_dataset_type() == 'classification':
                    # Higher average accuracy without the attribute: remove it.
                    if norm_sum < attr_removed_sum:
                        removal_queue.append(attr_idx)
                elif db.get_dataset_type() == 'regression':
                    # Lower average error without the attribute: remove it.
                    if norm_sum > attr_removed_sum:
                        removal_queue.append(attr_idx)

            final_attrs = k_nearest.get_class_cols()
            print("ATTRIBUTE IDX:", removal_queue)

            # Removes the queued attribute indices from the database's list of
            # attribute columns to be used.
            for attr_idx in removal_queue:
                final_attrs.remove(attr_idx)
            db.set_classifier_attr_cols(final_attrs)

            print(database, " is using attribute columns: ",
                  db.get_classifier_attr_cols())
            print(db.get_dataset_type())
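
# A self-contained sketch of the backward-elimination idea used above (the
# score() argument here is a hypothetical stand-in for the k-fold average,
# not part of the project): drop one attribute at a time, rescore, and queue
# the attribute for removal when the model scores better without it.
def backward_eliminate(attr_cols, score, higher_is_better=True):
    baseline = score(attr_cols)
    to_remove = []
    for attr in attr_cols:
        reduced = [a for a in attr_cols if a != attr]
        trial = score(reduced)
        improved = trial > baseline if higher_is_better else trial < baseline
        if improved:
            to_remove.append(attr)
    # Return the attribute columns that survive elimination.
    return [a for a in attr_cols if a not in to_remove]
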
from data_viewer import data_viewer
from wind_simulation import wind_simulation
from autopilot import autopilot
from observer import observer
from tools.signals import signals
from waypoint_viewer import waypoint_viewer
from path_follower import path_follower
from path_manager import path_manager

# initialize dynamics object
dyn = Dynamics(SIM.ts_sim)
wind = wind_simulation(SIM.ts_sim)
ctrl = autopilot(SIM.ts_sim)
obsv = observer(SIM.ts_sim)
path_follow = path_follower()
path_manage = path_manager()

# waypoint definition
waypoints = msg_waypoints()
waypoints.type = 'straight_line'
waypoints.type = 'fillet'
waypoints.type = 'dubins'
waypoints.num_waypoints = 4
Va = PLAN.Va0
waypoints.ned[:, 0:waypoints.num_waypoints] = np.array([[0, 0, -100],
                                                        [1000, 0, -100],
                                                        [0, 1000, -100],
                                                        [1000, 1000, -100]]).T
waypoints.airspeed[:, 0:waypoints.num_waypoints] = np.array([[Va, Va, Va, Va]])
waypoints.course[:, 0:waypoints.num_waypoints] = np.array(
    [[0, np.radians(45), np.radians(45),
def main():
    pm = path_manager()
    selected_dbs = select_db(pm.find_folders(pm.get_databases_dir()))

    for database in selected_dbs:
        # NOTE: output is written to the file defined below.
        # Make sure this directory exists before you run; the resulting file
        # can be shown in the video for output.
        filename = "../output/kmedoids/" + database + "_output.txt"
        output_file = open(filename, "w+")

        db = prepare_db(database, pm)
        k_nn = knn(5, db.get_dataset_type(), db.get_classifier_col(),
                   db.get_classifier_attr_cols())

        classes = db.get_class_list() \
            if db.get_dataset_type() == 'classification' else []
        class_count = len(classes) \
            if db.get_dataset_type() == 'classification' else 1

        X = process_data.shuffle_all(db.get_data(), 1)
        y = np.array(db.get_data())[:, db.get_classifier_col()]

        # RUN K-MEDOIDS -------------------------------------------------------
        print("RUNNING K-MEDOIDS")
        kc = kcluster(10, 10, db.get_data(), db.get_classifier_attr_cols(),
                      'k-medoids')
        indices = kc.get_medoids()
        centers = [db.get_data()[i] for i in indices]

        rbf = RBF(len(centers), class_count, output_file, 25)
        rbf.fit(X, centers, y, db.get_dataset_type(), classes)

        print("INITIAL WEIGHTS: ", rbf.weights)
        output_file.write("INITIAL WEIGHTS: \n")
        output_file.write(str(rbf.weights) + "\n")
        print("CENTERS: ", centers)

        output_file.write("FINAL WEIGHTS: \n")
        output_file.write(str(rbf.weights) + "\n")
        output_file.write("FINAL TESTS: \n")
        rbf.test(X, db.get_dataset_type(), y, centers, classes)

        print("FINAL WEIGHTS:")
        print(rbf.weights)
        # ----------------------------------------------------------------------

        # BEGIN classification FFNN
        if db.get_dataset_type() == 'classification':
            # BEGIN preprocessing
            process_data.FFNN_encoding(db)

            # (1) First layer (input layer) has 1 node per attribute.
            # (2) Hidden layers have an arbitrary number of nodes.
            # (3) Output layer has 1 node per possible classification.
            layer_sizes = [len(db.get_attr()), 10, len(db.get_class_list())]

            # The hidden-layer size is arbitrary.
            # NOTICE: tune this per dataset.
            learning_rate = .5

            ffnn = FFNN(layer_sizes, db.get_dataset_type(), database,
                        db.get_data(), learning_rate)

        # BEGIN regression FFNN
        elif db.get_dataset_type() == 'regression':
            process_data.FFNN_encoding(db)

            # (1) First layer (input layer) has 1 node per attribute.
            # (2) Hidden layers have an arbitrary number of nodes.
            # (3) Output layer has 1 node, just some real number.
            layer_sizes = [len(db.get_attr()) - 1, 5, 5, 1]
            learning_rate = .0001

            ffnn = FFNN(layer_sizes, db.get_dataset_type(), database,
                        db.get_data(), learning_rate)

        else:
            print('Database type invalid. Type = ' + db.get_dataset_type())
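
# A generic illustration (an editor's sketch, not this project's RBF class) of
# what an RBF layer built on k-medoids centers computes: each hidden unit
# responds with a Gaussian of the distance between the input and one medoid,
# and the outputs are a weighted sum of those activations. sigma and the
# weights below are hypothetical placeholders.
import numpy as np

def rbf_forward_sketch(x, centers, weights, sigma=1.0):
    # One Gaussian activation per center: phi_i = exp(-||x - c_i||^2 / (2 sigma^2))
    phi = np.array([np.exp(-np.linalg.norm(x - c) ** 2 / (2 * sigma ** 2))
                    for c in centers])
    # Linear output layer: one score per class (or one value for regression).
    return phi @ weights

# Example with 3 hypothetical centers in 2-D and 2 output classes.
x = np.array([0.5, 0.5])
centers = [np.array([0.0, 0.0]), np.array([1.0, 1.0]), np.array([0.0, 1.0])]
weights = np.ones((3, 2))
print(rbf_forward_sketch(x, centers, weights))
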
def main_execution():
    # Lists to store all the loss-function averages
    k_nn_classification_avgs = []
    k_nn_regress_avgs = []
    enn_avgs = []
    cnn_avgs = []
    k_means_classification_avgs = []
    k_means_regress_avgs = []
    k_medoid_classification_avgs = []
    k_medoid_regress_avgs = []

    reduction_funcs = ['edited_nn', 'condensed_nn', 'k_means', 'k_medoids']

    # Initializes path manager with default directory as databases.
    pm = path_manager()

    # Loads in a list of database folders
    # for the user to select as the current database.
    selected_dbs = select_db(pm.find_folders(pm.get_databases_dir()))

    for database in selected_dbs:
        db = prepare_db(database, pm)
        k_nearest = knn(15, db.get_dataset_type(),
                        db.get_classifier_col(),
                        db.get_classifier_attr_cols())

        # Start k-fold cross validation
        print("RUNNING K-FOLD CROSS VALIDATION")

        # Prepare data for k-fold
        binned_data, bin_lengths = process_data.separate_data(
            db.get_attr(), db.get_data())

        # Extract validation set
        bin_lengths, validate_data, binned_data = validate.get_validate(
            bin_lengths, binned_data)

        debug_file.write('\n\nVALIDATION DATA: \n')
        for row in validate_data:
            debug_file.write(str(row) + '\n')

        # Run k-fold with plain k-nearest neighbors first
        k_fold_results = validate.k_fold(9, binned_data,
                                         validate_data, bin_lengths,
                                         db, True, db.get_dataset_type(),
                                         k_nearest, debug_file, output_file)

        if db.get_dataset_type() == 'classification':
            k_nn_classification_avgs.append(
                sum(k_fold_results) / len(k_fold_results))
        elif db.get_dataset_type() == 'regression':
            k_nn_regress_avgs.append(sum(k_fold_results) / len(k_fold_results))

        output_file.write('\n\n\n')

        # Loop through all reduction functions
        for func in reduction_funcs:
            print('RUNNING ', func)
            # We are removing a bin from bin_lengths
            if db.get_dataset_type() == 'classification':
                k_fold_results = validate.k_fold(9, binned_data,
                                                 validate_data, bin_lengths, db,
                                                 True, db.get_dataset_type(),
                                                 k_nearest, debug_file,
                                                 output_file, func)
                if func == 'edited_nn':
                    enn_avgs.append(sum(k_fold_results) / len(k_fold_results))
                elif func == 'condensed_nn':
                    cnn_avgs.append(sum(k_fold_results) / len(k_fold_results))
                elif func == 'k_means':
                    k_means_classification_avgs.append(
                        sum(k_fold_results) / len(k_fold_results))
                elif func == 'k_medoids':
                    k_medoid_classification_avgs.append(
                        sum(k_fold_results) / len(k_fold_results))

            elif db.get_dataset_type() == 'regression':
                # Editing and condensing only apply to classification data.
                if func == 'edited_nn' or func == 'condensed_nn':
                    continue

                # Shrink data to a quarter of its original size
                db_small = process_data.random_data_from(db.get_data(), 0.25)

                # Re-bin it after shrinking
                binned_data, bin_lengths = process_data.separate_data(
                    db.get_attr(), db_small)

                k_fold_results = validate.k_fold(9, binned_data,
                                                 validate_data, bin_lengths, db,
                                                 True, db.get_dataset_type(),
                                                 k_nearest, debug_file,
                                                 output_file, func)
                # NOTE: regression averages for k_means / k_medoids are not
                # recorded yet.
                if func == 'k_means':
                    pass
                elif func == 'k_medoids':
                    pass

            output_file.write('K FOLD RESULTS: ' + str(k_fold_results))
            output_file.write('\n\n\n')

    print("KNN CLASSIFICATION AVGS: ", k_nn_classification_avgs)
    print("KNN REGRESSION AVGS: ", k_nn_regress_avgs)
    print("ENN AVGS: ", enn_avgs)
    print("CNN AVGS: ", cnn_avgs)
    print("K MEANS CLASSIFICATION AVGS: ", k_means_classification_avgs)
    print("K MEANS REGRESSION AVGS: ", k_means_regress_avgs)
    print("K MEDOID CLASSIFICATION AVGS: ", k_medoid_classification_avgs)
    print("K MEDOID REGRESSION AVGS", k_medoid_regress_avgs)
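
# A small refactoring sketch (an editor's suggestion, not part of the drivers
# above): the repeated sum(...) / len(...) bookkeeping could be collapsed into
# one helper plus a dictionary keyed by reduction-function name. The keys
# mirror the lists used above, but the helper itself is hypothetical.
def k_fold_average(k_fold_results):
    # Average loss-function value across all folds.
    return sum(k_fold_results) / len(k_fold_results)

avgs = {
    'k_nn': [],
    'edited_nn': [],
    'condensed_nn': [],
    'k_means': [],
    'k_medoids': [],
}

# Example: record one run's fold results under its reduction function.
avgs['k_means'].append(k_fold_average([0.82, 0.79, 0.85]))
print(avgs['k_means'])
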