Beispiel #1
0
def main():
    """Train a feed-forward neural network on each user-selected database.

    For every selected database the data is FFNN-encoded, layer sizes are
    derived from the dataset's attributes/classes, and an FFNN is
    constructed.  NOTE: execution currently stops (sys.exit) right after
    the first database is processed.
    """
    pm = path_manager()
    selected_dbs = select_db(pm.find_folders(pm.get_databases_dir()))

    for database in selected_dbs:

        # TODO: support the 'all' selection instead of bailing out.
        if database[0] != 'all':
            db = prepare_db(database[0], pm)
        else:
            print("Running against all databases is not supported yet.")
            sys.exit()

        # BEGIN classification FFNN
        if db.get_dataset_type() == 'classification':

            # BEGIN preprocessing
            process_data.FFNN_encoding(db)

            # (1) First layer (input layer) has 1 node per attribute.
            # (2) Hidden layer has an arbitrary number of nodes.
            # (3) Output layer has 1 node per possible classification.
            layer_sizes = [
                len(db.get_attr()) - 1,   # (1)
                50,                       # (2)
                len(db.get_class_list())  # (3)
            ]

            # This number is arbitrary.
            # TODO: tune the learning rate per dataset.
            learning_rate = .3

            ffnn = FFNN(layer_sizes, db.get_data(), db.get_dataset_type(),
                        learning_rate)
            # Stop after the first database; remaining selections are skipped.
            sys.exit()

        # BEGIN regression FFNN
        elif db.get_dataset_type() == 'regression':

            process_data.FFNN_encoding(db)

            # (1) First layer (input layer) has 1 node per attribute.
            # (2) Hidden layer has an arbitrary number of nodes.
            # (3) Output layer has 1 node, just some real number.
            layer_sizes = [len(db.get_attr()) - 1, 50, 1]

            # Machine dataset's learning rate:
            # learning_rate = .60

            # Forest fire dataset's learning rate:
            learning_rate = .01

            ffnn = FFNN(layer_sizes, db.get_data(), db.get_dataset_type(),
                        learning_rate)
            # Stop after the first database; remaining selections are skipped.
            sys.exit()

        else:
            print('Database type invalid. Type = ' + db.get_dataset_type())
import Cost_Functions as cf
from FFNN import FFNN
from path_manager import pathManager as path_manager
import numpy as np
import os.path
import prepare_data
import shared_functions as sf
import pso

# Let the user pick which database folders under the databases dir to load.
pm = path_manager()
selected_dbs = prepare_data.select_db(pm.find_folders(pm.get_databases_dir()))

# Only the first selected database is used by this script.
db = prepare_data.prepare_db(selected_dbs[0], pm)
# NOTE(review): process_data is not in the import block above — confirm it
# is brought into scope elsewhere in this file.
process_data.shuffle_all(db.get_data(), 1)

# Encode the data for the feed-forward network.
process_data.FFNN_encoding(db)

# Midpoint index of the (shuffled) data — presumably for a 50/50
# train/test split; verify against later use.
half_idx = int(len(db.get_data()) * .5)

# FFNN stuff

# BEGIN classification FFNN
if db.get_dataset_type() == 'classification':
    # (1) First layer (input layer) has 1 node per attribute.
    # (2) Hidden layers has arbitrary number of nodes.
    # (3) Output layer has 1 node per possible classification.
    layer_sizes = [
        len(db.get_attr()) - 1,  # (1)
        5,
        5,  # (2)
        len(db.get_class_list())
Beispiel #3
0
def main():
    """Run k-medoids-seeded RBF training, then an FFNN, per selected database.

    For each selected database this writes RBF weights and test results to
    ../output/kmedoids/<database>_output.txt (that directory must exist
    before running), then builds a feed-forward network whose layer sizes
    are derived from the dataset.
    """
    pm = path_manager()
    selected_dbs = select_db(pm.find_folders(pm.get_databases_dir()))

    for database in selected_dbs:
        # NOTE: OUTPUT WILL WRITE TO A FILE, AS DEFINED BELOW.
        # MAKE SURE TO CREATE THIS DIRECTORY BEFORE YOU RUN, AND YOU CAN
        # SHOW THE FILE THAT'S CREATED IN THE VIDEO FOR OUTPUT.
        filename = "../output/kmedoids/" + database + "_output.txt"

        db = prepare_db(database, pm)
        # Fix: db_name was referenced below but never defined (NameError);
        # `database` is the database's name string (see filename above).
        db_name = database
        k_nn = knn(5, db.get_dataset_type(), db.get_classifier_col(),
                   db.get_classifier_attr_cols())

        # Class metadata only applies to classification datasets;
        # regression uses a single real-valued output.
        if db.get_dataset_type() == 'classification':
            classes = db.get_class_list()
            class_count = len(classes)
        else:
            classes = []
            class_count = 1

        X = process_data.shuffle_all(db.get_data(), 1)
        y = np.array(db.get_data())[:, db.get_classifier_col()]

        # Context manager guarantees the output file is closed even if
        # training raises (the original leaked the handle).
        with open(filename, "w+") as output_file:
            # RUN K-MEDOIDS --------------------------------------------------
            print("RUNNING K-MEDOIDS")
            kc = kcluster(10, 10, db.get_data(), db.get_classifier_attr_cols(),
                          'k-medoids')
            indices = kc.get_medoids()
            centers = [db.get_data()[i] for i in indices]
            rbf = RBF(len(centers), class_count, output_file, 25)
            rbf.fit(X, centers, y, db.get_dataset_type(), classes)
            print("INITIAL WEIGHTS: ", rbf.weights)
            output_file.write("INITIAL WEIGHTS: \n")
            output_file.write(str(rbf.weights) + "\n")
            print("CENTERS: ", centers)
            output_file.write("FINAL WEIGHTS: \n")
            output_file.write(str(rbf.weights) + "\n")
            output_file.write("FINAL TESTS: \n")
            rbf.test(X, db.get_dataset_type(), y, centers, classes)
            # Fix: typo "FINALS WEIGHTS" (the file write uses "FINAL WEIGHTS").
            print("FINAL WEIGHTS:")
            print(rbf.weights)
            # ----------------------------------------------------------------

        # BEGIN classification FFNN
        if db.get_dataset_type() == 'classification':

            # BEGIN preprocessing
            process_data.FFNN_encoding(db)

            # (1) First layer (input layer) has 1 node per attribute.
            # (2) Hidden layer has an arbitrary number of nodes.
            # (3) Output layer has 1 node per possible classification.
            layer_sizes = [len(db.get_attr()), 10,
                           len(db.get_class_list())]  # (3)

            # This number is arbitrary.
            # NOTICE: tune this per dataset.
            learning_rate = .5

            ffnn = FFNN(layer_sizes, db.get_dataset_type(), db_name,
                        db.get_data(), learning_rate)

        # BEGIN regression FFNN
        elif db.get_dataset_type() == 'regression':

            process_data.FFNN_encoding(db)

            # (1) First layer (input layer) has 1 node per attribute.
            # (2) Hidden layers have an arbitrary number of nodes.
            # (3) Output layer has 1 node, just some real number.
            layer_sizes = [len(db.get_attr()) - 1, 5, 5, 1]

            learning_rate = .0001

            ffnn = FFNN(layer_sizes, db.get_dataset_type(), db_name,
                        db.get_data(), learning_rate)

        else:
            print('Database type invalid. Type = ' + db.get_dataset_type())