Example #1
0
 def cfunc_symmetry():
     """Generate symmetry vectors and pair each one with a two-element label.

     The vector length and the number of vectors are read from
     ``data_src_params[0]`` and ``data_src_params[1]``.  A vector for which
     ``tft.check_vector_symmetry`` is truthy is labelled ``[1, 0]``; every
     other vector is labelled ``[0, 1]``.

     Returns:
         A list of ``[vector, label]`` pairs, one per generated vector.
     """
     vector_length = data_src_params[0]
     vector_count = data_src_params[1]
     vectors = tft.gen_symvect_dataset(vector_length, vector_count)

     labelled = []
     for index in range(vector_count):
         vector = vectors[index]
         one_hot = [1, 0] if tft.check_vector_symmetry(vector) else [0, 1]
         labelled.append([vector, one_hot])
     return labelled
Example #2
0
 def symmetry(self):
     """Interactively configure, build and run a network on the symmetry data.

     Prompts for the vector length (default 101) and the number of vectors
     (default 2000), builds a ``Caseman`` around a symmetry-vector
     generator, sizes the first and last layer in ``self.params.dims`` from
     the first training case, then builds and runs the model.

     Raises:
         ValueError: if a non-empty answer is not a valid integer.
     """
     # input() always returns a string, so a typed answer must be converted;
     # otherwise the generator would receive "101" instead of 101.
     raw_length = input("Enter the length of the vectors. 101 set to default: ")
     length = int(raw_length) if raw_length.strip() else 101
     raw_count = input("Enter the number of vectors. Default set to 2000: ")
     count = int(raw_count) if raw_count.strip() else 2000

     case_generator = (lambda: TFT.gen_symvect_dataset(length, count))
     case_man = Caseman(cfunc=case_generator, vfrac=self.params.vfrac, tfrac=self.params.tfrac)

     # Input/output layer sizes follow the shape of the generated cases.
     self.params.dims[0] = len(case_man.training_cases[0][0])
     self.params.dims[-1] = len(case_man.training_cases[0][1])
     print("\nNumber of bits taken from input layer: ", self.params.dims[0],
           "and output set to target vector length at: ", self.params.dims[-1])

     self.ann.set_cman(case_man)
     model = self.build_ann()
     self.ann.set_model(model)
     model.run(steps=self.params.steps, bestk=self.params.bestk)
     self.check_mapping_and_dendro()
Example #3
0
def symmetry(epochs=3000,
             nbits=101,
             ncases=2000,
             learning_rate=0.001,
             showint=1000,
             batch_size=512,
             vfrac=0.1,
             tfrac=0.1,
             vint=200,
             sm=True,
             bestk=1):
    """Build a ``Gann`` for the symmetry data set, run it and return it.

    Args:
        epochs: first positional argument passed to ``Gann.run``.
        nbits: vector length; also the size of the input layer.
        ncases: number of vectors requested from the generator.
        learning_rate, showint, batch_size, vint: forwarded to ``Gann``.
        vfrac, tfrac: forwarded to ``CaseManager``.
        sm: forwarded to ``Gann`` as ``softmax``.
        bestk: forwarded to ``Gann.run``.

    Returns:
        The ``Gann`` instance after ``run`` has been called.
    """

    def generate_cases():
        # Called by the case manager whenever it needs the data set.
        return TFT.gen_symvect_dataset(nbits, ncases)

    manager = CaseManager(cfunc=generate_cases, vfrac=vfrac, tfrac=tfrac)

    # One hidden layer three times as wide as the input, two output nodes.
    layer_sizes = [nbits, nbits * 3, 2]
    network = Gann(dims=layer_sizes,
                   case_manager=manager,
                   learning_rate=learning_rate,
                   showint=showint,
                   batch_size=batch_size,
                   vint=vint,
                   softmax=sm)
    network.run(epochs, bestk=bestk)
    return network
Example #4
0
    def source(self):
        """Build the data set selected by ``self.args.source``.

        ``self.args.source`` is either the name of a ``.txt`` file inside
        ``data_set_files/`` or one of the generator names ``parity``,
        ``symmetry``, ``auto_onehot``, ``auto_dense``, ``bitcounter``,
        ``segmentcounter`` or ``mnist``.  For the generators,
        ``self.args.sourceinit`` supplies the generator arguments; when it is
        ``None`` built-in defaults are used.

        Side effects:
            Sets ``self.source_is_called`` to ``True`` and prints the chosen
            source.

        Returns:
            A sequence of ``(input, target)`` cases.  Cases read from a
            ``.txt`` file have their inputs min-max scaled per column.

        Raises:
            SystemExit: with status 1 when the source yields no cases, which
                is how an unknown ``--source`` value is detected.
        """

        def normalize(cases):
            """Min-max scale each input column; targets are left untouched."""
            features = numpy.array([case[0] for case in cases], dtype=float)
            targets = [case[1] for case in cases]
            lowest = features.min(axis=0)
            span = features.max(axis=0) - lowest
            # A constant column has zero span; dividing by it would give NaN,
            # so divide by 1 instead and let the column scale to 0.
            span[span == 0] = 1.0
            scaled = (features - lowest) / span
            return list(zip(scaled, targets))

        def to_float(inp):
            # A '?' marks a missing value and is read as 0.
            token = inp.strip()
            return 0 if token == '?' else float(token)

        def init_args(*defaults):
            """Return the --sourceinit values to use, or ``defaults`` if unset."""
            init = self.args.sourceinit
            if init is None:
                return defaults
            return tuple(init[i] for i in range(len(defaults)))

        self.source_is_called = True
        print("source:", self.args.source)
        data_set = []
        is_text_file = self.args.source.endswith(".txt")

        if is_text_file:
            with open("data_set_files/" + self.args.source) as file:
                # Fields are separated by ';' or ','; blank lines are skipped.
                rows = [[to_float(field) for field in re.split("[;,]", line)]
                        for line in file if line.strip()]
            # The last column holds a 1-based class label.
            class_count = max(int(row[-1]) for row in rows)
            for row in rows:
                target = TFT.int_to_one_hot(int(row[-1]) - 1, class_count)
                data_set.append([row[:-1], target])
        elif self.args.source == "parity":
            data_set = TFT.gen_all_parity_cases(*init_args(10))
        elif self.args.source == "symmetry":
            vecs = TFT.gen_symvect_dataset(*init_args(101, 2000))
            # The last element of each vector is its class label.
            inputs = [vec[:-1] for vec in vecs]
            targets = [TFT.int_to_one_hot(vec[-1], 2) for vec in vecs]
            data_set = list(zip(inputs, targets))
        elif self.args.source == "auto_onehot":
            data_set = TFT.gen_all_one_hot_cases(*init_args(64))
        elif self.args.source == "auto_dense":
            data_set = TFT.gen_dense_autoencoder_cases(*init_args(2000, 100))
        elif self.args.source == "bitcounter":
            data_set = TFT.gen_vector_count_cases(*init_args(500, 15))
        elif self.args.source == "segmentcounter":
            data_set = TFT.gen_segmented_vector_cases(
                *init_args(25, 1000, 0, 8))
        elif self.args.source == "mnist":
            # Only the training cases are loaded here.
            cases = mnist_basics.load_all_flat_cases(type='training')
            images, labels = cases[0], cases[1]
            # Scale pixel values from 0..255 down to 0..1.
            inputs = [[pixel / 255 for pixel in image] for image in images]
            targets = [TFT.int_to_one_hot(label, 10) for label in labels]
            data_set = list(zip(inputs, targets))

        if len(data_set) == 0:
            print(self.args.source, "is illegal for argument --source")
            print("Legal values are: <filename>.txt, parity, symmetry, "
                  "auto_onehot, auto_dense, bitcounter, segmentcounter, mnist")
            # Exit with a non-zero status so callers can detect the failure.
            raise SystemExit(1)
        if is_text_file:
            data_set = normalize(data_set)
        return data_set
Example #5
0
    def data_loader(self, dataset, caseFraction, testFraction,
                    validationFraction):
        """Load ``dataset``, register it with ``self.openAA`` and set defaults.

        For the generated data sets (``parity``, ``symmetry``,
        ``autoencoder``, ``bitcounter``, ``segmentcounter``) the generator
        arguments are read interactively with ``input()``.  ``mnist``,
        ``wine`` and ``glass`` are read from files.  The cases are wrapped in
        a ``CaseManager`` and passed to ``self.openAA.set_case_manager``;
        every data set except ``autoencoder`` then gets default
        hyper-parameters written to ``self.mp``.  An unrecognised name does
        nothing.

        Args:
            dataset: name of the data set to load.
            caseFraction: fraction of the cases to keep; only used by the
                file-based data sets, and only when it is not 1.
            testFraction: forwarded to ``CaseManager`` as ``test_fraction``.
            validationFraction: forwarded to ``CaseManager`` as
                ``validation_fraction``.

        Raises:
            ValueError: if an interactive answer is not a valid integer.
        """

        def make_manager(cases):
            # Every data set is split with the same validation/test fractions.
            return CaseManager(cases,
                               validation_fraction=validationFraction,
                               test_fraction=testFraction)

        def report_sizes(manager):
            # Shows the layer sizes the first training case implies.
            print("Input size: " + str(len(manager.training_cases[0][0])) +
                  ", Output size: " + str(len(manager.training_cases[0][1])))

        def apply_defaults(**mp_defaults):
            # Write the per-data-set defaults to the model parameters.
            for name, value in mp_defaults.items():
                setattr(self.mp, name, value)
            if "custom_buckets" not in mp_defaults:
                # NOTE(review): the file-based data sets keep custom_buckets
                # on self.mp, the others reset it on self; confirm which
                # attribute is actually read before unifying them.
                self.custom_buckets = None
            self.target_accuracy = None
            self.early_stopping = False

        if dataset == "parity":
            length = int(input("Length of vectors: "))
            doubleFlag = input("Activate double flag y/n: ")
            ds = make_manager(
                TFT.gen_all_parity_cases(length, doubleFlag == "y"))
            self.openAA.set_case_manager(ds)

            # Default values for parity
            apply_defaults(layer_dims=[10, 20, 40, 20, 1],
                           learning_rate=0.001,
                           hidden_activation_function="relu",
                           softmax=False,
                           w_range="scaled",
                           bestk=None,
                           epochs=400,
                           error_function="mse",
                           optimizer="adam",
                           minibatch_size=100,
                           lr_freq=None,
                           bs_freq=None)

            # Use this to set the size of the input layer
            report_sizes(ds)

        elif dataset == "symmetry":
            vectorNumber = int(input("Number of cases: "))
            vectorLength = int(input("Length of vectors: "))
            ds = make_manager(
                TFT.gen_symvect_dataset(vectorLength, vectorNumber))
            self.openAA.set_case_manager(ds)

            # Default values for symmetry
            apply_defaults(layer_dims=[vectorLength, 40, 20, 1],
                           learning_rate=0.001,
                           hidden_activation_function="relu",
                           softmax=False,
                           w_range="scaled",
                           bestk=None,
                           epochs=70,
                           optimizer="adam",
                           error_function="mse",
                           minibatch_size=8,
                           lr_freq=None,
                           bs_freq=None)

            report_sizes(ds)

        elif dataset == "autoencoder":
            vectorLength = int(input("Set lenght of vectors: "))
            ds = make_manager(TFT.gen_all_one_hot_cases(vectorLength))
            self.openAA.set_case_manager(ds)
            report_sizes(ds)

        elif dataset == "bitcounter":
            vectorNumber = int(input("Number of cases: "))
            vectorLength = int(input("Length of input vector: "))
            ds = make_manager(
                TFT.gen_vector_count_cases(vectorNumber, vectorLength))
            self.openAA.set_case_manager(ds)
            report_sizes(ds)
            print(ds.training_cases[0])

            # Default values for bitcounter
            apply_defaults(layer_dims=[15, 128, 64, 32, 16],
                           softmax=True,
                           hidden_activation_function="relu",
                           bestk=1,
                           learning_rate=0.001,
                           epochs=100,
                           w_range=[-0.01, 0.1],
                           error_function="ce",
                           optimizer="adam",
                           minibatch_size=16,
                           lr_freq=None,
                           bs_freq=None)

        elif dataset == "segmentcounter":
            vectorNumber = int(input("Number of cases: "))
            vectorLength = int(input("Length of input vector: "))
            minSeg = int(input("Minimum number of segments: "))
            maxSeg = int(input("Maximum number of segments: "))
            ds = make_manager(
                TFT.gen_segmented_vector_cases(vectorLength, vectorNumber,
                                               minSeg, maxSeg))
            self.openAA.set_case_manager(ds)
            report_sizes(ds)

            # Default values for segmentcounter
            apply_defaults(layer_dims=[25, 128, 64, 32, 12, 9],
                           softmax=True,
                           hidden_activation_function="relu",
                           bestk=1,
                           learning_rate=0.0005,
                           epochs=200,
                           w_range=[0, 0.1],
                           error_function="ce",
                           optimizer="adam",
                           minibatch_size=32,
                           lr_freq=None,
                           bs_freq=None)

        elif dataset == "mnist":
            cases = mnist.load_flat_text_cases(
                "all_flat_mnist_training_cases_text.txt")
            if caseFraction != 1:
                cases = TFT.get_fraction_of_cases(cases, caseFraction)
            # Count how many cases fall in each of the ten target positions.
            digit_counts = [0] * 10
            for case in cases:
                digit_counts[case[1].index(1)] += 1
            print("number of cases: " + str(len(cases)))
            print(digit_counts)
            ds = make_manager(cases)

            # Default values for mnist
            apply_defaults(layer_dims=[784, 512, 10],
                           softmax=False,
                           hidden_activation_function="relu",
                           bestk=1,
                           learning_rate=0.001,
                           w_range=[0, 0.1],
                           epochs=10,
                           error_function="sce",
                           optimizer="adam",
                           minibatch_size=20,
                           lr_freq=None,
                           bs_freq=None)

            self.openAA.set_case_manager(ds)
            report_sizes(ds)
            print(ds.training_cases[0])

        elif dataset == "wine":
            print("\n")
            # TODO : load wine dataset in correct format
            filereader = fr.FileReader()
            cases = filereader.readfile(
                "wine.txt", 9 if self.mp.custom_buckets is None else 6,
                [3, 4, 5, 6, 7, 8], True)
            print("first: " + str(len(cases)))
            if caseFraction != 1:
                cases = TFT.get_fraction_of_cases(cases, caseFraction)
            print("second: " + str(len(cases)))
            ds = make_manager(cases)

            # Default values for wine.  softmax and the activation function
            # belong on self.mp like in every other branch; they used to be
            # written to self, where the model parameters never saw them.
            apply_defaults(layer_dims=[11, 512, 256, 128, 64, 32, 6],
                           softmax=False,
                           hidden_activation_function="relu",
                           bestk=1,
                           learning_rate=0.002,
                           w_range="scaled",
                           epochs=20,
                           error_function="sce",
                           optimizer="adam",
                           custom_buckets=[1],
                           minibatch_size=32,
                           lr_freq=150,
                           bs_freq=150)

            self.openAA.set_case_manager(ds)
            print((ds.training_cases[0]))
            print((ds.training_cases[0][0]))
            print((ds.training_cases[0][1]))
            report_sizes(ds)
            # Sanity check: report every case whose input or target length
            # differs from the first case, or that cannot be measured at all.
            for line_nr, case in enumerate(ds.training_cases):
                try:
                    if len(case[0]) != len(ds.training_cases[0][0]):
                        print(len(case[0]))
                except Exception:
                    print("HEI!!   input")
                    print(case)
                    print("line nr " + str(line_nr))
                try:
                    if len(case[1]) != len(ds.training_cases[0][1]):
                        print(len(case[1]))
                except Exception:
                    print("HEI!!   target")
                    print(case)
                    print("line nr " + str(line_nr))

        elif dataset == "glass":
            print("\n")
            filereader = fr.FileReader()
            cases = filereader.readfile(
                "glass.txt", 8 if self.mp.custom_buckets is None else 6,
                [1, 2, 3, 5, 6, 7], True)
            if caseFraction != 1:
                cases = TFT.get_fraction_of_cases(cases, caseFraction)
            ds = make_manager(cases)

            # Default values for glass (softmax and the activation function
            # are written to self.mp, matching the other data sets).
            apply_defaults(layer_dims=[9, 512, 256, 64, 32, 6],
                           softmax=False,
                           hidden_activation_function="relu",
                           bestk=1,
                           learning_rate=0.002,
                           w_range=[0, 0.1],
                           epochs=200,
                           error_function="sce",
                           optimizer="adam",
                           custom_buckets=[1],
                           minibatch_size=16,
                           lr_freq=100,
                           bs_freq=150)

            self.openAA.set_case_manager(ds)
            print((ds.training_cases[0]))
            print((ds.training_cases[0][0]))
            print((ds.training_cases[0][1]))
            report_sizes(ds)
            # Sanity check: flag cases whose shape differs from the first.
            for case in ds.training_cases:
                if len(case[0]) != len(ds.training_cases[0][0]):
                    print("HEI!!   input")
                if len(case[1]) != len(ds.training_cases[0][1]):
                    print("HEI!!   target")
Example #6
0
import random
from io import BytesIO
from collections import defaultdict
import pandas as pd
import seaborn as sns

# NOTE(review): hard-coded absolute path inside one user's home directory;
# it will not resolve on other machines.
__mnist_path__ = "/Users/hakongrov/Documents/INDØK/4.År/AI-Prog/Files/dataset/mnist-zip/"

# ******* A General Artificial Neural Network ********
# This is the original GANN, which has been improved in the file gann.py

# Registry of the dataset generators, keyed by dataset name. Each value is a
# lambda that forwards its arguments unchanged to the matching TFT function.
_generator = {
    "symmetry":
    lambda length, count: TFT.gen_symvect_dataset(
        length, count
    ),  # Needs a specific vector length and number of cases: length, count
    "parity":
    lambda a: TFT.gen_all_parity_cases(
        a),  # Needs a specific length parameter. TODO: expose the double flag?
    "autoencoder":
    lambda a: (
        TFT.gen_all_one_hot_cases(a)
    ),  # Alternative: TFT.gen_dense_autoencoder_cases(count, size); the original note also mentions a specific density of ones (TODO confirm)
    "bit counter":
    lambda num, size: TFT.gen_vector_count_cases(
        num, size),  # Dimensions = [same as input, hidden, input + 1]
    "segment counter":
    lambda a, b, c, d: TFT.gen_segmented_vector_cases(a, b, c, d)
}