def cfunc_symmetry():
    """Build labelled cases for the symmetry task.

    The two entries of the module-level ``data_src_params`` are forwarded to
    ``tft.gen_symvect_dataset``; the second entry is also the number of
    generated vectors that get labelled.

    Returns:
        A list of ``[vector, label]`` pairs.  ``label`` is ``[1, 0]`` when
        ``tft.check_vector_symmetry`` accepts the vector, ``[0, 1]`` otherwise.
    """
    first_param, case_count = data_src_params[0], data_src_params[1]
    vectors = tft.gen_symvect_dataset(first_param, case_count)
    return [
        [
            vectors[idx],
            [1, 0] if tft.check_vector_symmetry(vectors[idx]) else [0, 1],
        ]
        for idx in range(case_count)
    ]
def symmetry(self):
    """Interactively set up and run the network on the symmetry data set.

    Prompts for the vector length and the number of vectors (an empty answer
    selects the defaults 101 and 2000), builds a ``Caseman`` around
    ``TFT.gen_symvect_dataset``, sizes the first and last entries of
    ``self.params.dims`` from the first training case, then builds and runs
    the model and finally calls ``self.check_mapping_and_dendro()``.

    Raises:
        ValueError: If a non-empty answer is not a valid integer.
    """
    # input() always returns a string; convert so the generator receives
    # integers for typed values as well as for the defaults.
    raw_length = input("Enter the length of the vectors. 101 set to default: ")
    length = int(raw_length) if raw_length.strip() else 101
    raw_count = input("Enter the number of vectors. Default set to 2000: ")
    count = int(raw_count) if raw_count.strip() else 2000

    case_generator = (lambda: TFT.gen_symvect_dataset(length, count))
    case_man = Caseman(cfunc=case_generator,
                       vfrac=self.params.vfrac,
                       tfrac=self.params.tfrac)

    # Match the outer layers to the shape of the generated cases.
    first_case = case_man.training_cases[0]
    self.params.dims[0] = len(first_case[0])
    self.params.dims[-1] = len(first_case[1])
    print("\nNumber of bits taken from input layer: ", self.params.dims[0],
          "and output set to target vector length at: ", self.params.dims[-1])

    self.ann.set_cman(case_man)
    model = self.build_ann()
    self.ann.set_model(model)
    model.run(steps=self.params.steps, bestk=self.params.bestk)
    self.check_mapping_and_dendro()
def symmetry(epochs=3000, nbits=101, ncases=2000, learning_rate=0.001,
             showint=1000, batch_size=512, vfrac=0.1, tfrac=0.1, vint=200,
             sm=True, bestk=1):
    """Train a ``Gann`` on the symmetry data set and return it.

    The cases come from ``TFT.gen_symvect_dataset(nbits, ncases)`` wrapped in
    a ``CaseManager`` with the given validation/test fractions.  The network
    has layer sizes ``[nbits, nbits * 3, 2]`` and is run for ``epochs`` with
    the supplied ``bestk``; the remaining arguments are passed straight to
    the ``Gann`` constructor (``sm`` as its ``softmax`` argument).
    """

    def make_cases():
        # Deferred so the case manager decides when the data is generated.
        return TFT.gen_symvect_dataset(nbits, ncases)

    layer_sizes = [nbits, nbits * 3, 2]
    manager = CaseManager(cfunc=make_cases, vfrac=vfrac, tfrac=tfrac)
    network = Gann(
        dims=layer_sizes,
        case_manager=manager,
        learning_rate=learning_rate,
        showint=showint,
        batch_size=batch_size,
        vint=vint,
        softmax=sm,
    )
    network.run(epochs, bestk=bestk)
    return network
def source(self):
    """Build the data set selected by ``self.args.source``.

    Recognised values are ``<name>.txt`` (read from ``data_set_files/``, one
    case per line, fields separated by ``;`` or ``,``, last field is the
    1-based class), ``parity``, ``symmetry``, ``auto_onehot``, ``auto_dense``,
    ``bitcounter``, ``segmentcounter`` and ``mnist``.  For the generated
    sources, ``self.args.sourceinit`` supplies the generator arguments when
    it is not ``None``; otherwise built-in defaults are used.

    Side effects: sets ``self.source_is_called`` and prints the chosen
    source.  If no data set could be built, a message is printed and the
    program exits via ``quit()``.

    Returns:
        The list of cases; for ``.txt`` sources the inputs are min-max
        normalised per column.
    """

    def normalize(cases):
        """Min-max scale every input column of ``cases`` into [0, 1]."""
        features = numpy.array([case[0] for case in cases])
        targets = [case[1] for case in cases]
        low = numpy.min(features, axis=0)
        span = numpy.max(features, axis=0) - low
        # A constant column has zero span; dividing by 1 instead maps it to
        # 0 rather than filling the inputs with NaN/inf.
        span = numpy.where(span == 0, 1, span)
        return list(zip((features - low) / span, targets))

    def to_float(field):
        """Convert one text field to a number; ``?`` (missing) becomes 0."""
        field = field.strip()
        return 0 if field == '?' else float(field)

    def init_args(*defaults):
        """Return one ``--sourceinit`` value per default, or the defaults."""
        given = self.args.sourceinit
        if given is None:
            return defaults
        return tuple(given[pos] for pos in range(len(defaults)))

    self.source_is_called = True
    print("source:", self.args.source)

    source_name = self.args.source
    is_text_file = source_name[-4:] == ".txt"
    data_set = []

    if is_text_file:
        with open("data_set_files/" + source_name) as file:
            # Blank lines (e.g. a trailing newline at end of file) carry no
            # case and are skipped.
            rows = [
                [to_float(field) for field in re.split("[;,]", line)]
                for line in file
                if line.strip()
            ]
        class_count = max(int(row[-1]) for row in rows)
        for row in rows:
            # Classes in the file are 1-based; one-hot indices are 0-based.
            target = TFT.int_to_one_hot(int(row[-1]) - 1, class_count)
            data_set.append([row[:-1], target])
    elif source_name == "parity":
        data_set = TFT.gen_all_parity_cases(*init_args(10))
    elif source_name == "symmetry":
        vecs = TFT.gen_symvect_dataset(*init_args(101, 2000))
        # The last element of each vector is used as the class (0 or 1).
        data_set = [(vec[:-1], TFT.int_to_one_hot(vec[-1], 2))
                    for vec in vecs]
    elif source_name == "auto_onehot":
        data_set = TFT.gen_all_one_hot_cases(*init_args(64))
    elif source_name == "auto_dense":
        data_set = TFT.gen_dense_autoencoder_cases(*init_args(2000, 100))
    elif source_name == "bitcounter":
        data_set = TFT.gen_vector_count_cases(*init_args(500, 15))
    elif source_name == "segmentcounter":
        data_set = TFT.gen_segmented_vector_cases(*init_args(25, 1000, 0, 8))
    elif source_name == "mnist":
        cases = mnist_basics.load_all_flat_cases(type='training')
        images, labels = cases[0], cases[1]
        # Scale pixel values by 1/255 and one-hot encode the ten digits.
        data_set = [([pixel / 255 for pixel in image],
                     TFT.int_to_one_hot(label, 10))
                    for image, label in zip(images, labels)]

    if data_set == []:
        print(self.args.source, " is illegal for argument --source")
        print("Legal values are: <filename>.txt, parity, symmetry, "
              "auto_onehot, auto_dense, bitcounter, segmentcounter, mnist")
        quit()

    if is_text_file:
        data_set = normalize(data_set)
    return data_set
def data_loader(self, dataset, caseFraction, testFraction, validationFraction):
    """Load ``dataset`` into a ``CaseManager`` and set its default parameters.

    Depending on ``dataset`` the user is prompted for generator arguments,
    the cases are generated or read from file, handed to
    ``self.openAA.set_case_manager`` and (for every data set except
    ``autoencoder``) a set of default hyper-parameters is written to
    ``self.mp``.  An unrecognised ``dataset`` leaves everything untouched.

    Args:
        dataset: One of ``parity``, ``symmetry``, ``autoencoder``,
            ``bitcounter``, ``segmentcounter``, ``mnist``, ``wine``,
            ``glass``.
        caseFraction: Fraction of the cases to keep; only applied to
            ``mnist``, ``wine`` and ``glass`` and only when it is not 1.
        testFraction: Passed to ``CaseManager`` as ``test_fraction``.
        validationFraction: Passed to ``CaseManager`` as
            ``validation_fraction``.
    """

    def case_sizes(case_manager):
        """Return (input length, target length) of the first training case."""
        first = case_manager.training_cases[0]
        return len(first[0]), len(first[1])

    def report_sizes(case_manager):
        """Print the input and target lengths of the first training case."""
        in_size, out_size = case_sizes(case_manager)
        print("Input size: " + str(in_size) + ", Output size: " +
              str(out_size))

    def make_manager(cases):
        """Wrap ``cases`` in a CaseManager using the requested fractions."""
        return CaseManager(cases,
                           validation_fraction=validationFraction,
                           test_fraction=testFraction)

    # NOTE(review): custom_buckets is written to ``self`` in most branches
    # but to ``self.mp`` for wine/glass (which also read it from ``self.mp``).
    # Left as found; confirm which object is meant to own it.

    if dataset == "parity":
        length = int(input("Length of vectors: "))
        double_flag = input("Activate double flag y/n: ")
        ds = make_manager(TFT.gen_all_parity_cases(length, double_flag == "y"))
        self.openAA.set_case_manager(ds)

        # Default values for parity.  The outer layers follow the generated
        # cases so that any vector length (and the double flag) works.
        in_size, out_size = case_sizes(ds)
        self.mp.layer_dims = [in_size, 20, 40, 20, out_size]
        self.mp.learning_rate = 0.001
        self.mp.hidden_activation_function = "relu"
        self.mp.softmax = False
        self.mp.w_range = "scaled"
        self.mp.bestk = None
        self.mp.epochs = 400
        self.mp.error_function = "mse"
        self.mp.optimizer = "adam"
        self.mp.minibatch_size = 100
        self.mp.lr_freq = None
        self.mp.bs_freq = None
        self.custom_buckets = None
        self.target_accuracy = None
        self.early_stopping = False
        report_sizes(ds)

    elif dataset == "symmetry":
        vector_number = int(input("Number of cases: "))
        vector_length = int(input("Length of vectors: "))
        ds = make_manager(
            TFT.gen_symvect_dataset(vector_length, vector_number))
        self.openAA.set_case_manager(ds)

        # Default values for symmetry.
        self.mp.layer_dims = [vector_length, 40, 20, 1]
        self.mp.learning_rate = 0.001
        self.mp.hidden_activation_function = "relu"
        self.mp.softmax = False
        self.mp.w_range = "scaled"
        self.mp.bestk = None
        self.mp.epochs = 70
        self.mp.optimizer = "adam"
        self.mp.error_function = "mse"
        self.mp.minibatch_size = 8
        self.mp.lr_freq = None
        self.mp.bs_freq = None
        self.custom_buckets = None
        self.target_accuracy = None
        self.early_stopping = False
        report_sizes(ds)

    elif dataset == "autoencoder":
        vector_length = int(input("Set lenght of vectors: "))
        ds = make_manager(TFT.gen_all_one_hot_cases(vector_length))
        self.openAA.set_case_manager(ds)
        report_sizes(ds)

    elif dataset == "bitcounter":
        vector_number = int(input("Number of cases: "))
        vector_length = int(input("Length of input vector: "))
        ds = make_manager(
            TFT.gen_vector_count_cases(vector_number, vector_length))
        self.openAA.set_case_manager(ds)
        report_sizes(ds)
        print(ds.training_cases[0])

        # Default values for bitcounter.  Outer layers follow the generated
        # cases instead of assuming 15-bit vectors.
        in_size, out_size = case_sizes(ds)
        self.mp.layer_dims = [in_size, 128, 64, 32, out_size]
        self.mp.softmax = True
        self.mp.hidden_activation_function = "relu"
        self.mp.bestk = 1
        self.mp.learning_rate = 0.001
        self.mp.epochs = 100
        self.mp.w_range = [-0.01, 0.1]
        self.mp.error_function = "ce"
        self.mp.optimizer = "adam"
        self.mp.minibatch_size = 16
        self.mp.lr_freq = None
        self.mp.bs_freq = None
        self.custom_buckets = None
        self.target_accuracy = None
        self.early_stopping = False

    elif dataset == "segmentcounter":
        vector_number = int(input("Number of cases: "))
        vector_length = int(input("Length of input vector: "))
        min_seg = int(input("Minimum number of segments: "))
        max_seg = int(input("Maximum number of segments: "))
        ds = make_manager(
            TFT.gen_segmented_vector_cases(vector_length, vector_number,
                                           min_seg, max_seg))
        self.openAA.set_case_manager(ds)
        report_sizes(ds)

        # Default values for segmentcounter.  Outer layers follow the
        # generated cases instead of assuming length 25 and 9 classes.
        in_size, out_size = case_sizes(ds)
        self.mp.layer_dims = [in_size, 128, 64, 32, 12, out_size]
        self.mp.softmax = True
        self.mp.hidden_activation_function = "relu"
        self.mp.bestk = 1
        self.mp.learning_rate = 0.0005
        self.mp.epochs = 200
        self.mp.w_range = [0, 0.1]
        self.mp.error_function = "ce"
        self.mp.optimizer = "adam"
        self.mp.minibatch_size = 32
        self.mp.lr_freq = None
        self.mp.bs_freq = None
        self.custom_buckets = None
        self.target_accuracy = None
        self.early_stopping = False

    elif dataset == "mnist":
        cases = mnist.load_flat_text_cases(
            "all_flat_mnist_training_cases_text.txt")
        if caseFraction != 1:
            cases = TFT.get_fraction_of_cases(cases, caseFraction)

        # Count how many cases there are of each digit (position of the 1 in
        # the target vector).
        digit_counts = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
        for case in cases:
            digit_counts[case[1].index(1)] += 1
        print("number of cases: " + str(len(cases)))
        print(digit_counts)

        ds = make_manager(cases)

        # Default values for mnist.
        self.mp.layer_dims = [784, 512, 10]
        self.mp.softmax = False
        self.mp.hidden_activation_function = "relu"
        self.mp.bestk = 1
        self.mp.learning_rate = 0.001
        self.mp.w_range = [0, 0.1]
        self.mp.epochs = 10
        self.mp.error_function = "sce"
        self.mp.optimizer = "adam"
        self.mp.minibatch_size = 20
        self.mp.lr_freq = None
        self.mp.bs_freq = None
        self.custom_buckets = None
        self.target_accuracy = None
        self.early_stopping = False

        self.openAA.set_case_manager(ds)
        report_sizes(ds)
        print(ds.training_cases[0])

    elif dataset == "wine":
        print("\n")
        # TODO: load wine dataset in correct format
        filereader = fr.FileReader()
        cases = filereader.readfile(
            "wine.txt", 9 if self.mp.custom_buckets is None else 6,
            [3, 4, 5, 6, 7, 8], True)
        print("first: " + str(len(cases)))
        if caseFraction != 1:
            cases = TFT.get_fraction_of_cases(cases, caseFraction)
        print("second: " + str(len(cases)))
        ds = make_manager(cases)

        # Default values for wine.  softmax and the hidden activation are
        # model parameters and therefore belong on self.mp, as in every
        # other branch.
        self.mp.layer_dims = [11, 512, 256, 128, 64, 32, 6]
        self.mp.softmax = False
        self.mp.hidden_activation_function = "relu"
        self.mp.bestk = 1
        self.mp.learning_rate = 0.002
        self.mp.w_range = "scaled"
        self.mp.epochs = 20
        self.mp.error_function = "sce"
        self.mp.optimizer = "adam"
        self.mp.custom_buckets = [1]
        self.mp.minibatch_size = 32
        self.mp.lr_freq = 150
        self.mp.bs_freq = 150
        self.target_accuracy = None
        self.early_stopping = False

        self.openAA.set_case_manager(ds)
        print((ds.training_cases[0]))
        print((ds.training_cases[0][0]))
        print((ds.training_cases[0][1]))
        report_sizes(ds)

        # Sanity check: report every case whose input or target length
        # differs from the first case, or that cannot be measured at all.
        expected_in, expected_out = case_sizes(ds)
        for line_nr, case in enumerate(ds.training_cases):
            try:
                if len(case[0]) != expected_in:
                    print(len(case[0]))
            except Exception:
                print("HEI!! input")
                print(case)
                print("line nr " + str(line_nr))
            try:
                if len(case[1]) != expected_out:
                    print(len(case[1]))
            except Exception:
                print("HEI!! target")
                print(case)
                print("line nr " + str(line_nr))

    elif dataset == "glass":
        print("\n")
        filereader = fr.FileReader()
        cases = filereader.readfile(
            "glass.txt", 8 if self.mp.custom_buckets is None else 6,
            [1, 2, 3, 5, 6, 7], True)
        if caseFraction != 1:
            cases = TFT.get_fraction_of_cases(cases, caseFraction)
        ds = make_manager(cases)

        # Default values for glass.  softmax and the hidden activation are
        # model parameters and therefore belong on self.mp, as in every
        # other branch.
        self.mp.layer_dims = [9, 512, 256, 64, 32, 6]
        self.mp.softmax = False
        self.mp.hidden_activation_function = "relu"
        self.mp.bestk = 1
        self.mp.learning_rate = 0.002
        self.mp.w_range = [0, 0.1]
        self.mp.epochs = 200
        self.mp.error_function = "sce"
        self.mp.optimizer = "adam"
        self.mp.custom_buckets = [1]
        self.mp.minibatch_size = 16
        self.mp.lr_freq = 100
        self.mp.bs_freq = 150
        self.target_accuracy = None
        self.early_stopping = False

        self.openAA.set_case_manager(ds)
        print((ds.training_cases[0]))
        print((ds.training_cases[0][0]))
        print((ds.training_cases[0][1]))
        report_sizes(ds)

        # Sanity check: flag cases whose lengths differ from the first case.
        expected_in, expected_out = case_sizes(ds)
        for case in ds.training_cases:
            if len(case[0]) != expected_in:
                print("HEI!! input")
            if len(case[1]) != expected_out:
                print("HEI!! target")
import random
from io import BytesIO
from collections import defaultdict
import pandas as pd
import seaborn as sns

# Directory holding the MNIST files (absolute path on the author's machine).
__mnist_path__ = "/Users/hakongrov/Documents/INDØK/4.År/AI-Prog/Files/dataset/mnist-zip/"

# ******* A General Artificial Neural Network ********
# This is the original GANN, which has been improved in the file gann.py

# Module-level table of data-set generators, keyed by data-set name.  Each
# value is a lambda that forwards its arguments unchanged to the matching
# TFT generator function.
_generator = {
    "symmetry":
    lambda length, count: TFT.gen_symvect_dataset(
        length, count
    ),  # Needs a specific vector length and number of cases: length, count
    "parity":
    lambda a: TFT.gen_all_parity_cases(
        a),  # Needs a specific length parameter. Open question: double flag?
    "autoencoder":
    lambda a: (
        TFT.gen_all_one_hot_cases(a)
    ),  # Alternative: TFT.gen_dense_autoencoder_cases(count, size), which needs a count and a size (and, per the original note, a density of ones)
    "bit counter":
    lambda num, size: TFT.gen_vector_count_cases(
        num, size),  # Network dimensions = [same as input, hidden, input + 1]
    "segment counter":
    lambda a, b, c, d: TFT.gen_segmented_vector_cases(a, b, c, d)
}