def autoex(epochs=300, nbits=4, lrate=0.03, showint=100, mbs=None,
           vfrac=0.1, tfrac=0.1, vint=100, sm=False, bestk=None):
    """Build and train a one-hot autoencoder GANN.

    The network compresses 2**nbits one-hot input vectors through a
    hidden layer of nbits units and back out again. After the first
    training run it continues for twice as many additional epochs.
    Returns the trained Gann instance.
    """
    vec_len = 2 ** nbits
    if not mbs:
        # Default minibatch size: the whole case set.
        mbs = vec_len
    gen = lambda: TFT.gen_all_one_hot_cases(2 ** nbits)
    case_manager = Caseman(cfunc=gen, vfrac=vfrac, tfrac=tfrac)
    net = Gann(dims=[vec_len, nbits, vec_len], cman=case_manager,
               lrate=lrate, showint=showint, mbs=mbs, vint=vint, softmax=sm)
    net.run(epochs, bestk=bestk)
    # Keep training for twice as many extra epochs on the same session.
    net.runmore(epochs * 2, bestk=bestk)
    return net
def manage_data_loaders(function_name, params, loss_function):
    """Dispatch to a case-generating function chosen by name.

    Args:
        function_name: name of the generator to use ("load_mnist",
            "dataset_loader", "gen_all_parity_cases", "gen_all_one_hot_cases",
            "gen_vector_count_cases", or "gen_segmented_vector_cases").
        params: list of (string) parameters forwarded to the generator.
        loss_function: when "sparse_softmax_cross_entropy", one-hot target
            vectors are converted to integer class labels.

    Returns:
        The list of [input, target] cases (with integer labels when the
        sparse loss is requested), or None when required parameters for
        the segmented-vector generator are missing.

    Raises:
        ValueError: if function_name is not one of the known generators
            (previously this fell through and crashed later with
            UnboundLocalError, or silently returned None).
    """
    one_hot = False  # NOTE(review): computed but unused downstream; kept in case a caller relies on future use — TODO confirm
    if function_name == "load_mnist":
        cases = mnist(params, loss_function)
    elif function_name == "dataset_loader":
        cases = dataset_loader(params[0], loss_function)
    elif function_name == "gen_all_parity_cases":
        if len(params) == 1:
            cases = TFT.gen_all_parity_cases(int(params[0]))
        else:
            cases = TFT.gen_all_parity_cases(int(params[0]), bool(params[1]))
    elif function_name == "gen_all_one_hot_cases":
        one_hot = True
        cases = TFT.gen_all_one_hot_cases(2 ** int(params[0]))
    elif function_name == "gen_vector_count_cases":
        one_hot = True
        nof_cases = int(params[0])
        length = int(params[1])
        cases = TFT.gen_vector_count_cases(nof_cases, length)
    elif function_name == "gen_segmented_vector_cases":
        one_hot = True
        # ASSUMING all these parameters are given:
        try:
            nbits = int(params[0])
            nof_cases = int(params[1])
            min_seg = int(params[2])
            max_seg = int(params[3])
        # Narrowed from a bare `except:` so e.g. KeyboardInterrupt is not swallowed.
        except (IndexError, ValueError, TypeError):
            print(
                "Not enough arguments given! [nbits, nof_cases, min_seg, max_seg]"
            )
            return None
        cases = TFT.gen_segmented_vector_cases(nbits, nof_cases, min_seg,
                                               max_seg)
    else:
        raise ValueError("Unknown data loader: " + str(function_name))
    # Sparse need integers as labels, not vectors:
    if loss_function == "sparse_softmax_cross_entropy":
        bit_cases = []
        for case in cases:
            if function_name == "gen_all_parity_cases":
                if len(case[1]) == 1:
                    bit_cases.append([case[0], case[1]])
                else:
                    bit_cases.append([case[0], case[1][1]])
            else:
                # Index of the hot bit becomes the integer class label.
                for i, b in enumerate(case[1]):
                    if b:
                        bit_cases.append([case[0], i])
                        break
        # BUG FIX: the converted cases were built but never returned.
        return bit_cases
    return cases
def autoencoder(self):
    """Prompt for a bit count, install one-hot cases, and train the autoencoder.

    Adjusts the input/output layer sizes in self.params.dims to match the
    generated case vectors before building and running the model.
    """
    nbits = int(input("Enter the length of the vector in bits. "
                      "Please be careful and not crash my shit with a number like 32: "))
    cman = Caseman(cfunc=lambda: TFT.gen_all_one_hot_cases(2 ** nbits),
                   vfrac=self.params.vfrac, tfrac=self.params.tfrac)
    self.ann.set_cman(cman)
    # Resize the first/last layers to the actual case vector lengths.
    first_case = cman.training_cases[0]
    self.params.dims[0] = len(first_case[0])
    self.params.dims[-1] = len(first_case[1])
    print("\nNumber of bits taken from input layer: ", self.params.dims[0],
          "and output set to target vector length at: ", self.params.dims[-1])
    net = self.build_ann()
    self.ann.set_model(net)
    net.run(steps=self.params.steps, bestk=self.params.bestk)
    self.check_mapping_and_dendro()
def autoex(epochs=10000, nbits=4, lrate=0.03, showint=100, mbs=None,
           vfrac=0.1, tfrac=0.1, vint=100, sm=False, bestk=None):
    """Train a one-hot autoencoder and report accuracy on the training set.

    After training (epochs, then twice as many additional epochs), reopens
    the session and prints the fraction of training cases whose output
    argmax matches the target argmax. Returns the trained Gann.
    """
    width = 2 ** nbits
    mbs = mbs or width  # default minibatch: the whole case set
    cman = Caseman(cfunc=lambda: TFT.gen_all_one_hot_cases(2 ** nbits),
                   vfrac=vfrac, tfrac=tfrac)
    net = Gann(dims=[width, nbits, width], cman=cman, lrate=lrate,
               showint=showint, mbs=mbs, vint=vint, softmax=sm)
    net.run(epochs, bestk=bestk)
    net.runmore(epochs * 2, bestk=bestk)
    # Accuracy graph: hit when predicted class equals the target class.
    hits = tf.equal(tf.argmax(net.target, 1), tf.argmax(net.output, 1))
    accuracy = tf.reduce_mean(tf.cast(hits, "float"))
    print("Accuracy")
    net.reopen_current_session()
    training = cman.get_training_cases()
    feed = {net.input: [c[0] for c in training],
            net.target: [c[1] for c in training]}
    print(net.current_session.run(accuracy, feed_dict=feed))
    return net
def autoex(epochs=300, nbits=4, learning_rate=0.03, showint=100,
           batch_size=None, vfrac=0.1, tfrac=0.1, vint=100, sm=False,
           bestk=None):
    """Build and train a one-hot autoencoder; returns the trained Gann.

    Uses all 2**nbits one-hot vectors as cases, trains for `epochs`
    epochs and then continues for twice as many more.
    """
    width = 2 ** nbits
    batch_size = batch_size or width  # default minibatch: the whole case set
    gen = lambda: TFT.gen_all_one_hot_cases(2 ** nbits)
    cm = CaseManager(cfunc=gen, vfrac=vfrac, tfrac=tfrac)
    net = Gann(dims=[width, nbits, width], case_manager=cm,
               learning_rate=learning_rate, showint=showint,
               batch_size=batch_size, vint=vint, softmax=sm)
    net.run(epochs, bestk=bestk)
    net.runmore(epochs * 2, bestk=bestk)
    return net
def create_gann(self, name, networkDimsString, hiddenActivationFunc,
                outputActivationFunc, lossFunc, optimizer, optimizerParams,
                learningRate, weightInitType, weightInit, dataSource,
                dSourcePars, caseFrac, valFrac, testFrac, miniBatchSize):
    """Parse string-valued (GUI) parameters, pick a case generator, build a Gann.

    Stores the constructed network on self.ann.

    Raises:
        NotImplementedError: if dataSource names no known generator.
    """
    # Convert strings of numbers to lists of numbers.
    if weightInitType == "uniform":
        weightInit = [float(v) for v in weightInit.split(" ")]
    else:
        weightInit = (0, 0)
    networkDims = [int(v) for v in networkDimsString.split(" ")]
    dSourcePars = ([] if dSourcePars == '' else
                   [int(v) for v in dSourcePars.split(" ")])
    optimizerParams = ([] if optimizerParams == '' else
                       [float(v) for v in optimizerParams.split(" ")])
    # Map each data-source name to its case-generating thunk. The lambdas
    # close over dSourcePars, which is not reassigned after this point.
    generators = {
        'bitcounter': lambda: TFT.gen_vector_count_cases(
            dSourcePars[0], dSourcePars[1]),
        'autoencoder': lambda: TFT.gen_all_one_hot_cases(dSourcePars[0]),
        'dense_autoencoder': lambda: TFT.gen_dense_autoencoder_cases(
            count=dSourcePars[0], size=dSourcePars[1],
            dr=(dSourcePars[2], dSourcePars[3])),
        'parity': lambda: TFT.gen_all_parity_cases(dSourcePars[0]),
        'segment': lambda: TFT.gen_segmented_vector_cases(
            vectorlen=dSourcePars[0], count=dSourcePars[1],
            minsegs=dSourcePars[2], maxsegs=dSourcePars[3]),
        'MNIST': lambda: TFT.gen_mnist_cases(),
        'wine': lambda: TFT.gen_uc_irvine_cases('winequality_red'),
        'glass': lambda: TFT.gen_uc_irvine_cases('glass'),
        'yeast': lambda: TFT.gen_uc_irvine_cases('yeast'),
        'hackers': lambda: TFT.gen_hackers_choice_cases('balance-scale'),
    }
    if dataSource not in generators:
        raise NotImplementedError("Datasource: " + dataSource +
                                  " is not implemented")
    case_generator = generators[dataSource]
    cMan = Caseman(cfunc=case_generator, cfrac=float(caseFrac),
                   vfrac=float(valFrac), tfrac=float(testFrac))
    self.ann = Gann(name=name, netDims=networkDims, cMan=cMan,
                    learningRate=float(learningRate),
                    mbs=int(miniBatchSize),
                    hiddenActivationFunc=hiddenActivationFunc,
                    outputActivationFunc=outputActivationFunc,
                    lossFunc=lossFunc, optimizer=optimizer,
                    optimizerParams=optimizerParams,
                    weightInitType=weightInitType,
                    weightRange=weightInit)
def __init__(self, nh=3, lr=.1):
    """Cache hyper-parameters, generate all one-hot cases, and build the net.

    Args:
        nh: number of hidden units; the case set is all 2**nh one-hot vectors.
        lr: learning rate.
    """
    self.learning_rate = lr
    self.num_hiddens = nh
    self.cases = TFT.gen_all_one_hot_cases(2 ** nh)
    self.build_neural_network(nh)
def source(self):
    """Build and return the data set selected by self.args.source.

    Supports a CSV/semicolon .txt file (min-max normalized), plus the
    generated sets: parity, symmetry, auto_onehot, auto_dense, bitcounter,
    segmentcounter, and mnist. Exits the program on an unknown source.

    Fixes over the original:
      * locals no longer shadow the builtin `input`;
      * normalize() no longer raises ZeroDivisionError on a constant
        feature column (such a column now normalizes to 0.0);
      * "filenme" typo corrected in the user-facing error message.
    """

    def normalize(cases):
        # Min-max scale each input feature column to [0, 1].
        features = numpy.array([c[0] for c in cases])
        targets = [c[1] for c in cases]
        min_arr = numpy.min(features, axis=0)
        max_arr = numpy.max(features, axis=0)
        for element in features:
            for i, e in enumerate(element):
                span = max_arr[i] - min_arr[i]
                # Guard: a constant column would otherwise divide by zero.
                element[i] = (e - min_arr[i]) / span if span else 0.0
        return list(zip(features, targets))

    def to_float(inp):
        # returns 0 if input is ? (questionmark)
        return 0 if inp == '?' else float(inp)

    self.source_is_called = True
    print("source:", self.args.source)
    data_set = []
    if self.args.source[-4:] == ".txt":
        # Each line: feature values separated by ';' or ',', class label last.
        with open("data_set_files/" + self.args.source) as file:
            data = list(
                map(lambda x: re.split("[;,]", x), file.readlines()))
        data = list(map(lambda x: list(map(to_float, x)), data))
        max_d = max(map(lambda x: int(x[-1]), data))
        for element in data:
            features = element[:-1]
            # Labels are 1-based in the files; shift to 0-based one-hot.
            target = TFT.int_to_one_hot(int(element[-1]) - 1, max_d)
            data_set.append([features, target])
    elif self.args.source == "parity":
        if self.args.sourceinit is None:
            data_set = TFT.gen_all_parity_cases(10)
        else:
            data_set = TFT.gen_all_parity_cases(self.args.sourceinit[0])
    elif self.args.source == "symmetry":
        if self.args.sourceinit is None:
            vecs = TFT.gen_symvect_dataset(101, 2000)
        else:
            vecs = TFT.gen_symvect_dataset(self.args.sourceinit[0],
                                           self.args.sourceinit[1])
        inputs = list(map(lambda x: x[:-1], vecs))
        targets = list(map(lambda x: TFT.int_to_one_hot(x[-1], 2), vecs))
        data_set = list(zip(inputs, targets))
    elif self.args.source == "auto_onehot":
        if self.args.sourceinit is None:
            data_set = TFT.gen_all_one_hot_cases(64)
        else:
            data_set = TFT.gen_all_one_hot_cases(self.args.sourceinit[0])
    elif self.args.source == "auto_dense":
        if self.args.sourceinit is None:
            data_set = TFT.gen_dense_autoencoder_cases(2000, 100)
        else:
            data_set = TFT.gen_dense_autoencoder_cases(
                self.args.sourceinit[0], self.args.sourceinit[1])
    elif self.args.source == "bitcounter":
        if self.args.sourceinit is None:
            data_set = TFT.gen_vector_count_cases(500, 15)
        else:
            data_set = TFT.gen_vector_count_cases(self.args.sourceinit[0],
                                                  self.args.sourceinit[1])
    elif self.args.source == "segmentcounter":
        if self.args.sourceinit is None:
            data_set = TFT.gen_segmented_vector_cases(25, 1000, 0, 8)
        else:
            data_set = TFT.gen_segmented_vector_cases(
                self.args.sourceinit[0], self.args.sourceinit[1],
                self.args.sourceinit[2], self.args.sourceinit[3])
    elif self.args.source == "mnist":
        # mnist_basics.load_all_flat_cases(type='testing')
        cases = mnist_basics.load_all_flat_cases(type='training')
        features = cases[0]
        labels = cases[1]
        # Scale pixel values from [0, 255] to [0, 1].
        features = list(
            map(lambda x: list(map(lambda e: e / 255, x)), features))
        labels = list(map(lambda x: TFT.int_to_one_hot(x, 10), labels))
        data_set = list(zip(features, labels))
    if data_set == []:
        print(self.args.source, " is illegal for argument --source")
        print("Legal values are: <filename>.txt, parity, symmetry, "
              "auto_onehot, auto_dense, bitcounter, segmentcounter", sep="")
        quit()
    # Only file-based sets need normalization; generated sets are already scaled.
    if self.args.source[-4:] == ".txt":
        data_set = normalize(data_set)
    return data_set
def data_loader(self, dataset, caseFraction, testFraction, validationFraction):
    """Load the named dataset, install its CaseManager, and set model defaults.

    Prompts the user (via input()) for dataset-specific sizes where needed,
    installs a CaseManager on self.openAA, and writes per-dataset default
    hyper-parameters onto self.mp.

    BUG FIX: the "wine" and "glass" branches previously assigned
    self.softmax and self.hidden_activation_function instead of the
    self.mp.* attributes that every other branch (and presumably the model
    builder) uses; they now set self.mp.softmax / 
    self.mp.hidden_activation_function.

    NOTE(review): some branches set self.custom_buckets while wine/glass set
    self.mp.custom_buckets — preserved as-is; verify which one the model reads.
    """
    if dataset == "parity":
        length = int(input("Length of vectors: "))
        doubleFlag = input("Activate double flag y/n: ")
        ds = CaseManager(TFT.gen_all_parity_cases(length, doubleFlag == "y"),
                         validation_fraction=validationFraction,
                         test_fraction=testFraction)
        self.openAA.set_case_manager(ds)
        # Default values for parity
        self.mp.layer_dims = [10, 20, 40, 20, 1]
        self.mp.learning_rate = 0.001
        self.mp.hidden_activation_function = "relu"
        self.mp.softmax = False
        self.mp.w_range = "scaled"
        self.mp.bestk = None
        self.mp.epochs = 400
        self.mp.error_function = "mse"
        self.mp.optimizer = "adam"
        self.mp.minibatch_size = 100
        self.mp.lr_freq = None
        self.mp.bs_freq = None
        self.custom_buckets = None
        self.target_accuracy = None
        self.early_stopping = False
        # use this to set size of input layer
        print("Input size: " + str(len(ds.training_cases[0][0])) +
              ", Output size: " + str(len(ds.training_cases[0][1])))
    elif dataset == "symmetry":
        vectorNumber = int(input("Number of cases: "))
        vectorLength = int(input("Length of vectors: "))
        ds = CaseManager(TFT.gen_symvect_dataset(vectorLength, vectorNumber),
                         validation_fraction=validationFraction,
                         test_fraction=testFraction)
        self.openAA.set_case_manager(ds)
        # Default values for symmetry
        self.mp.layer_dims = [vectorLength, 40, 20, 1]
        self.mp.learning_rate = 0.001
        self.mp.hidden_activation_function = "relu"
        self.mp.softmax = False
        self.mp.w_range = "scaled"
        self.mp.bestk = None
        self.mp.epochs = 70
        self.mp.optimizer = "adam"
        self.mp.error_function = "mse"
        self.mp.minibatch_size = 8
        self.mp.lr_freq = None
        self.mp.bs_freq = None
        self.custom_buckets = None
        self.target_accuracy = None
        self.early_stopping = False
        print("Input size: " + str(len(ds.training_cases[0][0])) +
              ", Output size: " + str(len(ds.training_cases[0][1])))
    elif dataset == "autoencoder":
        vectorLength = int(input("Set lenght of vectors: "))
        ds = CaseManager(TFT.gen_all_one_hot_cases(vectorLength),
                         validation_fraction=validationFraction,
                         test_fraction=testFraction)
        self.openAA.set_case_manager(ds)
        print("Input size: " + str(len(ds.training_cases[0][0])) +
              ", Output size: " + str(len(ds.training_cases[0][1])))
    elif dataset == "bitcounter":
        vectorNumber = int(input("Number of cases: "))
        vectorLength = int(input("Length of input vector: "))
        ds = CaseManager(TFT.gen_vector_count_cases(vectorNumber,
                                                    vectorLength),
                         validation_fraction=validationFraction,
                         test_fraction=testFraction)
        self.openAA.set_case_manager(ds)
        print("Input size: " + str(len(ds.training_cases[0][0])) +
              ", Output size: " + str(len(ds.training_cases[0][1])))
        print(ds.training_cases[0])
        # Default values for bitcounter
        self.mp.layer_dims = [15, 128, 64, 32, 16]
        self.mp.softmax = True
        self.mp.hidden_activation_function = "relu"
        self.mp.bestk = 1
        self.mp.learning_rate = 0.001
        self.mp.epochs = 100
        self.mp.w_range = [-0.01, 0.1]
        self.mp.error_function = "ce"
        self.mp.optimizer = "adam"
        self.mp.minibatch_size = 16
        self.mp.lr_freq = None
        self.mp.bs_freq = None
        self.custom_buckets = None
        self.target_accuracy = None
        self.early_stopping = False
    elif dataset == "segmentcounter":
        vectorNumber = int(input("Number of cases: "))
        vectorLength = int(input("Length of input vector: "))
        minSeg = int(input("Minimum number of segments: "))
        maxSeg = int(input("Maximum number of segments: "))
        ds = CaseManager(TFT.gen_segmented_vector_cases(vectorLength,
                                                        vectorNumber,
                                                        minSeg, maxSeg),
                         validation_fraction=validationFraction,
                         test_fraction=testFraction)
        self.openAA.set_case_manager(ds)
        print("Input size: " + str(len(ds.training_cases[0][0])) +
              ", Output size: " + str(len(ds.training_cases[0][1])))
        # Default values for segmentcounter
        self.mp.layer_dims = [25, 128, 64, 32, 12, 9]
        self.mp.softmax = True
        self.mp.hidden_activation_function = "relu"
        self.mp.bestk = 1
        self.mp.learning_rate = 0.0005
        self.mp.epochs = 200
        self.mp.w_range = [0, 0.1]
        self.mp.error_function = "ce"
        self.mp.optimizer = "adam"
        self.mp.minibatch_size = 32
        self.mp.lr_freq = None
        self.mp.bs_freq = None
        self.custom_buckets = None
        self.target_accuracy = None
        self.early_stopping = False
    elif dataset == "mnist":
        cases = mnist.load_flat_text_cases(
            "all_flat_mnist_training_cases_text.txt")
        if caseFraction != 1:
            cases = TFT.get_fraction_of_cases(cases, caseFraction)
        # Count how many cases fall in each digit class (0-9).
        numbersFordeling = [0] * 10
        for case in cases:
            num = case[1].index(1)
            numbersFordeling[num] += 1
        print("number of cases: " + str(len(cases)))
        print(numbersFordeling)
        ds = CaseManager(cases,
                         validation_fraction=validationFraction,
                         test_fraction=testFraction)
        # Default values for mnist
        self.mp.layer_dims = [784, 512, 10]
        self.mp.softmax = False
        self.mp.hidden_activation_function = "relu"
        self.mp.bestk = 1
        self.mp.learning_rate = 0.001
        self.mp.w_range = [0, 0.1]
        self.mp.epochs = 10
        self.mp.error_function = "sce"
        self.mp.optimizer = "adam"
        self.mp.minibatch_size = 20
        self.mp.lr_freq = None
        self.mp.bs_freq = None
        self.custom_buckets = None
        self.target_accuracy = None
        self.early_stopping = False
        self.openAA.set_case_manager(ds)
        print("Input size: " + str(len(ds.training_cases[0][0])) +
              ", Output size: " + str(len(ds.training_cases[0][1])))
        print(ds.training_cases[0])
    elif dataset == "wine":
        print("\n")
        #TODO : load wine dataset in correct format
        filereader = fr.FileReader()
        cases = filereader.readfile(
            "wine.txt", 9 if self.mp.custom_buckets is None else 6,
            [3, 4, 5, 6, 7, 8], True)
        print("first: " + str(len(cases)))
        if caseFraction != 1:
            cases = TFT.get_fraction_of_cases(cases, caseFraction)
        print("second: " + str(len(cases)))
        ds = CaseManager(cases,
                         validation_fraction=validationFraction,
                         test_fraction=testFraction)
        # Default values for wine
        self.mp.layer_dims = [11, 512, 256, 128, 64, 32, 6]
        # BUG FIX: was self.softmax / self.hidden_activation_function.
        self.mp.softmax = False
        self.mp.hidden_activation_function = "relu"
        self.mp.bestk = 1
        self.mp.learning_rate = 0.002
        self.mp.w_range = "scaled"
        self.mp.epochs = 20
        self.mp.error_function = "sce"
        self.mp.optimizer = "adam"
        self.mp.custom_buckets = [1]
        self.mp.minibatch_size = 32
        self.mp.lr_freq = 150
        self.mp.bs_freq = 150
        self.target_accuracy = None
        self.early_stopping = False
        self.openAA.set_case_manager(ds)
        print(ds.training_cases[0])
        print(ds.training_cases[0][0])
        print(ds.training_cases[0][1])
        print("Input size: " + str(len(ds.training_cases[0][0])) +
              ", Output size: " + str(len(ds.training_cases[0][1])))
        # Sanity-check that every case has consistent input/target lengths.
        i = 0
        for case in ds.training_cases:
            try:
                if len(case[0]) != len(ds.training_cases[0][0]):
                    print(len(case[0]))
            except Exception as e:
                print("HEI!! input")
                print(case)
                print("line nr " + str(i))
            try:
                if len(case[1]) != len(ds.training_cases[0][1]):
                    print(len(case[1]))
            except Exception as e:
                print("HEI!! target")
                print(case)
                print("line nr " + str(i))
            i += 1
    elif dataset == "glass":
        print("\n")
        filereader = fr.FileReader()
        cases = filereader.readfile(
            "glass.txt", 8 if self.mp.custom_buckets is None else 6,
            [1, 2, 3, 5, 6, 7], True)
        if caseFraction != 1:
            cases = TFT.get_fraction_of_cases(cases, caseFraction)
        ds = CaseManager(cases,
                         validation_fraction=validationFraction,
                         test_fraction=testFraction)
        # Default values for glass
        self.mp.layer_dims = [9, 512, 256, 64, 32, 6]
        # BUG FIX: was self.softmax / self.hidden_activation_function.
        self.mp.softmax = False
        self.mp.hidden_activation_function = "relu"
        self.mp.bestk = 1
        self.mp.learning_rate = 0.002
        self.mp.w_range = [0, 0.1]
        self.mp.epochs = 200
        self.mp.error_function = "sce"
        self.mp.optimizer = "adam"
        self.mp.custom_buckets = [1]
        self.mp.minibatch_size = 16
        self.mp.lr_freq = 100
        self.mp.bs_freq = 150
        self.target_accuracy = None
        self.early_stopping = False
        self.openAA.set_case_manager(ds)
        print(ds.training_cases[0])
        print(ds.training_cases[0][0])
        print(ds.training_cases[0][1])
        print("Input size: " + str(len(ds.training_cases[0][0])) +
              ", Output size: " + str(len(ds.training_cases[0][1])))
        # Sanity-check case lengths.
        for case in ds.training_cases:
            if len(case[0]) != len(ds.training_cases[0][0]):
                print("HEI!! input")
            if len(case[1]) != len(ds.training_cases[0][1]):
                print("HEI!! target")
# ******* A General Artificial Neural Network ********
# This is the original GANN, which has been improved in the file gann.py

# Maps a dataset name (the dict key) to the function that generates it.
_generator = {
    # Needs a specific length and number of cases: length, count.
    "symmetry": lambda length, count: TFT.gen_symvect_dataset(length, count),
    # Needs a specific length parameter. Double flag?
    "parity": lambda n: TFT.gen_all_parity_cases(n),
    # Or TFT.gen_dense_autoencoder_cases(count, size): needs a specific length
    # and size, plus a specific density of ones for the latter.
    "autoencoder": lambda n: (TFT.gen_all_one_hot_cases(n)),
    # Dimensions = [same as input, hidden, input + 1].
    "bit counter": lambda num, size: TFT.gen_vector_count_cases(num, size),
    "segment counter": lambda length, count, minsegs, maxsegs:
        TFT.gen_segmented_vector_cases(length, count, minsegs, maxsegs),
}

# Hidden-layer activation functions, keyed by name; each takes the input
# tensor and a TF op name.
_hidden_activation_function = {
    "sigmoid": lambda x, name: tf.nn.sigmoid(x, name),
    "relu": lambda x, name: tf.nn.relu(x, name),
    "tanh": lambda x, name: tf.nn.tanh(x, name),
}
# NOTE(review): the entries below close a config dict whose opening lines are
# in an earlier chunk; they stay with that enclosing literal.
'display_wgts': [],        # layer indices whose weight matrices get displayed
'display_biases': [],      # layer indices whose bias vectors get displayed
'dendrogram_layers': [],   # layers included in the dendrogram plot
'numeric': False
}

# Hyper-parameter bundle for the one-hot autoencoder run. Key meanings are
# inferred from sibling code in this file; confirm against the consumer.
AUTOENCODER_CONFIG = {
    'name': "autoencoder",
    'steps': 30000,          # total training steps
    'lrate': "scale",        # presumably a scaled learning-rate mode — TODO confirm
    'tint': 100,             # test interval? — TODO confirm
    'showint': 100,          # display/show interval
    'mbs': 100,              # minibatch size — presumably; verify against consumer
    'wgt_range': (-.3, .3),  # initial weight range
    'hidden_layers': [4],    # one hidden layer of 4 units
    'hidac': (lambda x, y: tf.tanh(x, name=y)),        # hidden activation
    'outac': (lambda x, y: tf.nn.softmax(x, name=y)),  # output activation
    'case_generator': (lambda: TFT.gen_all_one_hot_cases(3)),
    'stdeviation': False,
    'vfrac': 0.0,            # validation fraction
    'tfrac': 0.0,            # test fraction
    'cfunc': 'rmse',         # cost/error function name
    'mapbs': 3,              # map-batch size — presumably; verify
    'cfrac': 1.0,            # fraction of cases to use
    'map_layers': [0, 1, 2],
    'display_wgts': [1, 2],
    'display_biases': [1, 2],
    'dendrogram_layers': [1],
    'numeric': False
}