def segments(epochs=3001, length=25, ncases=1000, min_seg=0, max_seg=8,
             one_hot=True, learning_rate=0.0001, showint=1000, batch_size=32,
             vfrac=0.1, tfrac=0.1, vint=200, sm=True, bestk=1, hidden_size=500):
    """Train a GANN on the segment-counting task and return the trained net.

    Generates ``ncases`` random bit vectors of ``length`` bits containing
    between ``min_seg`` and ``max_seg`` segments of consecutive 1s, then
    trains a single-hidden-layer network to count the segments.

    Args:
        epochs: number of training epochs passed to ``ann.run``.
        length: bit-vector length (also the input layer size).
        ncases: number of generated cases.
        min_seg, max_seg: inclusive range of segment counts.
        one_hot: whether targets are one-hot encoded.
        learning_rate, showint, batch_size, vfrac, tfrac, vint, sm, bestk:
            forwarded to ``Gann`` / ``CaseManager`` unchanged.
        hidden_size: width of the single hidden layer (generalized from the
            previously hard-coded 500; default preserves old behavior).

    Returns:
        The trained ``Gann`` instance.
    """
    case_generator = (lambda: TFT.gen_segmented_vector_cases(
        length, ncases, min_seg, max_seg, one_hot))
    case_manager = CaseManager(cfunc=case_generator, vfrac=vfrac, tfrac=tfrac)
    # Output layer: one unit per possible segment count.
    ann = Gann(dims=[length, hidden_size, max_seg - min_seg + 1],
               case_manager=case_manager, learning_rate=learning_rate,
               showint=showint, batch_size=batch_size, vint=vint, softmax=sm)
    ann.run(epochs, bestk=bestk)
    return ann
def manage_data_loaders(function_name, params, loss_function):
    """Generate training cases via the generator named by ``function_name``.

    Args:
        function_name: one of ``load_mnist``, ``dataset_loader``,
            ``gen_all_parity_cases``, ``gen_all_one_hot_cases``,
            ``gen_vector_count_cases``, ``gen_segmented_vector_cases``.
        params: positional generator arguments (string or numeric).
        loss_function: loss identifier; ``sparse_softmax_cross_entropy``
            triggers conversion of one-hot targets into integer labels.

    Returns:
        A list of ``[input, target]`` cases (label-converted for the sparse
        loss), or ``None`` when required parameters are missing.
    """
    one_hot = False  # tracks whether the chosen generator one-hot encodes targets
    if function_name == "load_mnist":
        cases = mnist(params, loss_function)
    elif function_name == "dataset_loader":
        cases = dataset_loader(params[0], loss_function)
    elif function_name == "gen_all_parity_cases":
        if len(params) == 1:
            cases = TFT.gen_all_parity_cases(int(params[0]))
        else:
            cases = TFT.gen_all_parity_cases(int(params[0]), bool(params[1]))
    elif function_name == "gen_all_one_hot_cases":
        one_hot = True
        cases = TFT.gen_all_one_hot_cases(2**int(params[0]))
    elif function_name == "gen_vector_count_cases":
        one_hot = True
        nof_cases = int(params[0])
        length = int(params[1])
        cases = TFT.gen_vector_count_cases(nof_cases, length)
    elif function_name == "gen_segmented_vector_cases":
        one_hot = True
        # All four parameters are required for this generator.
        try:
            nbits = int(params[0])
            nof_cases = int(params[1])
            min_seg = int(params[2])
            max_seg = int(params[3])
        except (IndexError, ValueError):  # was a bare except; narrowed
            print(
                "Not enough arguments given! [nbits, nof_cases, min_seg, max_seg]"
            )
            return
        cases = TFT.gen_segmented_vector_cases(nbits, nof_cases, min_seg,
                                               max_seg)
    # Sparse loss needs integer class labels, not one-hot vectors:
    if (loss_function == "sparse_softmax_cross_entropy"):
        bit_cases = []
        for case in cases:
            if (function_name == "gen_all_parity_cases"):
                # NOTE(review): a length-1 target appends the vector itself
                # rather than a scalar — preserved from the original; confirm
                # downstream expectations.
                if (len(case[1]) == 1):
                    bit_cases.append([case[0], case[1]])
                else:
                    bit_cases.append([case[0], case[1][1]])
            else:
                for i, b in enumerate(case[1]):
                    if (b):
                        bit_cases.append([case[0], i])
                        break
        # BUG FIX: the converted cases were previously built and discarded.
        return bit_cases
    return cases
def segmentcounter(self):
    """Interactively configure and run the segment-counting network.

    Prompts for vector size, case count and min/max segment counts
    (defaults 25 / 1000 / 0 / 8), rebuilds the case manager, resizes
    the input and output layers to match the generated cases, then
    builds and runs the model and shows the mapping/dendrogram views.
    """
    # BUG FIX: input() returns strings; user-supplied values are now
    # converted to int before being passed to the case generator.
    size = input("Enter the size. 25 is default: ")
    size = int(size) if size else 25
    count = input("Enter the number of cases. 1000 default: ")
    count = int(count) if count else 1000
    minsegs = input("Enter the minimum number of segments in a vector. Default 0: ")
    minsegs = int(minsegs) if minsegs else 0
    maxsegs = input("Enter the maximum number of segments in a vector. Default 8: ")
    maxsegs = int(maxsegs) if maxsegs else 8
    print(size, count, minsegs, maxsegs)
    case_generator = (lambda: TFT.gen_segmented_vector_cases(
        size, count, minsegs, maxsegs))
    case_man = Caseman(cfunc=case_generator, vfrac=self.params.vfrac,
                       tfrac=self.params.tfrac)
    self.ann.set_cman(case_man)
    # Resize the network ends to match the generated case dimensions.
    self.params.dims[0] = len(case_man.training_cases[0][0])
    self.params.dims[-1] = len(case_man.training_cases[0][1])
    print("\nNumber of bits taken from input layer: ", self.params.dims[0],
          "and output set to target vector length at: ", self.params.dims[-1])
    model = self.build_ann()
    self.ann.set_model(model)
    model.run(steps=self.params.steps, bestk=self.params.bestk)
    self.check_mapping_and_dendro()
def create_gann(self, name, networkDimsString, hiddenActivationFunc,
                outputActivationFunc, lossFunc, optimizer, optimizerParams,
                learningRate, weightInitType, weightInit, dataSource,
                dSourcePars, caseFrac, valFrac, testFrac, miniBatchSize):
    """Parse the string-typed GUI parameters, select a case generator for
    the requested data source, and install a Caseman + Gann pair on self.

    Raises:
        NotImplementedError: when ``dataSource`` is not a known source.
    """
    # Space-separated number strings -> typed lists.
    if weightInitType == "uniform":
        weightInit = [float(tok) for tok in weightInit.split(" ")]
    else:
        weightInit = (0, 0)
    networkDims = [int(tok) for tok in networkDimsString.split(" ")]
    dSourcePars = ([] if dSourcePars == '' else
                   [int(tok) for tok in dSourcePars.split(" ")])
    optimizerParams = ([] if optimizerParams == '' else
                       [float(tok) for tok in optimizerParams.split(" ")])

    # Dispatch table of zero-argument closures; dSourcePars is read lazily,
    # only when the chosen generator is actually invoked.
    generator_table = {
        'bitcounter': lambda: TFT.gen_vector_count_cases(
            dSourcePars[0], dSourcePars[1]),
        'autoencoder': lambda: TFT.gen_all_one_hot_cases(dSourcePars[0]),
        'dense_autoencoder': lambda: TFT.gen_dense_autoencoder_cases(
            count=dSourcePars[0], size=dSourcePars[1],
            dr=(dSourcePars[2], dSourcePars[3])),
        'parity': lambda: TFT.gen_all_parity_cases(dSourcePars[0]),
        'segment': lambda: TFT.gen_segmented_vector_cases(
            vectorlen=dSourcePars[0], count=dSourcePars[1],
            minsegs=dSourcePars[2], maxsegs=dSourcePars[3]),
        'MNIST': lambda: TFT.gen_mnist_cases(),
        'wine': lambda: TFT.gen_uc_irvine_cases('winequality_red'),
        'glass': lambda: TFT.gen_uc_irvine_cases('glass'),
        'yeast': lambda: TFT.gen_uc_irvine_cases('yeast'),
        'hackers': lambda: TFT.gen_hackers_choice_cases('balance-scale'),
    }
    if dataSource not in generator_table:
        raise NotImplementedError("Datasource: " + dataSource +
                                  " is not implemented")
    case_generator = generator_table[dataSource]

    cMan = Caseman(cfunc=case_generator, cfrac=float(caseFrac),
                   vfrac=float(valFrac), tfrac=float(testFrac))
    ann = Gann(name=name, netDims=networkDims, cMan=cMan,
               learningRate=float(learningRate), mbs=int(miniBatchSize),
               hiddenActivationFunc=hiddenActivationFunc,
               outputActivationFunc=outputActivationFunc, lossFunc=lossFunc,
               optimizer=optimizer, optimizerParams=optimizerParams,
               weightInitType=weightInitType, weightRange=weightInit)
    self.ann = ann
def source(self):
    """Build the data set selected by ``self.args.source``.

    Supported sources: a ``<name>.txt`` file under ``data_set_files/``
    (semicolon/comma separated, last column is a 1-based class label),
    or the generators ``parity``, ``symmetry``, ``auto_onehot``,
    ``auto_dense``, ``bitcounter``, ``segmentcounter`` and ``mnist``.
    File-based inputs are min-max normalized column-wise.

    Returns:
        A list of ``[input, target]`` pairs.
    """

    def normalize(cases):
        """Min-max scale each input feature column to [0, 1]."""
        # Renamed locals: the originals shadowed the builtin `input`.
        features = numpy.array([c[0] for c in cases])
        targets = [c[1] for c in cases]
        min_arr = numpy.min(features, axis=0)
        max_arr = numpy.max(features, axis=0)
        span = max_arr - min_arr
        # BUG FIX: a constant column made (max - min) zero and the division
        # produced nan/inf; such columns now normalize to 0 instead.
        span[span == 0] = 1.0
        features = (features - min_arr) / span
        return list(zip(features, targets))

    def to_float(inp):
        # '?' marks a missing value in the source files; map it to 0.
        return 0 if inp == '?' else float(inp)

    self.source_is_called = True
    print("source:", self.args.source)
    data_set = []
    if self.args.source[-4:] == ".txt":
        with open("data_set_files/" + self.args.source) as file:
            data = list(
                map(lambda x: re.split("[;,]", x), file.readlines()))
            data = list(map(lambda x: list(map(to_float, x)), data))
            # Highest class label defines the one-hot vector length.
            max_d = max(map(lambda x: int(x[-1]), data))
            for element in data:
                features = element[:-1]
                target = TFT.int_to_one_hot(int(element[-1]) - 1, max_d)
                data_set.append([features, target])
    elif self.args.source == "parity":
        if self.args.sourceinit is None:
            data_set = TFT.gen_all_parity_cases(10)
        else:
            data_set = TFT.gen_all_parity_cases(self.args.sourceinit[0])
    elif self.args.source == "symmetry":
        if self.args.sourceinit is None:
            vecs = TFT.gen_symvect_dataset(101, 2000)
        else:
            vecs = TFT.gen_symvect_dataset(self.args.sourceinit[0],
                                           self.args.sourceinit[1])
        inputs = list(map(lambda x: x[:-1], vecs))
        targets = list(map(lambda x: TFT.int_to_one_hot(x[-1], 2), vecs))
        data_set = list(zip(inputs, targets))
    elif self.args.source == "auto_onehot":
        if self.args.sourceinit is None:
            data_set = TFT.gen_all_one_hot_cases(64)
        else:
            data_set = TFT.gen_all_one_hot_cases(self.args.sourceinit[0])
    elif self.args.source == "auto_dense":
        if self.args.sourceinit is None:
            data_set = TFT.gen_dense_autoencoder_cases(2000, 100)
        else:
            data_set = TFT.gen_dense_autoencoder_cases(
                self.args.sourceinit[0], self.args.sourceinit[1])
    elif self.args.source == "bitcounter":
        if self.args.sourceinit is None:
            data_set = TFT.gen_vector_count_cases(500, 15)
        else:
            data_set = TFT.gen_vector_count_cases(self.args.sourceinit[0],
                                                  self.args.sourceinit[1])
    elif self.args.source == "segmentcounter":
        if self.args.sourceinit is None:
            data_set = TFT.gen_segmented_vector_cases(25, 1000, 0, 8)
        else:
            data_set = TFT.gen_segmented_vector_cases(
                self.args.sourceinit[0], self.args.sourceinit[1],
                self.args.sourceinit[2], self.args.sourceinit[3])
    elif self.args.source == "mnist":
        cases = mnist_basics.load_all_flat_cases(type='training')
        features = cases[0]
        labels = cases[1]
        # Scale pixel values to [0, 1]; one-hot encode the 10 digits.
        features = list(
            map(lambda x: list(map(lambda e: e / 255, x)), features))
        labels = list(map(lambda x: TFT.int_to_one_hot(x, 10), labels))
        data_set = list(zip(features, labels))
    if data_set == []:
        print(self.args.source, " is illegal for argument --source")
        # BUG FIX: typo "filenme" -> "filename" in the usage message.
        print("Legal values are: <filename>.txt, parity, symmetry, "
              "auto_onehot, auto_dense, bitcounter, segmentcounter",
              sep="")
        quit()
    if self.args.source[-4:] == ".txt":
        data_set = normalize(data_set)
    return data_set
def cfunc_seg_vectors():
    """Case-generator hook: build segmented-vector cases from the first
    four entries of the module-level data-source parameter list."""
    vec_len, n_cases, seg_min, seg_max = data_src_params[:4]
    return tft.gen_segmented_vector_cases(vec_len, n_cases, seg_min, seg_max)
def data_loader(self, dataset, caseFraction, testFraction, validationFraction):
    """Load the named data set, install a CaseManager on ``self.openAA``
    and reset ``self.mp`` to per-dataset default hyperparameters.

    Args:
        dataset: one of ``parity``, ``symmetry``, ``autoencoder``,
            ``bitcounter``, ``segmentcounter``, ``mnist``, ``wine``,
            ``glass``.  Some branches prompt interactively for sizes.
        caseFraction: fraction of cases to keep (1 keeps all).
        testFraction, validationFraction: split fractions for CaseManager.

    NOTE(review): some branches set ``self.custom_buckets`` while the wine/
    glass loaders read ``self.mp.custom_buckets`` — the duplication is kept
    as-is; confirm which attribute the rest of the program consults.
    """
    if dataset == "parity":
        length = int(input("Length of vectors: "))
        doubleFlag = input("Activate double flag y/n: ")
        ds = CaseManager(TFT.gen_all_parity_cases(length, doubleFlag == "y"),
                         validation_fraction=validationFraction,
                         test_fraction=testFraction)
        self.openAA.set_case_manager(ds)
        # Default values for parity
        self.mp.layer_dims = [10, 20, 40, 20, 1]
        self.mp.learning_rate = 0.001
        self.mp.hidden_activation_function = "relu"
        self.mp.softmax = False
        self.mp.w_range = "scaled"
        self.mp.bestk = None
        self.mp.epochs = 400
        self.mp.error_function = "mse"
        self.mp.optimizer = "adam"
        self.mp.minibatch_size = 100
        self.mp.lr_freq = None
        self.mp.bs_freq = None
        self.custom_buckets = None
        self.target_accuracy = None
        self.early_stopping = False
        # Report layer sizes implied by the generated cases.
        print("Input size: " + str(len(ds.training_cases[0][0])) +
              ", Output size: " + str(len(ds.training_cases[0][1])))
    elif dataset == "symmetry":
        vectorNumber = int(input("Number of cases: "))
        vectorLength = int(input("Length of vectors: "))
        ds = CaseManager(TFT.gen_symvect_dataset(vectorLength, vectorNumber),
                         validation_fraction=validationFraction,
                         test_fraction=testFraction)
        self.openAA.set_case_manager(ds)
        # Default values for symmetry
        self.mp.layer_dims = [vectorLength, 40, 20, 1]
        self.mp.learning_rate = 0.001
        self.mp.hidden_activation_function = "relu"
        self.mp.softmax = False
        self.mp.w_range = "scaled"
        self.mp.bestk = None
        self.mp.epochs = 70
        self.mp.optimizer = "adam"
        self.mp.error_function = "mse"
        self.mp.minibatch_size = 8
        self.mp.lr_freq = None
        self.mp.bs_freq = None
        self.custom_buckets = None
        self.target_accuracy = None
        self.early_stopping = False
        print("Input size: " + str(len(ds.training_cases[0][0])) +
              ", Output size: " + str(len(ds.training_cases[0][1])))
    elif dataset == "autoencoder":
        # BUG FIX: prompt typo "lenght" -> "length".
        vectorLength = int(input("Set length of vectors: "))
        ds = CaseManager(TFT.gen_all_one_hot_cases(vectorLength),
                         validation_fraction=validationFraction,
                         test_fraction=testFraction)
        self.openAA.set_case_manager(ds)
        # NOTE(review): unlike the other branches, no default
        # hyperparameters are set here — confirm this is intentional.
        print("Input size: " + str(len(ds.training_cases[0][0])) +
              ", Output size: " + str(len(ds.training_cases[0][1])))
    elif dataset == "bitcounter":
        vectorNumber = int(input("Number of cases: "))
        vectorLength = int(input("Length of input vector: "))
        ds = CaseManager(TFT.gen_vector_count_cases(vectorNumber,
                                                    vectorLength),
                         validation_fraction=validationFraction,
                         test_fraction=testFraction)
        self.openAA.set_case_manager(ds)
        print("Input size: " + str(len(ds.training_cases[0][0])) +
              ", Output size: " + str(len(ds.training_cases[0][1])))
        print(ds.training_cases[0])
        # Default values for bitcounter
        self.mp.layer_dims = [15, 128, 64, 32, 16]
        self.mp.softmax = True
        self.mp.hidden_activation_function = "relu"
        self.mp.bestk = 1
        self.mp.learning_rate = 0.001
        self.mp.epochs = 100
        self.mp.w_range = [-0.01, 0.1]
        self.mp.error_function = "ce"
        self.mp.optimizer = "adam"
        self.mp.minibatch_size = 16
        self.mp.lr_freq = None
        self.mp.bs_freq = None
        self.custom_buckets = None
        self.target_accuracy = None
        self.early_stopping = False
    elif dataset == "segmentcounter":
        vectorNumber = int(input("Number of cases: "))
        vectorLength = int(input("Length of input vector: "))
        minSeg = int(input("Minimum number of segments: "))
        maxSeg = int(input("Maximum number of segments: "))
        ds = CaseManager(TFT.gen_segmented_vector_cases(
            vectorLength, vectorNumber, minSeg, maxSeg),
                         validation_fraction=validationFraction,
                         test_fraction=testFraction)
        self.openAA.set_case_manager(ds)
        print("Input size: " + str(len(ds.training_cases[0][0])) +
              ", Output size: " + str(len(ds.training_cases[0][1])))
        # Default values for segmentcounter
        self.mp.layer_dims = [25, 128, 64, 32, 12, 9]
        self.mp.softmax = True
        self.mp.hidden_activation_function = "relu"
        self.mp.bestk = 1
        self.mp.learning_rate = 0.0005
        self.mp.epochs = 200
        self.mp.w_range = [0, 0.1]
        self.mp.error_function = "ce"
        self.mp.optimizer = "adam"
        self.mp.minibatch_size = 32
        self.mp.lr_freq = None
        self.mp.bs_freq = None
        self.custom_buckets = None
        self.target_accuracy = None
        self.early_stopping = False
    elif dataset == "mnist":
        cases = mnist.load_flat_text_cases(
            "all_flat_mnist_training_cases_text.txt")
        if caseFraction != 1:
            cases = TFT.get_fraction_of_cases(cases, caseFraction)
        # Count how many cases fall in each digit class (0-9).
        numbersFordeling = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
        for case in cases:
            num = case[1].index(1)
            numbersFordeling[num] += 1
        print("number of cases: " + str(len(cases)))
        print(numbersFordeling)
        ds = CaseManager(cases, validation_fraction=validationFraction,
                         test_fraction=testFraction)
        # Default values for mnist
        self.mp.layer_dims = [784, 512, 10]
        self.mp.softmax = False
        self.mp.hidden_activation_function = "relu"
        self.mp.bestk = 1
        self.mp.learning_rate = 0.001
        self.mp.w_range = [0, 0.1]
        self.mp.epochs = 10
        self.mp.error_function = "sce"
        self.mp.optimizer = "adam"
        self.mp.minibatch_size = 20
        self.mp.lr_freq = None
        self.mp.bs_freq = None
        self.custom_buckets = None
        self.target_accuracy = None
        self.early_stopping = False
        self.openAA.set_case_manager(ds)
        print("Input size: " + str(len(ds.training_cases[0][0])) +
              ", Output size: " + str(len(ds.training_cases[0][1])))
        print(ds.training_cases[0])
    elif dataset == "wine":
        print("\n")
        #TODO : load wine dataset in correct format
        filereader = fr.FileReader()
        cases = filereader.readfile(
            "wine.txt", 9 if self.mp.custom_buckets is None else 6,
            [3, 4, 5, 6, 7, 8], True)
        print("first: " + str(len(cases)))
        if caseFraction != 1:
            cases = TFT.get_fraction_of_cases(cases, caseFraction)
        print("second: " + str(len(cases)))
        ds = CaseManager(cases, validation_fraction=validationFraction,
                         test_fraction=testFraction)
        # Default values for wine
        self.mp.layer_dims = [11, 512, 256, 128, 64, 32, 6]
        # BUG FIX: these two defaults were set on self instead of self.mp,
        # unlike every other branch, so they never reached the model params.
        self.mp.softmax = False
        self.mp.hidden_activation_function = "relu"
        self.mp.bestk = 1
        self.mp.learning_rate = 0.002
        self.mp.w_range = "scaled"
        self.mp.epochs = 20
        self.mp.error_function = "sce"
        self.mp.optimizer = "adam"
        self.mp.custom_buckets = [1]
        self.mp.minibatch_size = 32
        self.mp.lr_freq = 150
        self.mp.bs_freq = 150
        self.target_accuracy = None
        self.early_stopping = False
        self.openAA.set_case_manager(ds)
        print((ds.training_cases[0]))
        print((ds.training_cases[0][0]))
        print((ds.training_cases[0][1]))
        print("Input size: " + str(len(ds.training_cases[0][0])) +
              ", Output size: " + str(len(ds.training_cases[0][1])))
        # Debug pass: flag any case whose input/target length deviates
        # from the first case (malformed rows in the source file).
        for i, case in enumerate(ds.training_cases):
            try:
                if len(case[0]) != len(ds.training_cases[0][0]):
                    print(len(case[0]))
            except Exception:
                print("HEI!! input")
                print(case)
                print("line nr " + str(i))
            try:
                if len(case[1]) != len(ds.training_cases[0][1]):
                    print(len(case[1]))
            except Exception:
                print("HEI!! target")
                print(case)
                print("line nr " + str(i))
    elif dataset == "glass":
        print("\n")
        filereader = fr.FileReader()
        cases = filereader.readfile(
            "glass.txt", 8 if self.mp.custom_buckets is None else 6,
            [1, 2, 3, 5, 6, 7], True)
        if caseFraction != 1:
            cases = TFT.get_fraction_of_cases(cases, caseFraction)
        ds = CaseManager(cases, validation_fraction=validationFraction,
                         test_fraction=testFraction)
        # Default values for glass
        self.mp.layer_dims = [9, 512, 256, 64, 32, 6]
        # BUG FIX: same self vs self.mp mix-up as the wine branch.
        self.mp.softmax = False
        self.mp.hidden_activation_function = "relu"
        self.mp.bestk = 1
        self.mp.learning_rate = 0.002
        self.mp.w_range = [0, 0.1]
        self.mp.epochs = 200
        self.mp.error_function = "sce"
        self.mp.optimizer = "adam"
        self.mp.custom_buckets = [1]
        self.mp.minibatch_size = 16
        self.mp.lr_freq = 100
        self.mp.bs_freq = 150
        self.target_accuracy = None
        self.early_stopping = False
        self.openAA.set_case_manager(ds)
        print((ds.training_cases[0]))
        print((ds.training_cases[0][0]))
        print((ds.training_cases[0][1]))
        print("Input size: " + str(len(ds.training_cases[0][0])) +
              ", Output size: " + str(len(ds.training_cases[0][1])))
        # Debug pass: flag malformed cases.
        for case in ds.training_cases:
            if len(case[0]) != len(ds.training_cases[0][0]):
                print("HEI!! input")
            if len(case[1]) != len(ds.training_cases[0][1]):
                print("HEI!! target")
    # Fragment: tail of the case-generator dispatch table; the opening
    # brace of this dict lies outside this chunk.
    "symmetry": lambda length, count: TFT.gen_symvect_dataset(
        length, count
    ),  # needs a specific length and number of cases: length, count
    "parity": lambda a: TFT.gen_all_parity_cases(
        a),  # needs a specific length parameter; double flag?
    "autoencoder": lambda a: (
        TFT.gen_all_one_hot_cases(a)
    ),  # or TFT.gen_dense_autoencoder_cases(count, size) for a specific
        # length/size and density of ones
    "bit counter": lambda num, size: TFT.gen_vector_count_cases(
        num, size),  # dimensions = [same as input, hidden, input + 1]
    "segment counter":
    lambda a, b, c, d: TFT.gen_segmented_vector_cases(a, b, c, d)
}

# Global lookup: hidden-layer activation functions by name.
_hidden_activation_function = {
    "sigmoid": lambda a, name: tf.nn.sigmoid(a, name),
    "relu": lambda a, name: tf.nn.relu(a, name),
    "tanh": lambda a, name: tf.nn.tanh(a, name)
}

# Global lookup: optimizer constructors by name, each taking a
# learning rate.  NOTE: the closing brace of this dict lies outside
# this chunk.
_optimizer = {
    "gradientDescent":
    lambda lrate: tf.train.GradientDescentOptimizer(lrate),
    "adagradOptimizer": lambda a: tf.train.AdagradOptimizer(a),
    "adamOptimizer": lambda a: tf.train.AdamOptimizer(a),
    "RMSPropOptimizer": lambda a: tf.train.RMSPropOptimizer(a)
    # Fragment: tail of a preceding config dict whose opening lies
    # outside this chunk.
    'dendrogram_layers': [],
    'numeric': False
}

# Default run configuration for the segment-counting data source.
# Case generator: 1000 vectors of 25 bits with 0-8 segments.
SEGCOUNTER_CONFIG = {
    'name': "segcounter",
    'steps': 100000,
    'lrate': "scale",
    'tint': 100,
    'showint': 100,
    'mbs': 100,
    'wgt_range': (-.3, .3),
    'hidden_layers': [50, 50],
    'hidac': (lambda x, y: tf.tanh(x, name=y)),
    'outac': (lambda x, y: tf.nn.softmax(x, name=y)),
    'case_generator': (lambda: TFT.gen_segmented_vector_cases(25, 1000, 0, 8)),
    'stdeviation': False,
    'vfrac': 0.1,
    'tfrac': 0.1,
    'cfunc': 'rmse',
    'mapbs': 0,
    'cfrac': 1.0,
    'map_layers': [],
    'display_wgts': [],
    'display_biases': [],
    'dendrogram_layers': [],
    'numeric': False
}

# Default run configuration for the bit-counting data source
# (continues past this chunk).
BITCOUNTER_CONFIG = {
    'name': "bitcounter",