def read_data(self, noOfImages):
    images = []
    a, b = mb.load_all_flat_cases()
    for i in range(noOfImages):
        images.append([a[i], b[i]])
    return noOfImages, images
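# Hedged sketch (not from the original source) of what load_all_flat_cases is assumed
# to return, judging from how the snippets in this collection use it: two parallel
# lists holding flat 784-pixel images (raw values 0-255) and integer labels (0-9).
# The module name follows the mnist_basics import used further down.
import mnist_basics as mb

images, labels = mb.load_all_flat_cases()   # parallel lists of equal length
assert len(images) == len(labels)
assert len(images[0]) == 28 * 28            # each case is a flattened 28x28 image
print(images[0][:10], labels[0])            # first ten pixel values and the digit class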
def main(sizes, training_iteration, act, test=False):
    features, labels = db.load_all_flat_cases()
    features = np.asarray(features)
    features = features / 255
    labels = format_labels(labels, 10)
    ann = Ann(sizes, features, labels, act)
    # x = list(range(20))
    # y = []

    """Training the network"""
    for i in range(training_iteration):
        # Mini-batches of 128 cases; any cases left over after the last full batch are skipped.
        for start, end in zip(range(0, len(features), 128),
                              range(128, len(features), 128)):
            ann.cost = ann.train(features[start:end], labels[start:end])
        print("Training Step: ", i + 1, "/", training_iteration)

    """Where the network's accuracy is tested"""
    if test:
        features_test, labels_test = db.load_all_flat_cases('testing')
        features_test = np.asarray(features_test)
        features_test = features_test / 255.
        labels_test = format_labels(labels_test, 10)
        test_case = features_test
        answer = labels_test
        num = 0
        predictions = ann.predict(test_case)
        for p in range(len(predictions)):
            if predictions[p] == np.argmax(answer[p]):
                num += 1
        print(str(num) + " / 10000")
        # y.append(num)
        # canvas = plt.figure()
        # rect = canvas.patch
        # rect.set_facecolor("white")
        # sp1 = canvas.add_subplot(1, 1, 1, axisbg="w")
        # sp1.plot(x, y, "red", linewidth=2)
        # canvas.suptitle("Accuracy")
        # plt.xlabel("Run nr")
        # plt.ylabel("Correct classifications")
        # plt.ylim([2000, 10000])
        # plt.show()

    if not test:
        temp = input("WAIT....")
        major_demo(ann, 10, "basics/")
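# format_labels is called above but not defined in this snippet. A minimal sketch of
# what it presumably does (one-hot encoding of integer labels); the real helper may
# differ. Assumes numpy is imported as np, as in the function above.
import numpy as np

def format_labels(labels, num_classes):
    # Turn a list of integer class labels into an (N, num_classes) one-hot matrix.
    one_hot = np.zeros((len(labels), num_classes))
    one_hot[np.arange(len(labels)), labels] = 1
    return one_hot

# format_labels([3, 0], 10)[0] has a 1 at index 3 and zeros elsewhere.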
def converteFlatMnistTo2D(self):
    cases2D = []
    cases = MNIST.load_all_flat_cases()  # returns [ [cases], [targets] ] --> cases = [ [features...] ]
    for f, t in zip(cases[0], cases[1]):
        f = [feature / 255 for feature in f]
        case = [f, t]
        cases2D.append(case)
    return cases2D
def get_mnist_cases(cfrac):
    inputs, targets_vector = load_all_flat_cases('training', '')
    target_size = max(targets_vector) + 1
    targets = []
    for target_value in targets_vector:
        # One-hot encode the integer label. The original indexed target_value - 1,
        # which sends digit 0 to the last position; indexing by target_value keeps
        # the natural 0-9 ordering.
        target = [0] * target_size
        target[target_value] = 1
        targets.append(target)
    cases = [[inputs[i], targets[i]] for i in range(len(inputs))]
    if cfrac:
        # Keep only a random fraction of the cases.
        cases = sample(cases, math.ceil(len(cases) * cfrac))
    return cases
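# Hedged usage example (not from the original source): assumes the MNIST training
# files are available to load_all_flat_cases. cfrac draws a random subsample, so 0.1
# keeps roughly 10% of the ~60,000 training cases; a falsy cfrac keeps them all.
subset = get_mnist_cases(0.1)
pixels, one_hot_target = subset[0]
print(len(subset), len(pixels), one_hot_target)   # ~6000 cases, 784 pixels, 10-way one-hot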
def getData(ds, caseFraction, noClasses):
    dataStructured = []
    if ds == "mnist":
        # Built-in MNIST loader.
        features, labels = mb.load_all_flat_cases()
        scale_MNIST(features)
        ohl = make_one_hot(labels, noClasses)
        for i in range(len(features)):
            dataStructured.append([features[i], ohl[i]])
    elif len(ds) > 4 and ds[-4] == '.':
        # A data file name such as "something.txt".
        dataStructured = getTextFileData(ds, caseFraction, noClasses)
    else:
        # A "<TFT generator>;<comma-separated int args>" descriptor.
        t = ds.split(';')
        temp = t[1].split(',')
        par = list(map(int, temp))
        dataStructured = getattr(TFT, t[0])(*par)
    if caseFraction != 1:
        random.shuffle(dataStructured)
        dataStructured = dataStructured[:int(caseFraction * len(dataStructured))]
    return dataStructured
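# Hedged examples of the three descriptor formats getData accepts, read off the
# branches above: the literal "mnist", a data file name with a three-letter extension
# handled by getTextFileData, or a "<TFT generator>;<comma-separated int args>"
# string. The concrete file name and generator arguments here are illustrative
# assumptions, not values from the original source.
mnist_cases = getData("mnist", 1, 10)                   # full MNIST, 10-class one-hot targets
file_cases = getData("winequality_red.txt", 0.5, 6)     # half of a text-file dataset
parity_cases = getData("gen_all_parity_cases;10", 1, 2) # calls TFT.gen_all_parity_cases(10)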
def source(self):
    def normalize(cases):
        input = [c[0] for c in cases]
        target = [c[1] for c in cases]
        input = numpy.array(input)
        min_arr = numpy.min(input, axis=0)
        max_arr = numpy.max(input, axis=0)
        for element in input:
            for i, e in enumerate(element):
                element[i] = (e - min_arr[i]) / (max_arr[i] - min_arr[i])
        return list(zip(input, target))

    def to_float(inp):
        # Returns 0 if the input is '?' (a missing value).
        return 0 if inp == '?' else float(inp)

    self.source_is_called = True
    print("source:", self.args.source)
    data_set = []

    if self.args.source[-4:] == ".txt":
        with open("data_set_files/" + self.args.source) as file:
            data = list(map(lambda x: re.split("[;,]", x), file.readlines()))
            data = list(map(lambda x: list(map(to_float, x)), data))
            max_d = max(map(lambda x: int(x[-1]), data))
            for element in data:
                input = element[:-1]
                target = TFT.int_to_one_hot(int(element[-1]) - 1, max_d)
                data_set.append([input, target])
    elif self.args.source == "parity":
        if self.args.sourceinit is None:
            data_set = TFT.gen_all_parity_cases(10)
        else:
            data_set = TFT.gen_all_parity_cases(self.args.sourceinit[0])
    elif self.args.source == "symmetry":
        if self.args.sourceinit is None:
            vecs = TFT.gen_symvect_dataset(101, 2000)
        else:
            vecs = TFT.gen_symvect_dataset(self.args.sourceinit[0],
                                           self.args.sourceinit[1])
        inputs = list(map(lambda x: x[:-1], vecs))
        targets = list(map(lambda x: TFT.int_to_one_hot(x[-1], 2), vecs))
        data_set = list(zip(inputs, targets))
    elif self.args.source == "auto_onehot":
        if self.args.sourceinit is None:
            data_set = TFT.gen_all_one_hot_cases(64)
        else:
            data_set = TFT.gen_all_one_hot_cases(self.args.sourceinit[0])
    elif self.args.source == "auto_dense":
        if self.args.sourceinit is None:
            data_set = TFT.gen_dense_autoencoder_cases(2000, 100)
        else:
            data_set = TFT.gen_dense_autoencoder_cases(self.args.sourceinit[0],
                                                       self.args.sourceinit[1])
    elif self.args.source == "bitcounter":
        if self.args.sourceinit is None:
            data_set = TFT.gen_vector_count_cases(500, 15)
        else:
            data_set = TFT.gen_vector_count_cases(self.args.sourceinit[0],
                                                  self.args.sourceinit[1])
    elif self.args.source == "segmentcounter":
        if self.args.sourceinit is None:
            data_set = TFT.gen_segmented_vector_cases(25, 1000, 0, 8)
        else:
            data_set = TFT.gen_segmented_vector_cases(self.args.sourceinit[0],
                                                      self.args.sourceinit[1],
                                                      self.args.sourceinit[2],
                                                      self.args.sourceinit[3])
    elif self.args.source == "mnist":
        # mnist_basics.load_all_flat_cases(type='testing')
        cases = mnist_basics.load_all_flat_cases(type='training')
        input = cases[0]
        target = cases[1]
        input = list(map(lambda x: list(map(lambda e: e / 255, x)), input))
        target = list(map(lambda x: TFT.int_to_one_hot(x, 10), target))
        data_set = list(zip(input, target))

    if not data_set:
        print(self.args.source, "is illegal for argument --source")
        print("Legal values are: <filename>.txt, parity, symmetry, auto_onehot, "
              "auto_dense, bitcounter, segmentcounter, mnist")
        quit()

    if self.args.source[-4:] == ".txt":
        data_set = normalize(data_set)
    return data_set
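# The normalize helper above min-max scales each feature column with Python loops.
# A vectorized numpy sketch of the same transformation (an illustration, not part of
# the original; like the loop version, it assumes no constant columns, which would
# divide by zero):
import numpy

def normalize_minmax(inputs):
    arr = numpy.array(inputs, dtype=float)
    min_arr = arr.min(axis=0)
    max_arr = arr.max(axis=0)
    return (arr - min_arr) / (max_arr - min_arr)  # every column rescaled to [0, 1]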
def main():
    def epoch_status_function(epoch_time, epoch, average_loss, testing_error, is_best):
        if is_best:
            with open(os.path.join(base_path, 'model.pkl'), 'wb') as model_file:
                pickle.dump(network, model_file)
        with open(os.path.join(base_path, 'loss.txt'), 'at') as error_file:
            print('{} {:.4f} {:.10f}'.format(epoch, epoch_time, average_loss),
                  file=error_file)
        with open(os.path.join(base_path, 'error.txt'), 'at') as error_file:
            print('{} {:.4f} {:.10f}'.format(epoch, epoch_time, testing_error * 100.0),
                  file=error_file)
        print("Time: {:7.2f} sec, Epoch: {:4d}, Testing error: {:.5f}%".format(
            epoch_time, epoch, testing_error * 100.0))

    def setup_base_path(runs):
        def format_float(f):
            return '{:f}'.format(f).rstrip('0').rstrip('.')

        base_path = os.path.join(
            args.output_directory,
            'mnist_network_{}_layers_{}_activation_{}_learning_{}_minibatches{}{}{}'.format(
                '-'.join(args.hidden_layers),
                args.hidden_function,
                format_float(args.learning_rate),
                args.minibatch_size,
                ('_' + format_float(args.L1) + '_L1') if args.L1 else '',
                ('_' + format_float(args.L2) + '_L2') if args.L2 else '',
                ('_' + format_float(args.dropout) + '_dropout') if args.dropout else ''))

        if runs:
            run_index = 1
            while True:
                base_path_run = os.path.join(base_path, str(run_index))
                if os.path.isdir(base_path_run):
                    run_index += 1
                else:
                    base_path = base_path_run
                    break

        if not os.path.isdir(base_path):
            os.makedirs(base_path)

        # Clear out any files left over from a previous run with the same settings.
        for existing_file in os.listdir(base_path):
            try:
                os.remove(os.path.join(base_path, existing_file))
            except OSError:
                pass

        return base_path

    parser = argparse.ArgumentParser()
    parser.add_argument('--L1', type=float)
    parser.add_argument('--L2', type=float)
    parser.add_argument('--R', type=int)
    parser.add_argument('--data')
    parser.add_argument('--dropout', type=float)
    parser.add_argument('--epochs', type=int, default=100)
    parser.add_argument('--hidden_function', default='relu')
    parser.add_argument('--hidden_layers', nargs='*')
    parser.add_argument('--learning_rate', type=float, default=0.005)
    parser.add_argument('--major_demo', action='store_true')
    parser.add_argument('--max_time', type=int)
    parser.add_argument('--minibatch_size', type=int, default=40)
    parser.add_argument('--minor_demo', action='store_true')
    parser.add_argument('--model')
    parser.add_argument('--output_directory', default='../data')
    parser.add_argument('--runs', action='store_true')
    parser.add_argument('--seed', type=int)
    parser.add_argument('--training_ratio', type=float)

    args = parser.parse_args()
    print(args)

    if args.seed:
        random.seed(args.seed)
        numpy.random.seed(random.randint(0, 2 ** 30))

    if (args.major_demo or args.minor_demo) and not args.model:
        print("No model file provided.", file=sys.stderr)
        sys.exit(-1)

    if args.model:
        network = pickle.load(open(args.model, 'rb'))
        predict_function = theano.function(
            inputs=[network.inputs],
            outputs=network.classify())

        if args.major_demo:
            if not args.R:
                print("No R parameter provided.", file=sys.stderr)
                sys.exit(-1)

            ann = Ann(predict_function)
            d = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                             '../mnist/basics/')
            print('r = {} d = {}'.format(args.R, d))
            mnistdemo.major_demo(ann, args.R, d)
        elif args.minor_demo:
            mnist_basics.minor_demo(Ann(predict_function, args.minibatch_size))
        elif args.data:
            dataset, labelset = pickle.load(open(args.data, 'rb'))
            total = len(dataset)
            correct = sum(predict_function(dataset[i:i + 1])[0] == labelset[i:i + 1][0]
                          for i in range(len(dataset)))
            print("{}/{} ({:.2f}%)".format(correct, total, 100.0 * correct / total))
        else:
            print("No data file provided.", file=sys.stderr)
            sys.exit(-1)
    else:
        base_path = setup_base_path(args.runs)

        with open(os.path.join(base_path, 'log.txt'), 'at') as log_file:
            temp_stdout = sys.stdout
            sys.stdout = Tee(sys.stdout, log_file)

            layer_sizes = [28 * 28] + list(map(int, args.hidden_layers or [])) + [10]

            print("Loading dataset 'training' from file...")
            flat_training_data, flat_training_labels = mnist_basics.load_all_flat_cases(
                'training', dir='../mnist/basics/')

            print("Loading dataset 'testing' from file...")
            flat_testing_data, flat_testing_labels = mnist_basics.load_all_flat_cases(
                'testing', dir='../mnist/basics/')

            print("Creating shared Theano dataset variables...")
            num_training_examples = int(math.ceil(args.training_ratio * len(flat_training_data))) \
                if args.training_ratio else len(flat_training_data)

            training_dataset = build_theano_dataset(
                flat_training_data[:num_training_examples],
                flat_training_labels[:num_training_examples])
            testing_dataset = build_theano_dataset(flat_testing_data, flat_testing_labels)

            minibatch_index = T.lscalar()
            x = T.matrix('x')
            y = T.ivector('y')

            activation_functions = {
                "relu": theano.tensor.nnet.relu,
                "sigmoid": theano.tensor.nnet.sigmoid,
                "tanh": theano.tensor.tanh
            }

            network = vi.theano.Network(
                x, layer_sizes, activation_functions[args.hidden_function],
                args.dropout, args.L1, args.L2)

            training_minibatch_count = int(math.ceil(training_dataset.size / args.minibatch_size))
            testing_minibatch_count = int(math.ceil(testing_dataset.size / args.minibatch_size))

            training_function, testing_function = build_theano_functions(
                network, x, y, minibatch_index, args.minibatch_size,
                args.learning_rate, training_dataset, testing_dataset)

            print("Starting stochastic gradient descent. num_training_examples={} "
                  "learning_rate={} epochs={} max_time={}".format(
                      num_training_examples, args.learning_rate, args.epochs, args.max_time))

            training_time, training_epochs, testing_error = \
                vi.theano.stochastic_gradient_descent(
                    training_function,
                    training_minibatch_count,
                    testing_function,
                    testing_minibatch_count,
                    learning_rate=args.learning_rate,
                    epochs=args.epochs,
                    max_time=args.max_time,
                    epoch_status_function=epoch_status_function)

            print(("Training completed after {:.2f} seconds. {} epochs at {:.2f} epochs / second. "
                   "Testing error: {:.5f}%").format(
                       training_time, training_epochs,
                       training_epochs / training_time, testing_error * 100.0))

            time.sleep(1)
            sys.stdout.flush()
            sys.stdout = temp_stdout
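# Hedged invocation examples assembled only from the argparse flags defined above;
# the script name train_mnist.py and the concrete values are illustrative assumptions.
#
#   # train a 784-100-10 ReLU network for 20 epochs, logging under ../data
#   python train_mnist.py --hidden_layers 100 --epochs 20 --learning_rate 0.005 --minibatch_size 40
#
#   # evaluate a previously pickled model on a pickled (dataset, labelset) file
#   python train_mnist.py --model model.pkl --data test_cases.pkl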
def read_mnist(self):
    cases, label = mnists.load_all_flat_cases()
    cases = np.array(cases) / 255
    for i in range(len(cases)):
        self.cases.append([cases[i], TFT.int_to_one_hot(int(label[i]), 10)])
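# TFT.int_to_one_hot is used here and in several snippets above but is not shown. A
# minimal sketch of the assumed behaviour: a list of `size` zeros with index `value`
# set to one. The real tutor-supplied helper may take additional arguments.
def int_to_one_hot(value, size):
    vector = [0] * size
    vector[value] = 1
    return vector

# int_to_one_hot(3, 10) -> [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]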
def generate_cases(self):
    # Run the case generator. Case = [input-vector, target-vector]
    if (self.casenr == 0):  # Parity
        # params: num_bits, double=True
        try:
            num_bits = int(self.params[0])
            if (len(self.params) > 1):
                double = self.params[1]
            else:
                double = True
            self.cases = TFT.gen_all_parity_cases(num_bits, double)
        except ValueError:
            print("Case Parameters not valid")
    elif (self.casenr == 1):  # Autoencoder - NO performance testing
        # TODO: Add if time; NOTE THIS IS THE CORRECT GENERATION
        # Can choose between this:
        try:
            nbits = int(self.params[0])
            for i in range(0, 100):
                cases = TFT.gen_all_one_hot_cases(2 ** nbits)
                for j in range(0, len(cases)):
                    print(cases[j])
                    self.cases.append(cases[j])
        except ValueError:
            print("Case Parameters not valid")
        # OR this:
        # self.cases = TFT.gen_dense_autoencoder_cases(count, size, dr=(0, 1))
    elif (self.casenr == 2):  # Bit counter
        try:
            ncases = int(self.params[0])
            nbits = int(self.params[1])
            self.cases = TFT.gen_vector_count_cases(ncases, nbits)
        except ValueError:
            print("Case Parameters not valid")
    elif (self.casenr == 3):  # Segment counter
        try:
            size = int(self.params[0])
            count = int(self.params[1])
            minsegs = int(self.params[2])
            maxsegs = int(self.params[3])
            if (len(self.params) > 4):
                poptargs = False
            else:
                poptargs = True
            self.cases = TFT.gen_segmented_vector_cases(size, count, minsegs, maxsegs, poptargs)
        except ValueError:
            print("Case Parameters not valid")
    elif (self.casenr == 4):  # MNIST
        cases = mnist.load_all_flat_cases()
        for case in cases:
            inp = case[:-1]
            for j, num in enumerate(inp):
                inp[j] = num / 255
            label = TFT.int_to_one_hot(case[-1], 10)
            self.cases.append([inp, label])
    elif (self.casenr == 5):  # Wine quality
        f = open("./datasets/winequality_red.txt", "r")
        for line in f:
            arr = [float(i) for i in line.split(';')[:-1]]
            label = TFT.int_to_one_hot(int(line.split(';')[-1][0]) - 3, 6)
            self.cases.append([arr, label])
    elif (self.casenr == 6):  # Glass
        f = open("./datasets/glass.txt", "r")
        for line in f:
            arr = [float(i) for i in line.split(',')[:-1]]
            arr[1] = arr[1] / 13
            arr[4] = arr[4] / 74
            arr[6] = arr[6] / 8
            label = TFT.int_to_one_hot(int(line.split(',')[-1][0]) - 1, 7)
            self.cases.append([arr, label])
    elif (self.casenr == 7):  # Yeast
        f = open("./datasets/yeast.txt", "r")
        for line in f:
            arr = [float(i) for i in line.split(',')[:-1]]
            label = TFT.int_to_one_hot(int(line.split(',')[-1][0]) - 1, 10)
            self.cases.append([arr, label])
    elif (self.casenr == 8):  # Hackers choice
        # TODO
        self.cases = None
    else:
        print("not a valid case")

    print(len(self.cases), "cases generated")
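# Hedged usage sketch: generate_cases reads self.casenr, self.params and self.cases,
# so a caller is assumed to set those first. The CaseHolder wrapper below is invented
# purely for illustration and is not part of the original source.
class CaseHolder:
    def __init__(self, casenr, params):
        self.casenr = casenr
        self.params = params
        self.cases = []

holder = CaseHolder(0, ["10"])   # casenr 0 = parity cases over 10-bit vectors
generate_cases(holder)           # fills holder.cases with [input-vector, target-vector] pairs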