Example 1
	def read_data(self, noOfImages):
		images = []
		a,b = mb.load_all_flat_cases()
		for i in range(noOfImages):
			images.append([a[i],b[i]])

		return noOfImages, images
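Judging from how it is called here and in the examples that follow, mb.load_all_flat_cases() appears to return a pair of parallel lists: flat 784-pixel feature vectors and integer digit labels. A minimal usage sketch under that assumption (the `reader` object is hypothetical):

# Hypothetical usage of read_data; assumes mb.load_all_flat_cases() returns
# (features, labels) as parallel lists, as the surrounding examples suggest.
count, cases = reader.read_data(1000)
pixels, label = cases[0]  # e.g. 784 values in 0-255 and a digit 0-9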
Example 2
def main(sizes, training_iteration, act, test=False):
    features, labels = db.load_all_flat_cases()
    features = np.asarray(features)
    features = features / 255
    labels = format_labels(labels, 10)
    ann = Ann(sizes, features, labels, act)

    # x = list(range(20))
    # y = []
    """Training the network"""
    for i in range(training_iteration):
        for start, end in zip(range(0, len(features), 128),
                              range(128, len(features), 128)):
            ann.cost = ann.train(features[start:end], labels[start:end])
        print("Training Step: ", i + 1, "/", training_iteration)
        """Where the network's accuracy is tested"""
        if test:
            features_test, labels_test = db.load_all_flat_cases('testing')
            features_test = np.asarray(features_test)
            features_test = features_test / 255.
            labels_test = format_labels(labels_test, 10)
            test_case = features_test
            answer = labels_test
            num = 0
            predictions = ann.predict(test_case)
            for p in range(len(predictions)):
                if predictions[p] == np.argmax(answer[p]): num += 1

            print(str(num) + " / 10000")
            #y.append(num)

    # canvas = plt.figure()
    # rect = canvas.patch
    # rect.set_facecolor("white")

    # sp1 = canvas.add_subplot(1,1,1, axisbg="w")
    # sp1.plot(x, y, "red", linewidth=2)
    # canvas.suptitle("Accuracy")
    # plt.xlabel("Run nr")
    # plt.ylabel("Correct classifications")
    # plt.ylim([2000,10000])
    # plt.show()

    if not test:
        temp = input("WAIT....")
        major_demo(ann, 10, "basics/")
Example 3
    def converteFlatMnistTo2D(self):
        cases2D = []
        cases = MNIST.load_all_flat_cases()  # returns [ [cases] , [targets] ] --> cases = [ [features...] ]

        for f, t in zip(cases[0], cases[1]):
            f = [feature / 255 for feature in f]
            case = [f, t]
            cases2D.append(case)
        return cases2D
Example 4
def get_mnist_cases(cfrac):
    inputs, targets_vector = load_all_flat_cases('training', '')
    target_size = max(targets_vector) + 1

    targets = []
    for target_value in targets_vector:
        target = [0] * target_size
        target[target_value] = 1  # one-hot index matches the digit value (0-9)
        targets.append(target)

    cases = [[inputs[i], targets[i]] for i in range(len(inputs))]

    if cfrac:
        cases = sample(cases, math.ceil(len(cases) * cfrac))
    return cases
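A hedged usage sketch, assuming this function sits next to load_all_flat_cases and that `sample` and `math.ceil` come from the standard random and math modules, as the body implies:

# Hypothetical calls: cfrac is the fraction of cases to keep.
all_cases  = get_mnist_cases(None)  # falsy cfrac -> every [input, one-hot target] pair
some_cases = get_mnist_cases(0.1)   # random sample of ~10% of the cases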
Example 5
def getData(ds, caseFraction, noClasses):
    dataStructured = []
    if ds == "mnist":
        features, labels = mb.load_all_flat_cases()
        scale_MNIST(features)
        ohl = make_one_hot(labels, noClasses)
        for i in range(len(features)):
            dataStructured.append([features[i], ohl[i]])
    elif len(ds) > 4 and ds[-4] == '.':
        dataStructured = getTextFileData(ds, caseFraction, noClasses)
    else:
        t = ds.split(';')
        temp = t[1].split(',')
        par = list(map(int, temp))
        dataStructured = getattr(TFT, t[0])(*par)
    if caseFraction != 1:
        random.shuffle(dataStructured)
        dataStructured = dataStructured[:int(caseFraction *
                                             len(dataStructured))]
    return dataStructured
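A hedged sketch of how getData might be called. The MNIST branch follows directly from the code above; the generator-spec form assumes TFT exposes gen_all_parity_cases(num_bits), which other examples in this listing call directly.

# Hypothetical calls; the spec string is parsed as "<TFT generator>;<int args>".
mnist_data  = getData("mnist", 0.1, 10)                  # shuffled 10% of MNIST, 10 one-hot classes
parity_data = getData("gen_all_parity_cases;10", 1, 2)   # all 10-bit parity cases, kept whole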
Example 6
    def source(self):
        def normalize(cases):
            input = [c[0] for c in cases]
            target = [c[1] for c in cases]
            input = numpy.array(input)
            min_arr = numpy.min(input, axis=0)
            max_arr = numpy.max(input, axis=0)
            for element in input:
                for i, e in enumerate(element):
                    element[i] = (e - min_arr[i]) / (max_arr[i] - min_arr[i])
            return list(zip(input, target))

        def to_float(inp):
            # returns 0 if input is '?' (question mark)
            return 0 if inp == '?' else float(inp)

        self.source_is_called = True
        print("source:", self.args.source)
        data_set = []
        if self.args.source[-4:] == ".txt":
            with open("data_set_files/" + self.args.source) as file:
                data = list(
                    map(lambda x: re.split("[;,]", x), file.readlines()))
                data = list(map(lambda x: list(map(to_float, x)), data))
            max_d = max(map(lambda x: int(x[-1]), data))
            for element in data:
                input = element[:-1]
                target = TFT.int_to_one_hot(int(element[-1]) - 1, max_d)
                data_set.append([input, target])
        elif self.args.source == "parity":
            if self.args.sourceinit is None:
                data_set = TFT.gen_all_parity_cases(10)
            else:
                data_set = TFT.gen_all_parity_cases(self.args.sourceinit[0])
        elif self.args.source == "symmetry":
            if self.args.sourceinit is None:
                vecs = TFT.gen_symvect_dataset(101, 2000)
            else:
                vecs = TFT.gen_symvect_dataset(self.args.sourceinit[0],
                                               self.args.sourceinit[1])
            inputs = list(map(lambda x: x[:-1], vecs))
            targets = list(map(lambda x: TFT.int_to_one_hot(x[-1], 2), vecs))
            data_set = list(zip(inputs, targets))
        elif self.args.source == "auto_onehot":
            if self.args.sourceinit is None:
                data_set = TFT.gen_all_one_hot_cases(64)
            else:
                data_set = TFT.gen_all_one_hot_cases(self.args.sourceinit[0])
        elif self.args.source == "auto_dense":
            if self.args.sourceinit is None:
                data_set = TFT.gen_dense_autoencoder_cases(2000, 100)
            else:
                data_set = TFT.gen_dense_autoencoder_cases(
                    self.args.sourceinit[0], self.args.sourceinit[1])
        elif self.args.source == "bitcounter":
            if self.args.sourceinit is None:
                data_set = TFT.gen_vector_count_cases(500, 15)
            else:
                data_set = TFT.gen_vector_count_cases(self.args.sourceinit[0],
                                                      self.args.sourceinit[1])
        elif self.args.source == "segmentcounter":
            if self.args.sourceinit is None:
                data_set = TFT.gen_segmented_vector_cases(25, 1000, 0, 8)
            else:
                data_set = TFT.gen_segmented_vector_cases(
                    self.args.sourceinit[0], self.args.sourceinit[1],
                    self.args.sourceinit[2], self.args.sourceinit[3])
        elif self.args.source == "mnist":
            # mnist_basics.load_all_flat_cases(type='testing')
            cases = mnist_basics.load_all_flat_cases(type='training')
            input = cases[0]
            target = cases[1]
            input = list(map(lambda x: list(map(lambda e: e / 255, x)), input))
            target = list(map(lambda x: TFT.int_to_one_hot(x, 10), target))
            data_set = list(zip(input, target))

        if data_set == []:
            print(self.args.source, "is illegal for argument --source")
            print("Legal values are: <filename>.txt, parity, symmetry, "
                  "auto_onehot, auto_dense, bitcounter, segmentcounter, mnist")
            quit()
        if self.args.source[-4:] == ".txt":
            data_set = normalize(data_set)
        return data_set
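Several of these examples rely on TFT.int_to_one_hot(value, size). From the way it is used throughout this listing, it appears to return a list of length size with a 1 at index value; a minimal re-implementation under that assumption:

# Assumed behaviour of TFT.int_to_one_hot, inferred from its callers above.
def int_to_one_hot(value, size):
    vec = [0] * size
    vec[value] = 1
    return vec

int_to_one_hot(3, 10)  # -> [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]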
Example 7
def main():
    def epoch_status_function(epoch_time, epoch, average_loss, testing_error, is_best):
        if is_best:
            with open(os.path.join(base_path, 'model.pkl'), 'wb') as model_file:
                pickle.dump(network, model_file)

        with open(os.path.join(base_path, 'loss.txt'), 'at') as error_file:
            print('{} {:.4f} {:.10f}'.format(epoch, epoch_time, average_loss), file=error_file)

        with open(os.path.join(base_path, 'error.txt'), 'at') as error_file:
            print('{} {:.4f} {:.10f}'.format(epoch, epoch_time, testing_error * 100.0), file=error_file)

        print("Time: {:7.2f} sec, Epoch: {:4d}, Testing error: {:.5f}%".format(
            epoch_time, epoch, testing_error * 100.0))

    def setup_base_path(runs):
        def format_float(f):
            return '{:f}'.format(f).rstrip('0').rstrip('.')

        base_path = os.path.join(args.output_directory,
            'mnist_network_{}_layers_{}_activation_{}_learning_{}_minibatches{}{}{}'.format(
                '-'.join(args.hidden_layers),
                args.hidden_function,
                format_float(args.learning_rate),
                args.minibatch_size,
                ('_' + format_float(args.L1) + '_L1') if args.L1 else '',
                ('_' + format_float(args.L2) + '_L2') if args.L2 else '',
                ('_' + format_float(args.dropout) + '_dropout') if args.dropout else ''))

        if runs:
            run_index = 1

            while True:
                base_path_run = os.path.join(base_path, str(run_index))

                if os.path.isdir(base_path_run):
                    run_index += 1
                else:
                    base_path = base_path_run
                    break

        if not os.path.isdir(base_path):
            os.makedirs(base_path)

        for existing_file in os.listdir(base_path):
            try:
                os.remove(os.path.join(base_path, existing_file))
            except OSError:
                pass

        return base_path

    parser = argparse.ArgumentParser()
    parser.add_argument('--L1', type=float)
    parser.add_argument('--L2', type=float)
    parser.add_argument('--R', type=int)
    parser.add_argument('--data')
    parser.add_argument('--dropout', type=float)
    parser.add_argument('--epochs', type=int, default=100)
    parser.add_argument('--hidden_function', default='relu')
    parser.add_argument('--hidden_layers', nargs='*')
    parser.add_argument('--learning_rate', type=float, default=0.005)
    parser.add_argument('--major_demo', action='store_true')
    parser.add_argument('--max_time', type=int)
    parser.add_argument('--minibatch_size', type=int, default=40)
    parser.add_argument('--minor_demo', action='store_true')
    parser.add_argument('--model')
    parser.add_argument('--output_directory', default='../data')
    parser.add_argument('--runs', action='store_true')
    parser.add_argument('--seed', type=int)
    parser.add_argument('--training_ratio', type=float)
    args = parser.parse_args()

    print(args)

    if args.seed:
        random.seed(args.seed)
        numpy.random.seed(random.randint(0, 2 ** 30))

    if (args.major_demo or args.minor_demo) and not args.model:
        print("No model file provided.", file=sys.stderr)
        sys.exit(-1)

    if args.model:
        network = pickle.load(open(args.model, 'rb'))

        predict_function = theano.function(
            inputs=[network.inputs],
            outputs=network.classify())

        if args.major_demo:
            if not args.R:
                print("No R parameter provided.", file=sys.stderr)
                sys.exit(-1)

            ann = Ann(predict_function)
            d   = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../mnist/basics/')
            print('r = {} d = {}'.format(args.R, d))

            mnistdemo.major_demo(ann, args.R, d)
        elif args.minor_demo:
            mnist_basics.minor_demo(Ann(predict_function, args.minibatch_size))
        elif args.data:
            dataset, labelset = pickle.load(open(args.data, 'rb'))

            total = len(dataset)
            correct = sum(predict_function(dataset[i:i+1])[0] == labelset[i:i+1][0] for i in range(len(dataset)))

            print("{}/{} ({:.2f}%)".format(correct, total, 100.0 * correct / total))
        else:
            print("No data file provided.", file=sys.stderr)
            sys.exit(-1)
    else:
        base_path = setup_base_path(args.runs)

        with open(os.path.join(base_path, 'log.txt'), 'at') as log_file:
            temp_stdout = sys.stdout
            sys.stdout  = Tee(sys.stdout, log_file)

            layer_sizes = [28 * 28] + list(map(int, args.hidden_layers or [])) + [10]

            print("Loading dataset 'training' from file...")
            flat_training_data, flat_training_labels = mnist_basics.load_all_flat_cases('training', dir='../mnist/basics/')

            print("Loading dataset 'testing' from file...")
            flat_testing_data, flat_testing_labels = mnist_basics.load_all_flat_cases('testing', dir='../mnist/basics/')

            print("Creating shared Theano dataset variables...")

            num_training_examples = int(math.ceil(args.training_ratio * len(flat_training_data))) if args.training_ratio else len(flat_training_data)

            training_dataset = build_theano_dataset(flat_training_data[:num_training_examples], flat_training_labels[:num_training_examples])
            testing_dataset  = build_theano_dataset(flat_testing_data, flat_testing_labels)

            minibatch_index = T.lscalar()
            x               = T.matrix('x')
            y               = T.ivector('y')

            activation_functions = { "relu": theano.tensor.nnet.relu, "sigmoid": theano.tensor.nnet.sigmoid, "tanh": theano.tensor.tanh }

            network = vi.theano.Network(
                x, layer_sizes, activation_functions[args.hidden_function], args.dropout, args.L1, args.L2)

            training_minibatch_count = int(math.ceil(training_dataset.size / args.minibatch_size))
            testing_minibatch_count  = int(math.ceil(testing_dataset.size  / args.minibatch_size))

            training_function, testing_function = build_theano_functions(
                network, x, y, minibatch_index, args.minibatch_size, args.learning_rate,
                training_dataset, testing_dataset)

            print("Starting stochastic gradient descent. num_training_examples={}".format(
                num_training_examples, args.learning_rate, args.epochs, args.max_time))

            training_time, training_epochs, testing_error = \
                vi.theano.stochastic_gradient_descent(
                    training_function,
                    training_minibatch_count,
                    testing_function,
                    testing_minibatch_count,
                    learning_rate=args.learning_rate,
                    epochs=args.epochs,
                    max_time=args.max_time,
                    epoch_status_function=epoch_status_function)

            print(("Training completed after {:.2f} seconds. {} epochs at {:.2f} epochs / second. " +
                   "Testing error: {:.5f}%").format(
                training_time,
                training_epochs,
                training_epochs / training_time,
                testing_error * 100.0))

            time.sleep(1)
            sys.stdout.flush()
            sys.stdout = temp_stdout
Example 8
    def read_mnist(self):
        cases, label = mnists.load_all_flat_cases()
        cases = np.array(cases) / 255
        for i in range(len(cases)):
            self.cases.append(
                [cases[i], TFT.int_to_one_hot(int(label[i]), 10)])
Example 9
    def generate_cases(self):
        # Run the case generator.  Case = [input-vector, target-vector]
        if (self.casenr == 0):
            # Parity
            # params: num_bits, double=True
            try:
                num_bits = int(self.params[0])

                if (len(self.params) > 1):
                    double = self.params[1]
                else:
                    double = True
                self.cases = TFT.gen_all_parity_cases(num_bits, double)
            except ValueError:
                print("Case Parameters not valid")
        elif (self.casenr == 1):
            # Autoencoder - NO performance testing
            # TODO: Add if time; NOTE THIS IS THE CORRECT GENERATION
            # Can choose between this:
            try:
                nbits = int(self.params[0])
                for i in range(0, 100):
                    cases = TFT.gen_all_one_hot_cases(2**nbits)
                    for j in range(0, len(cases)):
                        print(cases[j])
                        self.cases.append(cases[j])
            except ValueError:
                print("Case Parameters not valid")
            # OR this:
            # self.cases = TFT.gen_dense_autoencoder_cases(count, size, dr=(0,1))
        elif (self.casenr == 2):
            # Bit counter
            try:
                ncases = int(self.params[0])
                nbits = int(self.params[1])
                self.cases = TFT.gen_vector_count_cases(ncases, nbits)
            except ValueError:
                print("Case Parameters not valid")

        elif (self.casenr == 3):
            # Segment counter
            try:
                size = int(self.params[0])
                count = int(self.params[1])
                minsegs = int(self.params[2])
                maxsegs = int(self.params[3])
                if (len(self.params) > 4):
                    poptargs = False
                else:
                    poptargs = True

                self.cases = TFT.gen_segmented_vector_cases(
                    size, count, minsegs, maxsegs, poptargs)
            except ValueError:
                print("Case Parameters not valid")

        elif (self.casenr == 4):
            # MNIST
            cases = mnist.load_all_flat_cases()
            for case in cases:
                inp = case[:-1]
                for j, num in enumerate(inp):
                    inp[j] = num / 255
                label = TFT.int_to_one_hot(case[-1], 10)
                self.cases.append([inp, label])
        elif (self.casenr == 5):
            # Wine quality
            f = open("./datasets/winequality_red.txt", "r")
            for line in f:
                arr = [float(i) for i in line.split(';')[:-1]]
                label = TFT.int_to_one_hot(int(line.split(';')[-1][0]) - 3, 6)
                self.cases.append([arr, label])
        elif (self.casenr == 6):
            # Glass
            f = open("./datasets/glass.txt", "r")
            for line in f:
                arr = [float(i) for i in line.split(',')[:-1]]
                arr[1] = arr[1] / 13
                arr[4] = arr[4] / 74
                arr[6] = arr[6] / 8
                label = TFT.int_to_one_hot(int(line.split(',')[-1][0]) - 1, 7)
                self.cases.append([arr, label])
        elif (self.casenr == 7):
            # Yeast
            f = open("./datasets/yeast.txt", "r")
            for line in f:
                arr = [float(i) for i in line.split(',')[:-1]]
                label = TFT.int_to_one_hot(int(line.split(',')[-1][0]) - 1, 10)
                self.cases.append([arr, label])
        elif (self.casenr == 8):
            # Hackers choice
            # TODO
            self.cases = None
        else:
            print("not a valid case")
        print(len(self.cases), "cases generated")