예제 #1
0
class ReadMnistCsv(object):
    """
    Read MNIST digit data from a CSV file into a ``LearningExamples`` set.

    Every non-blank line is parsed as ``label,pixel,pixel,...``.  The label is
    mapped to a one-hot list of length 10 via ``CLASSES`` and every pixel
    value is divided by 255.0 (the pixels are assumed to be 0-255 intensities).

    see: http://pjreddie.com/projects/mnist-in-csv/
         http://makeyourownneuralnetwork.blogspot.com/2015/03/the-mnist-dataset-of-handwitten-digits.html
    """
    # One-hot target vector for each digit label.
    CLASSES = {0: [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
               1: [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
               2: [0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
               3: [0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
               4: [0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
               5: [0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
               6: [0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
               7: [0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
               8: [0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
               9: [0, 0, 0, 0, 0, 0, 0, 0, 0, 1]}

    def __init__(self, input_file):
        """
        Load every example found in ``input_file`` into ``self.examples``.

        :param input_file: path to the MNIST CSV file to read
        :raises ValueError: if a label or pixel field is not numeric
        :raises KeyError: if a label is not a key of ``CLASSES``
        """
        self.examples = LearningExamples()
        cnt = 0
        # The context manager closes the file even if a malformed line raises,
        # and iterating the handle avoids holding the whole file in memory.
        with open(input_file, 'r') as f:
            for line in f:
                # Tolerate blank lines (e.g. an empty line at the end of file)
                # instead of failing on int('').
                if not line.strip():
                    continue
                pixels, classes = self._parse_line(line)
                self.examples.add_data(pixels, classes)
                cnt += 1
                if cnt % 10000 == 0:
                    # A line has one label field plus one field per pixel.
                    log.info('line %d: len pixels = %d, len fields = %d', cnt, len(pixels), len(pixels) + 1)

    @classmethod
    def _parse_line(cls, line):
        """
        Split one CSV line into its scaled pixels and one-hot label.

        :param line: text of the form ``label,pixel,pixel,...``
        :return: tuple ``(pixels, classes)`` where ``pixels`` is a list of
                 floats (raw value / 255.0) and ``classes`` is the matching
                 entry of ``CLASSES``
        """
        fields = line.split(',')
        classes = cls.CLASSES[int(fields[0])]
        pixels = [float(value) / 255.0 for value in fields[1:]]
        return pixels, classes

    def write_file(self, file_path):
        """
        Write the loaded examples by delegating to ``self.examples.write_to``.

        :param file_path: path to desired output file
        :return:
        """
        self.examples.write_to(file_path)
예제 #2
0
    def __init__(self, input_file):
        """
        Load MNIST examples from a CSV file into ``self.examples``.

        Every non-blank line is parsed as ``label,pixel,pixel,...``: the label
        selects a target vector from ``self.CLASSES`` and every pixel value is
        divided by 255.0 before being stored.

        :param input_file: path to the MNIST CSV file to read
        :raises ValueError: if a label or pixel field is not numeric
        :raises KeyError: if a label is not a key of ``self.CLASSES``
        """
        self.examples = LearningExamples()
        cnt = 0
        # The context manager closes the file even if a malformed line raises,
        # and iterating the handle avoids holding the whole file in memory.
        with open(input_file, 'r') as f:
            for line in f:
                # Tolerate blank lines (e.g. an empty line at the end of file)
                # instead of failing on int('').
                if not line.strip():
                    continue
                fields = line.split(',')
                classes = self.CLASSES[int(fields[0])]
                pixels = [float(value) / 255.0 for value in fields[1:]]
                self.examples.add_data(pixels, classes)
                cnt += 1
                if cnt % 10000 == 0:
                    log.info('line %d: len pixels = %d, len fields = %d', cnt, len(pixels), len(fields))
    # NOTE(review): this is the interior of a command handler whose `def`
    # header is not visible in this snippet; `options`, `log`, `rand`, `os`
    # and `LearningExamples` come from the enclosing scope.
    # Defaults; both ANN sizes are recomputed inside the command branch below.
    input_filename = ''
    ann_num_inputs = 0
    ann_num_outputs = 0
#    sup_learn_data = SupLearningData()
    if options.cmd == 'enlarge_center_2x':
        try:
            #        with open(options.output_path, 'w') as output_path:
            # The option is converted with int() before use.
            ann_output_dim = int(options.ann_output_dim)
            filename_list = os.listdir(options.image_dir)
            log.info("%d files found in %s", len(filename_list), options.image_dir)
            # presumably `rand` is the stdlib `random` module (in-place
            # shuffle) - TODO confirm against the file's imports
            rand.shuffle(filename_list)
            file_cnt = len(filename_list)
            # Input count is (2 * dim) squared, output count is dim squared.
            # NOTE(review): looks like a square patch twice the output side
            # length, matching the command name - confirm.
            ann_num_inputs = (2 * ann_output_dim) * (2 * ann_output_dim)
            ann_num_outputs = ann_output_dim * ann_output_dim
            learn_examples = LearningExamples()
            # Log the effective settings for this run.
            log.info("image_dir %s          ", options.image_dir)
            log.info("output_path %s        ", options.output_path)
            log.info("num_images %s         ", options.num_images)
            log.info("samples_per_image %s  ", options.samples_per_image)
            log.info("ann_output_dim %s     ", options.ann_output_dim)
            log.info("ann_num_inputs %d     ", ann_num_inputs)
            log.info("ann_num_outputs %d    ", ann_num_outputs)
            # cnt counts only the '.jpg' entries, not every directory entry.
            cnt = 0
            for input_filename in filename_list:
                # Skip anything whose name does not end in '.jpg'
                # (the check is case-sensitive).
                if not bool(input_filename.endswith('.jpg')):
                    continue
                cnt += 1
                # Progress message on every 100th accepted image.
                if cnt % 100 == 0:
                    log.info('checking image %d: "%s"', cnt, input_filename)
                # Compares the accepted-image count with options.num_images;
                # the body of this branch is not visible in this snippet.
                if cnt > int(options.num_images):
예제 #4
0
                # NOTE(review): this snippet starts in the middle of an
                # if/elif chain; the opening branch and the first read of
                # input_matrix/output_matrix are not visible here.
                # Indexing row 0 raises IndexError if either matrix is empty.
                log.info('input_matrix has %d rows and %d cols', len(input_matrix), len(input_matrix[0]))
                log.info('output_matrix has %d rows and %d cols', len(output_matrix), len(output_matrix[0]))
                X_train = np.array(input_matrix)
                y_train = np.array(output_matrix)
                # Read the test file; this rebinds input_matrix/output_matrix,
                # which is why the training arrays were built first.
                num_entries, num_input_fields, num_output_fields, input_matrix, output_matrix = sup_learn_data.read_fann_data(fann_test_data)
                X_val = np.array(input_matrix)
                y_val = np.array(output_matrix)
                # Dataset passed to do_mlp: [(train inputs, train targets),
                # (validation inputs, validation targets)].
                data = [(X_train, y_train), (X_val, y_val)]
                do_mlp(dataset=data, n_hidden=[12, 12, 6], mean_loss_threshold=0.001, batch_size=1)
            elif (args.input_data_file_format == 'jsonz'):
                """
                Test with image data
                """
                # NOTE(review): both paths are hard-coded absolute paths into
                # one user's home directory.
                jsonz_training_data = '/home/hemkenhg/workspace/theano/examples/image_data/enlarge_center_2x-8-1k-train-a.jsonz'
                jsonz_test_data = '/home/hemkenhg/workspace/theano/examples/image_data/enlarge_center_2x-8-1k-test-a.jsonz'
                learn_examples = LearningExamples()
                # read_from() is unpacked into the same five values as
                # read_fann_data() in the branch above.
                num_entries, num_input_fields, num_output_fields, input_matrix, output_matrix = learn_examples.read_from(jsonz_training_data)
                # Indexing row 0 raises IndexError if either matrix is empty.
                log.info('input_matrix has %d rows and %d cols', len(input_matrix), len(input_matrix[0]))
                log.info('output_matrix has %d rows and %d cols', len(output_matrix), len(output_matrix[0]))
                X_train = np.array(input_matrix)
                y_train = np.array(output_matrix)
                # The same LearningExamples instance is reused for the test file.
                # NOTE(review): verify read_from() does not accumulate the
                # training rows into this second result.
                num_entries, num_input_fields, num_output_fields, input_matrix, output_matrix = learn_examples.read_from(jsonz_test_data)
                X_val = np.array(input_matrix)
                y_val = np.array(output_matrix)
                data = [(X_train, y_train), (X_val, y_val)]
                # Ten entries of 256 in n_hidden and batch_size=10 here,
                # versus [12, 12, 6] and batch_size=1 in the branch above.
                do_mlp(dataset=data, n_hidden=[256, 256, 256, 256, 256, 256, 256, 256, 256, 256], mean_loss_threshold=0.001, batch_size=10)
            elif (args.input_data_file_format == 'bar'):
                # Placeholder: the 'bar' format currently does nothing.
                pass
            else:
                # Any other format value is silently ignored.
                pass