class ReadMnistCsv(object):
    """
    Read MNIST data from a CSV file into a ``LearningExamples`` collection.

    Each non-blank line is parsed as comma separated fields: the first field
    is the digit label (a key of ``CLASSES``) and the remaining fields are
    pixel values, which are divided by 255.0 before being stored.

    see:
    http://pjreddie.com/projects/mnist-in-csv/
    http://makeyourownneuralnetwork.blogspot.com/2015/03/the-mnist-dataset-of-handwitten-digits.html
    """

    # One-hot encoding of each digit label: position ``d`` is 1 for digit ``d``.
    CLASSES = {
        0: [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        1: [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
        2: [0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
        3: [0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
        4: [0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
        5: [0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
        6: [0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
        7: [0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
        8: [0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
        9: [0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
    }

    def __init__(self, input_file):
        """
        Load every example found in ``input_file`` into ``self.examples``.

        :param input_file: path to the MNIST CSV file to read
        :raises ValueError: if a label or pixel field is not numeric
        :raises KeyError: if a label is not a key of ``CLASSES``
        """
        self.examples = LearningExamples()
        cnt = 0
        # Stream the file line by line; the context manager guarantees the
        # handle is closed even when a malformed row raises.
        with open(input_file, 'r') as f:
            for line in f:
                # A blank line (typically a trailing newline at end of file)
                # carries no example and would otherwise fail in int().
                if not line.strip():
                    continue
                fields = line.split(',')
                classes = self.CLASSES[int(fields[0])]
                # float() tolerates the trailing newline on the last field.
                pixels = [float(value) / 255.0 for value in fields[1:]]
                self.examples.add_data(pixels, classes)
                cnt += 1
                # Periodic progress report for large files.
                if cnt % 10000 == 0:
                    log.info('line %d: len pixels = %d, len fields = %d', cnt, len(pixels), len(fields))

    def write_file(self, file_path):
        """
        Write the loaded examples out by delegating to
        ``self.examples.write_to``.

        :param file_path: path to desired output file
        :return: None
        """
        self.examples.write_to(file_path)
def __init__(self, input_file):
    """
    Load every example found in ``input_file`` into ``self.examples``.

    Each non-blank line is split on commas: the first field is the digit
    label, looked up in ``self.CLASSES``, and the remaining fields are pixel
    values, which are divided by 255.0 before being stored.

    :param input_file: path to the MNIST CSV file to read
    :raises ValueError: if a label or pixel field is not numeric
    :raises KeyError: if a label is not a key of ``self.CLASSES``
    """
    self.examples = LearningExamples()
    cnt = 0
    # Stream the file line by line; the context manager guarantees the
    # handle is closed even when a malformed row raises.
    with open(input_file, 'r') as f:
        for line in f:
            # A blank line (typically a trailing newline at end of file)
            # carries no example and would otherwise fail in int().
            if not line.strip():
                continue
            fields = line.split(',')
            classes = self.CLASSES[int(fields[0])]
            # float() tolerates the trailing newline on the last field.
            pixels = [float(value) / 255.0 for value in fields[1:]]
            self.examples.add_data(pixels, classes)
            cnt += 1
            # Periodic progress report for large files.
            if cnt % 10000 == 0:
                log.info('line %d: len pixels = %d, len fields = %d', cnt, len(pixels), len(fields))
input_filename = '' ann_num_inputs = 0 ann_num_outputs = 0 # sup_learn_data = SupLearningData() if options.cmd == 'enlarge_center_2x': try: # with open(options.output_path, 'w') as output_path: ann_output_dim = int(options.ann_output_dim) filename_list = os.listdir(options.image_dir) log.info("%d files found in %s", len(filename_list), options.image_dir) rand.shuffle(filename_list) file_cnt = len(filename_list) ann_num_inputs = (2 * ann_output_dim) * (2 * ann_output_dim) ann_num_outputs = ann_output_dim * ann_output_dim learn_examples = LearningExamples() log.info("image_dir %s ", options.image_dir) log.info("output_path %s ", options.output_path) log.info("num_images %s ", options.num_images) log.info("samples_per_image %s ", options.samples_per_image) log.info("ann_output_dim %s ", options.ann_output_dim) log.info("ann_num_inputs %d ", ann_num_inputs) log.info("ann_num_outputs %d ", ann_num_outputs) cnt = 0 for input_filename in filename_list: if not bool(input_filename.endswith('.jpg')): continue cnt += 1 if cnt % 100 == 0: log.info('checking image %d: "%s"', cnt, input_filename) if cnt > int(options.num_images):
log.info('input_matrix has %d rows and %d cols', len(input_matrix), len(input_matrix[0])) log.info('output_matrix has %d rows and %d cols', len(output_matrix), len(output_matrix[0])) X_train = np.array(input_matrix) y_train = np.array(output_matrix) num_entries, num_input_fields, num_output_fields, input_matrix, output_matrix = sup_learn_data.read_fann_data(fann_test_data) X_val = np.array(input_matrix) y_val = np.array(output_matrix) data = [(X_train, y_train), (X_val, y_val)] do_mlp(dataset=data, n_hidden=[12, 12, 6], mean_loss_threshold=0.001, batch_size=1) elif (args.input_data_file_format == 'jsonz'): """ Test with image data """ jsonz_training_data = '/home/hemkenhg/workspace/theano/examples/image_data/enlarge_center_2x-8-1k-train-a.jsonz' jsonz_test_data = '/home/hemkenhg/workspace/theano/examples/image_data/enlarge_center_2x-8-1k-test-a.jsonz' learn_examples = LearningExamples() num_entries, num_input_fields, num_output_fields, input_matrix, output_matrix = learn_examples.read_from(jsonz_training_data) log.info('input_matrix has %d rows and %d cols', len(input_matrix), len(input_matrix[0])) log.info('output_matrix has %d rows and %d cols', len(output_matrix), len(output_matrix[0])) X_train = np.array(input_matrix) y_train = np.array(output_matrix) num_entries, num_input_fields, num_output_fields, input_matrix, output_matrix = learn_examples.read_from(jsonz_test_data) X_val = np.array(input_matrix) y_val = np.array(output_matrix) data = [(X_train, y_train), (X_val, y_val)] do_mlp(dataset=data, n_hidden=[256, 256, 256, 256, 256, 256, 256, 256, 256, 256], mean_loss_threshold=0.001, batch_size=10) elif (args.input_data_file_format == 'bar'): pass else: pass