def download_and_save_data(self):
    """Make sure the MNIST train/test text files exist under MNIST_DATA_FOLDER.

    For each split, the file is created (downloaded and written via savetxt)
    only when it is not already present on disk; existing files are reused.
    Side effects: sets self.train_file and self.test_file to the target paths.
    """
    self.train_file = join_paths(MNIST_DATA_FOLDER, 'train.txt')
    self.test_file = join_paths(MNIST_DATA_FOLDER, 'test.txt')
    # Same cache-or-download logic for both splits, driven by a small table.
    splits = (
        (self.train_file, True, 'Writing train text file...'),
        (self.test_file, False, 'Writing test text file...'),
    )
    for target_path, is_train, message in splits:
        if is_file(target_path):
            continue  # already cached locally
        data = self.__download_data(train=is_train)
        print(message)
        savetxt(target_path, data)
def __load_images(self, download_info):
    """Download and parse one gzip'd MNIST image file.

    Args:
        download_info: dict providing 'NAME_IMAGE' (remote file name,
            joined onto self.__url_base) and 'SAMPLES' (expected image
            count) — presumably; inferred from the keys read here.

    Returns:
        numpy uint8 array of shape (SAMPLES, 28*28), one flattened
        image per row.

    Raises:
        Exception: if the magic number, sample count, or row/column
            size does not match the expected MNIST IDX format.
    """
    download_url = join_paths(self.__url_base, download_info.get('NAME_IMAGE'))
    print('Downloading ' + download_url)
    gzfname, h = urlretrieve(download_url, './delete.me')
    print('Done.')
    try:
        with gzip.open(gzfname) as gz:
            # NOTE(review): the magic number is read native-endian ('I')
            # while every other header field is big-endian ('>I'); this
            # only works if CHECK_IMAGE_NUMBER holds the byte-swapped
            # magic — confirm against the constant's definition.
            n = struct.unpack('I', gz.read(4))
            if n[0] != MnistDownloader.CHECK_IMAGE_NUMBER:
                raise Exception('Invalid file: unexpected magic number.')
            n = struct.unpack('>I', gz.read(4))[0]
            if n != download_info.get('SAMPLES'):
                raise Exception(
                    'Invalid file: expected {0} entries.'.format(
                        download_info.get('SAMPLES')))
            crow = struct.unpack('>I', gz.read(4))[0]
            ccol = struct.unpack('>I', gz.read(4))[0]
            if crow != MnistDownloader.IMAGE_SIZE or ccol != MnistDownloader.IMAGE_SIZE:
                raise Exception(
                    'Invalid file: expected 28 rows/cols per image.')
            # np.fromstring is deprecated for binary input; np.frombuffer
            # is the drop-in replacement (and avoids an extra copy).
            res = np.frombuffer(
                gz.read(download_info.get('SAMPLES') * crow * ccol),
                dtype=np.uint8)
    finally:
        # Always delete the temporary download, even when parsing fails.
        os.remove(gzfname)
    return res.reshape((download_info.get('SAMPLES'), crow * ccol))
def __load_labels(self, download_info):
    """Download and parse one gzip'd MNIST label file.

    Args:
        download_info: dict providing 'NAME_LABELS' (remote file name,
            joined onto self.__url_base) and 'SAMPLES' (expected label
            count) — presumably; inferred from the keys read here.

    Returns:
        numpy uint8 array of shape (SAMPLES, 1), one label per row.

    Raises:
        Exception: if the magic number or row count does not match the
            expected MNIST IDX format.
    """
    download_url = join_paths(self.__url_base, download_info.get('NAME_LABELS'))
    print('Downloading ' + download_url)
    gzfname, h = urlretrieve(download_url, './delete.me')
    print('Done.')
    try:
        with gzip.open(gzfname) as gz:
            # NOTE(review): magic number read native-endian ('I') while
            # the row count is big-endian ('>I'); this relies on
            # CHECK_LABELS_NUMBER being the byte-swapped magic — confirm.
            n = struct.unpack('I', gz.read(4))
            if n[0] != MnistDownloader.CHECK_LABELS_NUMBER:
                raise Exception('Invalid file: unexpected magic number.')
            n = struct.unpack('>I', gz.read(4))
            if n[0] != download_info.get('SAMPLES'):
                raise Exception('Invalid file: expected {0} rows.'.format(
                    download_info.get('SAMPLES')))
            # np.fromstring is deprecated for binary input; np.frombuffer
            # is the drop-in replacement (and avoids an extra copy).
            res = np.frombuffer(gz.read(download_info.get('SAMPLES')),
                                dtype=np.uint8)
    finally:
        # Always delete the temporary download, even when parsing fails.
        os.remove(gzfname)
    return res.reshape((download_info.get('SAMPLES'), 1))
# Build the SGD learner, TensorBoard writer and trainer around the model.
learner = cntk.sgd(model_definition.model.parameters, lr_schedule)
tensor_writer = TensorWriter(model_definition.model)
trainer = cntk.Trainer(model_definition.model,
                       (model_definition.get_loss(),
                        model_definition.get_classification_error()),
                       [learner], tensor_writer.get_writer())

# Training
minibatch_size = 64
num_samples_per_sweep = 60000
num_sweeps_to_train_with = 10
# Integer division instead of float '/' + int(): 60000 * 10 // 64 = 9375
# full minibatches, with no float round-trip.
num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) // minibatch_size

reader_train = init_reader(join_paths(MNIST_DATA_FOLDER, 'train.txt'),
                           input_dim, num_output_classes)
# Map the model's label/input variables onto the reader's streams.
input_map = {
    model_definition.label: reader_train.streams.labels,
    model_definition.input: reader_train.streams.features
}

for i in range(num_minibatches_to_train):
    data = reader_train.next_minibatch(minibatch_size, input_map=input_map)
    # Request the input back as an output so it could be visualized below.
    output = trainer.train_minibatch(data, outputs=[model_definition.input])
    tensor_writer.write_model_params(i)
    #tensor_writer.write_image(output[1], i)
    # progress() logs/collects metrics every `frequency` minibatches.
    batchsize, loss, error = progress(trainer, i, frequency=500, verbose=1)

# Test