def download_and_save_data(self):
        """Ensure the train and test text files exist under MNIST_DATA_FOLDER.

        The target paths are stored on ``self.train_file`` and
        ``self.test_file``. For each path that ``is_file`` reports as
        missing, the data is fetched with ``self.__download_data`` and
        written out with ``savetxt``; files already present are left alone.
        """
        # (attribute to set, file name, ``train`` flag, progress message)
        splits = (
            ('train_file', 'train.txt', True, 'Writing train text file...'),
            ('test_file', 'test.txt', False, 'Writing test text file...'),
        )
        for attr_name, file_name, is_train, message in splits:
            target = join_paths(MNIST_DATA_FOLDER, file_name)
            setattr(self, attr_name, target)
            if is_file(target):
                # Already on disk: skip the download entirely.
                continue
            samples = self.__download_data(train=is_train)
            print(message)
            savetxt(target, samples)
 def __load_images(self, download_info):
     """Download the gzipped image file and return its pixels as a 2-D array.

     The file named by ``download_info['NAME_IMAGE']`` is fetched relative
     to ``self.__url_base`` into the scratch file ``./delete.me``, its
     header is validated, and the scratch file is always removed
     afterwards, whether or not parsing succeeded.

     Args:
         download_info: mapping providing ``'NAME_IMAGE'`` (remote file
             name) and ``'SAMPLES'`` (expected number of images).

     Returns:
         A writable ``numpy.uint8`` array of shape
         ``(SAMPLES, rows * cols)``, one flattened image per row.

     Raises:
         Exception: if the magic number, the entry count or the image
             dimensions read from the header do not match the expected
             values.
     """
     download_url = join_paths(self.__url_base,
                               download_info.get('NAME_IMAGE'))
     print('Downloading ' + download_url)
     gzfname, _ = urlretrieve(download_url, './delete.me')
     print('Done.')
     expected = download_info.get('SAMPLES')
     try:
         with gzip.open(gzfname) as gz:
             # The magic number is read with native byte order ('I'), unlike
             # the explicit big-endian ('>I') reads that follow.
             # NOTE(review): CHECK_IMAGE_NUMBER presumably stores the value
             # as seen on a little-endian host - confirm before changing
             # this format string.
             magic = struct.unpack('I', gz.read(4))[0]
             if magic != MnistDownloader.CHECK_IMAGE_NUMBER:
                 raise Exception('Invalid file: unexpected magic number.')
             count = struct.unpack('>I', gz.read(4))[0]
             if count != expected:
                 raise Exception(
                     'Invalid file: expected {0} entries.'.format(expected))
             crow = struct.unpack('>I', gz.read(4))[0]
             ccol = struct.unpack('>I', gz.read(4))[0]
             if (crow != MnistDownloader.IMAGE_SIZE
                     or ccol != MnistDownloader.IMAGE_SIZE):
                 raise Exception(
                     'Invalid file: expected 28 rows/cols per image.')
             # np.fromstring is deprecated for binary input; np.frombuffer
             # is the replacement. It returns a read-only view of the bytes,
             # so copy to hand back a writable array as before.
             res = np.frombuffer(gz.read(expected * crow * ccol),
                                 dtype=np.uint8).copy()
     finally:
         os.remove(gzfname)
     return res.reshape((expected, crow * ccol))
 def __load_labels(self, download_info):
     """Download the gzipped label file and return the labels as a column.

     The file named by ``download_info['NAME_LABELS']`` is fetched relative
     to ``self.__url_base`` into the scratch file ``./delete.me``, its
     header is validated, and the scratch file is always removed
     afterwards, whether or not parsing succeeded.

     Args:
         download_info: mapping providing ``'NAME_LABELS'`` (remote file
             name) and ``'SAMPLES'`` (expected number of labels).

     Returns:
         A writable ``numpy.uint8`` array of shape ``(SAMPLES, 1)``.

     Raises:
         Exception: if the magic number or the row count read from the
             header does not match the expected values.
     """
     download_url = join_paths(self.__url_base,
                               download_info.get('NAME_LABELS'))
     print('Downloading ' + download_url)
     gzfname, _ = urlretrieve(download_url, './delete.me')
     print('Done.')
     expected = download_info.get('SAMPLES')
     try:
         with gzip.open(gzfname) as gz:
             # The magic number is read with native byte order ('I'), unlike
             # the explicit big-endian ('>I') read that follows.
             # NOTE(review): CHECK_LABELS_NUMBER presumably stores the value
             # as seen on a little-endian host - confirm before changing
             # this format string.
             magic = struct.unpack('I', gz.read(4))[0]
             if magic != MnistDownloader.CHECK_LABELS_NUMBER:
                 raise Exception('Invalid file: unexpected magic number.')
             count = struct.unpack('>I', gz.read(4))[0]
             if count != expected:
                 raise Exception(
                     'Invalid file: expected {0} rows.'.format(expected))
             # np.fromstring is deprecated for binary input; np.frombuffer
             # is the replacement. It returns a read-only view of the bytes,
             # so copy to hand back a writable array as before.
             res = np.frombuffer(gz.read(expected), dtype=np.uint8).copy()
     finally:
         os.remove(gzfname)
     return res.reshape((expected, 1))
Esempio n. 4
0
    learner = cntk.sgd(model_definition.model.parameters, lr_schedule)

    tensor_writer = TensorWriter(model_definition.model)
    trainer = cntk.Trainer(model_definition.model,
                           (model_definition.get_loss(),
                            model_definition.get_classification_error()),
                           [learner], tensor_writer.get_writer())

    # Training
    minibatch_size = 64
    num_samples_per_sweep = 60000
    num_sweeps_to_train_with = 10
    num_minibatches_to_train = (num_samples_per_sweep *
                                num_sweeps_to_train_with) / minibatch_size

    reader_train = init_reader(join_paths(MNIST_DATA_FOLDER, 'train.txt'),
                               input_dim, num_output_classes)
    input_map = {
        model_definition.label: reader_train.streams.labels,
        model_definition.input: reader_train.streams.features
    }

    for i in range(0, int(num_minibatches_to_train)):
        data = reader_train.next_minibatch(minibatch_size, input_map=input_map)
        output = trainer.train_minibatch(data,
                                         outputs=[model_definition.input])
        tensor_writer.write_model_params(i)
        #tensor_writer.write_image(output[1], i)
        batchsize, loss, error = progress(trainer, i, frequency=500, verbose=1)

    # Test