# Example 1
# The download is a binary file, not images, so it is converted to images here.
# First extract the .gz archives into a folder,
# then run the code below:
import os
from skimage import io
import torchvision.datasets.mnist as mnist

# Directory containing the extracted (un-gzipped) MNIST idx files.
root = "F:/python_project/demo05pytorch训练和测试自己的图片数据/"

# Decode the raw idx binaries into (images, labels) tensor pairs using
# torchvision's MNIST reader helpers.
_train_images = mnist.read_image_file(
    os.path.join(root, 'train-images-idx3-ubyte'))
_train_labels = mnist.read_label_file(
    os.path.join(root, 'train-labels-idx1-ubyte'))
train_set = (_train_images, _train_labels)

_test_images = mnist.read_image_file(
    os.path.join(root, 't10k-images-idx3-ubyte'))
_test_labels = mnist.read_label_file(
    os.path.join(root, 't10k-labels-idx1-ubyte'))
test_set = (_test_images, _test_labels)

# Report the image-tensor shapes of each split.
print("training set :", train_set[0].size())
print("test set :", test_set[0].size())


def convert_to_img(train=True):
    """Export a (images, labels) tensor pair as .jpg files plus an index file.

    For ``train=True`` the module-level ``train_set`` is written to
    ``root/train/`` and listed in ``root/train.txt``; for ``train=False``
    the ``test_set`` goes to ``root/test/`` and ``root/test.txt``.
    Each index line has the form ``<image path> <integer label>``.
    """
    if train:
        data_set = train_set
        txt_path = root + 'train.txt'
        data_path = root + '/train/'
    else:
        # Bug fix: the original ignored train=False entirely, so the test
        # split was never exported even though the parameter suggests it.
        data_set = test_set
        txt_path = root + 'test.txt'
        data_path = root + '/test/'
    if not os.path.exists(data_path):
        os.makedirs(data_path)
    # 'with' guarantees the index file is closed even if imsave raises.
    with open(txt_path, 'w') as f:
        for i, (img, label) in enumerate(zip(data_set[0], data_set[1])):
            img_path = data_path + str(i) + '.jpg'
            io.imsave(img_path, img.numpy())
            # label is a 0-dim tensor; str(label) would write "tensor(5)",
            # which breaks any reader doing int(line.split()[1]).
            f.write(img_path + ' ' + str(int(label)) + '\n')
def test_to_superpixels():
    """Integration test: convert MNIST digits into SLIC superpixel graphs.

    Builds a throwaway MNIST dataset under a random /tmp directory, then
    checks that the ToSLIC transform produces well-formed graph data both
    for single examples and for mini-batches, with and without the optional
    segmentation-map / raw-image attributes.
    """
    # Random per-run root so parallel test runs cannot collide.
    root = osp.join('/', 'tmp', str(random.randrange(sys.maxsize)))

    raw_folder = osp.join(root, 'MNIST', 'raw')
    processed_folder = osp.join(root, 'MNIST', 'processed')

    makedirs(raw_folder)
    makedirs(processed_folder)
    # Fetch and decompress every raw idx archive listed in `resources`.
    for resource in resources:
        path = download_url(resource, raw_folder)
        extract_gz(path, osp.join(root, raw_folder))

    test_set = (
        read_image_file(osp.join(raw_folder, 't10k-images-idx3-ubyte')),
        read_label_file(osp.join(raw_folder, 't10k-labels-idx1-ubyte')),
    )

    # NOTE(review): the 10k test split is saved as BOTH training.pt and
    # test.pt — presumably deliberate to keep the fixture small; confirm.
    torch.save(test_set, osp.join(processed_folder, 'training.pt'))
    torch.save(test_set, osp.join(processed_folder, 'test.pt'))

    dataset = MNIST(root, download=False)

    # Basic transform: PIL image -> tensor -> SLIC superpixel graph.
    dataset.transform = T.Compose([T.ToTensor(), ToSLIC()])

    data, y = dataset[0]
    assert len(data) == 2  # only `pos` and `x` attributes are present
    assert data.pos.dim() == 2 and data.pos.size(1) == 2  # 2-D centroids
    assert data.x.dim() == 2 and data.x.size(1) == 1  # one feature per node
    assert data.pos.size(0) == data.x.size(0)
    assert y == 7  # first label of the MNIST test split

    # Batched loading must add the `batch` and `ptr` bookkeeping vectors.
    loader = DataLoader(dataset, batch_size=2, shuffle=False)
    for data, y in loader:
        assert len(data) == 4
        assert data.pos.dim() == 2 and data.pos.size(1) == 2
        assert data.x.dim() == 2 and data.x.size(1) == 1
        assert data.batch.dim() == 1
        assert data.ptr.dim() == 1
        assert data.pos.size(0) == data.x.size(0) == data.batch.size(0)
        assert y.tolist() == [7, 2]  # first two MNIST test labels
        break

    # Extended transform: additionally keep the per-pixel segmentation map
    # and the original image on the graph object.
    dataset.transform = T.Compose(
        [T.ToTensor(), ToSLIC(add_seg=True, add_img=True)])

    data, y = dataset[0]
    assert len(data) == 4
    assert data.pos.dim() == 2 and data.pos.size(1) == 2
    assert data.x.dim() == 2 and data.x.size(1) == 1
    assert data.pos.size(0) == data.x.size(0)
    assert data.seg.size() == (1, 28, 28)  # per-pixel superpixel ids
    assert data.img.size() == (1, 1, 28, 28)  # original grayscale image
    # Segment ids are contiguous from 0 with one graph node per segment.
    assert data.seg.max().item() + 1 == data.x.size(0)
    assert y == 7

    loader = DataLoader(dataset, batch_size=2, shuffle=False)
    for data, y in loader:
        assert len(data) == 6
        assert data.pos.dim() == 2 and data.pos.size(1) == 2
        assert data.x.dim() == 2 and data.x.size(1) == 1
        assert data.batch.dim() == 1
        assert data.ptr.dim() == 1
        assert data.pos.size(0) == data.x.size(0) == data.batch.size(0)
        assert data.seg.size() == (2, 28, 28)
        assert data.img.size() == (2, 1, 28, 28)
        assert y.tolist() == [7, 2]
        break

    shutil.rmtree(root)
def main():
    """Entry point: prepare MNIST data, then train/evaluate the STN model.

    Steps:
      1. Parse CLI arguments (data/training dirs, batch size, epochs, ...).
      2. If the torchvision-style processed files are missing, un-gzip the
         raw idx files from ``data_dir`` and save training.pt / test.pt
         under the processed folder of ``training_dir``.
      3. Train for ``args.epochs`` epochs with plain SGD, evaluating after
         each epoch, then visualize the learned STN transform.
    """
    parser = get_parser()
    args = parser.parse_args()

    data_dir = args.data_dir
    training_dir = args.training_dir

    processed_dir = path.join(training_dir, mnist.MNIST.processed_folder)
    training_file = path.join(processed_dir, mnist.MNIST.training_file)
    test_file = path.join(processed_dir, mnist.MNIST.test_file)

    # Bug fix: the original second condition was ``not path.join(...)``,
    # which is always False (path.join returns a non-empty string), so a
    # missing test.pt never triggered reprocessing.
    if not path.exists(training_file) or not path.exists(test_file):
        # Process raw archives and save as torch files.
        LOG.info('Processing dataset...')

        # Un-gzip every raw archive from data_dir into training_dir.
        for file in os.listdir(data_dir):
            full_path = path.join(data_dir, file)
            save_path = path.join(training_dir, file.replace('.gz', ''))
            with open(save_path,
                      'wb') as out_f, gzip.GzipFile(full_path) as zip_f:
                out_f.write(zip_f.read())

        # Decode the idx files into (images, labels) tensor pairs.
        training_set = (mnist.read_image_file(
            path.join(training_dir, 'train-images-idx3-ubyte')),
                        mnist.read_label_file(
                            path.join(training_dir,
                                      'train-labels-idx1-ubyte')))
        test_set = (mnist.read_image_file(
            path.join(training_dir, 't10k-images-idx3-ubyte')),
                    mnist.read_label_file(
                        path.join(training_dir, 't10k-labels-idx1-ubyte')))
        # exist_ok guards against a half-complete previous run where the
        # folder exists but one of the .pt files is missing.
        os.makedirs(processed_dir, exist_ok=True)
        with open(training_file, 'wb') as f:
            torch.save(training_set, f)
        with open(test_file, 'wb') as f:
            torch.save(test_set, f)

        LOG.info('Dataset processing done!')

    # Standard MNIST normalization constants (mean, std) — shared by both
    # loaders instead of being duplicated.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307, ), (0.3081, )),
    ])

    # Training dataset.
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST(root=training_dir,
                       train=True,
                       download=False,
                       transform=transform),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=4)
    # Test dataset.
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST(root=training_dir, train=False, transform=transform),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=4)

    model = Net()
    # NOTE(review): use_cuda looks like a module-level flag — confirm.
    if use_cuda:
        model.cuda()

    # Train the classifier with SGD; the spatial transformer network is
    # learned end-to-end alongside the classification objective.
    optimizer = optim.SGD(model.parameters(), lr=0.01)

    for epoch in range(1, args.epochs + 1):
        train(epoch,
              train_loader,
              model,
              optimizer,
              use_mlboard=not args.skip_mlboard)
        test(test_loader, model, use_mlboard=not args.skip_mlboard)

    # Visualize the STN transformation on some input batch.
    visualize_stn(test_loader, model, args.training_dir)
# Example 4
    def download(self):
        """Download, extract and process the EMNIST data if the processed
        files are not already present in ``processed_folder``.

        Steps: fetch the single EMNIST zip archive, extract it into the raw
        folder, un-gzip every contained idx file, then for each split decode
        the train/test idx files and save them as torch .pt files.
        """
        import errno
        from contextlib import closing
        from six.moves import urllib
        import gzip
        import shutil
        import zipfile

        # Nothing to do when the processed files already exist.
        if self._check_exists():
            return

        # Create raw/ and processed/; tolerate a previous partial run
        # having already created them.
        try:
            os.makedirs(os.path.join(self.root, self.raw_folder))
            os.makedirs(os.path.join(self.root, self.processed_folder))
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise

        print('Downloading ' + self.url)
        filename = self.url.rpartition('/')[2]
        raw_folder = os.path.join(self.root, self.raw_folder)
        file_path = os.path.join(raw_folder, filename)
        # closing() releases the HTTP response even on error (the original
        # never closed the urlopen handle).
        with closing(urllib.request.urlopen(self.url)) as data, \
                open(file_path, 'wb') as f:
            f.write(data.read())

        print('Extracting zip archive')
        with zipfile.ZipFile(file_path) as zip_f:
            zip_f.extractall(raw_folder)
        os.unlink(file_path)  # the zip is no longer needed once extracted
        # The archive contains a gzip/ folder of per-file .gz idx archives.
        gzip_folder = os.path.join(raw_folder, 'gzip')
        for gzip_file in os.listdir(gzip_folder):
            if gzip_file.endswith('.gz'):
                print('Extracting ' + gzip_file)
                with open(os.path.join(raw_folder, gzip_file.replace('.gz', '')), 'wb') as out_f, \
                        gzip.GzipFile(os.path.join(gzip_folder, gzip_file)) as zip_f:
                    out_f.write(zip_f.read())
        shutil.rmtree(gzip_folder)

        # Decode each split and save as (images, labels) torch files.
        for split in self.splits:
            print('Processing ' + split)
            training_set = (
                read_image_file(
                    os.path.join(
                        raw_folder,
                        'emnist-{}-train-images-idx3-ubyte'.format(split))),
                read_label_file(
                    os.path.join(
                        raw_folder,
                        'emnist-{}-train-labels-idx1-ubyte'.format(split))))
            test_set = (
                read_image_file(
                    os.path.join(
                        raw_folder,
                        'emnist-{}-test-images-idx3-ubyte'.format(split))),
                read_label_file(
                    os.path.join(
                        raw_folder,
                        'emnist-{}-test-labels-idx1-ubyte'.format(split))))
            with open(
                    os.path.join(self.root, self.processed_folder,
                                 self._training_file(split)), 'wb') as f:
                torch.save(training_set, f)
            with open(
                    os.path.join(self.root, self.processed_folder,
                                 self._test_file(split)), 'wb') as f:
                torch.save(test_set, f)

        print('Done!')
# Operating-system related helpers
import os
# Image input/output helpers
from skimage import io
# Dataset-related helpers
import torchvision.datasets.mnist as mnist

# Root directory of the Fashion-MNIST files.
# root = "/home/s/PycharmProjects/untitled/fashion-mnist/data/fashion"
root = "H:/Paper Code/fashion-mnist"

# Decode the raw idx binaries into (images, labels) tuples.  torchvision's
# MNIST readers work here because Fashion-MNIST uses the same idx format.
# (os.path.join concatenates path components; os.path.split is its inverse.)
train_set = (
    mnist.read_image_file(os.path.join(root, 'train-images-idx3-ubyte')),
    mnist.read_label_file(os.path.join(root, 'train-labels-idx1-ubyte')),
)
test_set = (
    mnist.read_image_file(os.path.join(root, 't10k-images-idx3-ubyte')),
    mnist.read_label_file(os.path.join(root, 't10k-labels-idx1-ubyte')),
)

# Inspect the test split: its type (tuple), its length (2), and the types
# of its two elements.
print(type(test_set))
print(len(test_set))
print(type(test_set[0]))
print(type(test_set[1]))