Example #1
def load_emnist(val_size=10000, seed=None):
    """Return the train (55k), val (5k, randomly drawn from the original test set) and test (10k) dataset for MNIST."""
    image_transform = transforms.Compose([
        # EMNIST images are flipped and rotated by default, fix this here.
        transforms.RandomHorizontalFlip(1),
        transforms.RandomRotation((90, 90)),
        transforms.ToTensor(),
        transforms.Normalize((0.1307, ), (0.3081, ))
    ])
    target_transform = lambda x: x - 1  # make labels start at 0 instead of 1

    raw_train_dataset = datasets.EMNIST('data/emnist',
                                        split='letters',
                                        train=True,
                                        download=True,
                                        transform=image_transform,
                                        target_transform=target_transform)
    test_dataset = datasets.EMNIST('data/emnist',
                                   split='letters',
                                   train=False,
                                   download=True,
                                   transform=image_transform,
                                   target_transform=target_transform)

    # Split val_size samples from the train dataset for validation (similar to Sacramento et al. 2018).
    utils.seed_torch(seed)
    train_dataset, val_dataset = torch.utils.data.dataset.random_split(
        raw_train_dataset, (len(raw_train_dataset) - val_size, val_size))

    return train_dataset, val_dataset, test_dataset
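Note: `utils.seed_torch` above is a project-local helper that is not shown here. A minimal sketch, assuming it simply seeds the Python, NumPy and PyTorch RNGs (and is a no-op for None):

import random
import numpy as np
import torch

def seed_torch(seed=None):
    """Hypothetical stand-in for utils.seed_torch: seed all common RNGs."""
    if seed is None:
        return
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)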
Example #2
    def _load(self):
        """
        Load the EMNIST byclass dataset.
        :rtype: Tuple[DataLoader, DataLoader]
        :return: train and test data loaders
        """
        transformations = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1722, ), (0.3310, ))
        ])

        train_loader = DataLoader(datasets.EMNIST(self.data_path,
                                                  split="byclass",
                                                  train=True,
                                                  download=True,
                                                  transform=transformations),
                                  batch_size=self.batch_size,
                                  shuffle=True,
                                  pin_memory=True)

        test_loader = DataLoader(datasets.EMNIST(self.data_path,
                                                 split="byclass",
                                                 train=False,
                                                 download=False,
                                                 transform=transformations),
                                 batch_size=self.batch_size,
                                 shuffle=True,
                                 pin_memory=True)

        dataset_test_len = len(test_loader.dataset)
        dataset_train_len = len(train_loader.dataset)
        print("Длина обучающего датасета {}\n Длина трениро"
              "вочного датасета\n".format(dataset_train_len, dataset_test_len))
        return train_loader, test_loader
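The Normalize constants here (0.1722, 0.3310 for the byclass split, versus 0.1307, 0.3081 for MNIST-style splits) are precomputed statistics of the training images. A sketch of how such values can be derived (the data path is an assumption):

import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

train_set = datasets.EMNIST('data/emnist', split='byclass', train=True,
                            download=True, transform=transforms.ToTensor())
loader = DataLoader(train_set, batch_size=1024)

# accumulate sums of pixels and squared pixels to get mean and std
total, total_sq, n = 0.0, 0.0, 0
for x, _ in loader:
    total += x.sum().item()
    total_sq += (x ** 2).sum().item()
    n += x.numel()
mean = total / n
std = (total_sq / n - mean ** 2) ** 0.5
print(f"mean={mean:.4f}, std={std:.4f}")  # should come out near 0.1722, 0.3310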
Example #3
def load_emnist_dataset():
    import torchvision.datasets as datasets
    mnist_train = datasets.EMNIST(root='../data/emnist',
                                  split='balanced',
                                  train=True,
                                  download=True,
                                  transform=None)
    mnist_test = datasets.EMNIST(root='../data/emnist',
                                 split='balanced',
                                 train=False,
                                 download=True,
                                 transform=None)
    # With transform=None the datasets yield (PIL image, int) pairs, so the
    # labels are plain ints; np.int/np.float were removed in NumPy >= 1.24.
    test_labels = np.array(
        [mnist_test[i][1] for i in range(len(mnist_test))],
        dtype=np.int64)
    train_labels = np.array(
        [mnist_train[i][1] for i in range(len(mnist_train))],
        dtype=np.int64)
    test = np.array([
        np.asarray(mnist_test[i][0]).reshape(28 * 28)
        for i in range(len(mnist_test))
    ], dtype=np.float64)
    train = np.array([
        np.asarray(mnist_train[i][0]).reshape(28 * 28)
        for i in range(len(mnist_train))
    ], dtype=np.float64)
    train /= 255.  # normalize data to be in range [0,1]
    test /= 255.
    return train, train_labels, test, test_labels, [28, 28]
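A usage sketch (the shape figures assume the 'balanced' split, which has 112,800 train and 18,800 test images over 47 classes):

train, train_labels, test, test_labels, shape = load_emnist_dataset()
print(train.shape, test.shape)                  # (112800, 784) (18800, 784)
print(train_labels.min(), train_labels.max())   # 0 46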
Example #4
    def __init__(self):
        super().__init__()
        _path = Config().data.data_path

        train_transform = transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.RandomAffine(degrees=10,
                                    translate=(0.2, 0.2),
                                    scale=(0.8, 1.2)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.5], std=[0.5])
        ])

        test_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.5], std=[0.5])
        ])

        self.trainset = datasets.EMNIST(root=_path,
                                        split='balanced',
                                        train=True,
                                        download=True,
                                        transform=train_transform)
        self.testset = datasets.EMNIST(root=_path,
                                       split='balanced',
                                       train=False,
                                       download=True,
                                       transform=test_transform)
Example #5
def get_dataset(name, subset=None):
    if name == 'EMNIST':
        dataset = datasets.EMNIST('./data/EMNIST',
                                  train=True,
                                  download=True,
                                  split='byclass',
                                  transform=transforms.ToTensor())
    elif name == 'MNIST':
        dataset = datasets.MNIST('./data/MNIST',
                                 train=True,
                                 download=True,
                                 transform=transforms.ToTensor())
    elif name == 'CIFAR10':
        dataset = datasets.CIFAR10('./data/CIFAR10',
                                   train=True,
                                   download=True,
                                   transform=transforms.ToTensor())
    elif name == 'SVHN':
        dataset = datasets.SVHN('./data/SVHN',
                                split='train',
                                download=True,
                                transform=transforms.ToTensor())
    else:
        raise ValueError(f"Unknown dataset: {name}")

    if (subset is None) or (subset >= len(dataset)):
        return dataset
    else:
        split = (subset, len(dataset) - subset)
        subset, _ = torch.utils.data.random_split(dataset, split)
        return subset
Example #6
def main():
    # curl https://www.itl.nist.gov/iaui/vip/cs_links/EMNIST/gzip.zip -o ../data/EMNIST/raw/emnist.zip
    train_loader = torch.utils.data.DataLoader(
        datasets.EMNIST('../data', 'balanced', train=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
#                          transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=300, shuffle=False)

    test_loader = torch.utils.data.DataLoader(
        datasets.EMNIST('../data', 'balanced', train=False,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
#                          transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=500, shuffle=False)


    model = Net()
    optimizer = optim.SGD(model.parameters(), lr=0.1)

    for epoch in range(3):
        train(model, train_loader, optimizer, epoch)
        test(model, test_loader)
        torch.save(model.state_dict(), "emnist.pt")
Example #7
def load_emnist(batch_size):
    transformations = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.1722, ), (0.3310, ))])

    train_loader = torch.utils.data.DataLoader(datasets.EMNIST(
        DATAPATHS['EMNIST'],
        split="byclass",
        train=True,
        download=True,
        transform=transformations),
                                               batch_size=batch_size,
                                               shuffle=True,
                                               pin_memory=True)

    test_loader = torch.utils.data.DataLoader(datasets.EMNIST(
        DATAPATHS['EMNIST'],
        split="byclass",
        train=False,
        download=False,
        transform=transformations),
                                              batch_size=batch_size,
                                              shuffle=True,
                                              pin_memory=True)

    dataset_test_len = len(test_loader.dataset)
    dataset_train_len = len(train_loader.dataset)
    print("Training dataset length: {}\nTest dataset length: {}".format(
        dataset_train_len, dataset_test_len))
    return train_loader, test_loader
Example #8
    def get_data(
        self, data_filepath, val_set_percentage, random_split_seed, download=False
    ):
        train_set = datasets.EMNIST(
            root=data_filepath,
            split="balanced",
            train=True,
            download=download,
            transform=self.transform_train,
        )
        num_training_items = int(len(train_set) * (1.0 - val_set_percentage))
        num_val_items = len(train_set) - num_training_items

        train_set, val_set = torch.utils.data.random_split(
            train_set,
            [num_training_items, num_val_items],
            generator=torch.Generator().manual_seed(random_split_seed),
        )

        test_set = datasets.EMNIST(
            root=data_filepath,
            split="balanced",
            train=False,
            transform=self.transform_validate,
        )
        num_labels = 47
        return train_set, val_set, test_set, num_labels
Example #9
    def __init__(self, dataset_name: str, root_dir: Path) -> None:

        self.root_dir = root_dir
        self.dataset_name = dataset_name

        if self.dataset_name == "MNIST":
            ## Reference: https://stackoverflow.com/a/66816284
            new_mnist_mirror = 'https://ossci-datasets.s3.amazonaws.com/mnist'
            datasets.MNIST.resources = [
                ('/'.join([new_mnist_mirror,
                           url.split('/')[-1]]), md5)
                for url, md5 in datasets.MNIST.resources
            ]

            transform = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((0.1307, ), (0.3081, )),
                transforms.Lambda(
                    lambda x: torch.flatten(x, start_dim=1).squeeze())
            ])

            self.train_data = datasets.MNIST(root_dir / "raw/",
                                             train=True,
                                             download=True,
                                             transform=transform)
            self.test_data = datasets.MNIST(root_dir / "raw/",
                                            train=False,
                                            download=True,
                                            transform=transform)
            self.num_train_data = len(self.train_data)
            self.num_classes = 10
        elif self.dataset_name == "EMNIST":
            self.train_data = datasets.EMNIST(root_dir / "raw/",
                                              split="letters",
                                              train=True,
                                              download=True)
            self.test_data = datasets.EMNIST(root_dir / "raw/",
                                             split="letters",
                                             train=False,
                                             download=True)
            self.num_train_data = len(self.train_data)
            self.num_classes = 26  # the letters split has 26 classes
        elif self.dataset_name == "CIFAR10":
            transform = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
            ])

            self.train_data = datasets.CIFAR10(root_dir / "raw/",
                                               train=True,
                                               download=True,
                                               transform=transform)
            self.test_data = datasets.CIFAR10(root_dir / "raw/",
                                              train=False,
                                              download=True,
                                              transform=transform)

            self.num_train_data = len(self.train_data)
            self.num_classes = 10
        else:
            raise ValueError("Unknown dataset_name")
Example #10
def download_EMNIST(split='letters'):
    """
    Download EMNIST dataset and save it into data folder.
    :param split: ['balanced', 'byclass', 'bymerge', 'digits', 'letters', 'mnist']
    """
    data_folder = os.path.join(ROOT_DIR, DATA_DIR)

    dsets.EMNIST(root=data_folder, train=True, transform=transforms.ToTensor(), download=True, split=split)
    dsets.EMNIST(root=data_folder, train=False, transform=transforms.ToTensor(), download=True, split=split)
Example #11
def get_data_loader():
    # Get Data
    root = './data'
    transform_labeled = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(size=28,
                              padding=int(28 * 0.125),
                              padding_mode='reflect'),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307, ), std=(0.3081, ))
    ])
    transform_val = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.1307, ), (0.3081, ))])
    base_dataset = datasets.EMNIST(root,
                                   train=True,
                                   split='balanced',
                                   download=True)
    label_size = int(args.basicLabelRatio * len(base_dataset))
    train_labeled_idxs, train_unlabeled_idxs = x_u_split(base_dataset.targets,
                                                         label_size,
                                                         args.k_img,
                                                         7 * args.k_img,
                                                         num_classes=47)
    labeled_dataset = EMNISTSSL(root,
                                train_labeled_idxs,
                                train=True,
                                transform=transform_labeled)
    unlabeled_dataset = EMNISTSSL(root,
                                  train_unlabeled_idxs,
                                  train=True,
                                  transform=TransformFix(mean=(0.1307, ),
                                                         std=(0.3081, ),
                                                         size=28))
    test_dataset = datasets.EMNIST(root,
                                   train=False,
                                   split='balanced',
                                   transform=transform_val,
                                   download=True)

    labeled_loader = DataLoader(labeled_dataset,
                                args.bs,
                                num_workers=4,
                                pin_memory=True,
                                shuffle=True)
    unlabeled_loader = DataLoader(unlabeled_dataset,
                                  args.bs * 7,
                                  num_workers=4,
                                  pin_memory=True,
                                  shuffle=True)
    test_loader = DataLoader(test_dataset,
                             args.bs,
                             shuffle=True,
                             num_workers=4,
                             pin_memory=True)
    return labeled_loader, unlabeled_loader, test_loader
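`x_u_split` and `EMNISTSSL` are project-local and not shown. A minimal sketch of what `x_u_split` plausibly does, assuming it samples an equal number of labeled indices per class and cyclically expands both index lists to the requested epoch sizes (the signature is inferred from the call above; the semantics are an assumption):

import numpy as np

def x_u_split(targets, label_size, num_expand_x, num_expand_u, num_classes=47):
    targets = np.asarray(targets)
    per_class = label_size // num_classes
    labeled_idx = []
    for c in range(num_classes):
        idx = np.where(targets == c)[0]
        labeled_idx.extend(np.random.choice(idx, per_class, replace=False))
    labeled_idx = np.array(labeled_idx)
    unlabeled_idx = np.setdiff1d(np.arange(len(targets)), labeled_idx)
    # repeat indices cyclically so one epoch yields the requested number of images
    labeled_idx = np.resize(labeled_idx, num_expand_x)
    unlabeled_idx = np.resize(unlabeled_idx, num_expand_u)
    return labeled_idx, unlabeled_idx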
Example #12
def getdata_emnist(batch_size):
    transform = transforms.Compose([transforms.ToTensor(),
                                    transforms.Normalize((0.5,), (0.5,))])  # EMNIST images are single-channel

    data_train = datasets.EMNIST("./data/emnist", split='balanced', transform=transform, train=True, download=True)
    data_test = datasets.EMNIST("./data/emnist", split='balanced', transform=transform, train=False, download=True)

    data_loader_train = torch.utils.data.DataLoader(dataset=data_train, batch_size=batch_size, shuffle=True)
    data_loader_test = torch.utils.data.DataLoader(dataset=data_test, batch_size=batch_size, shuffle=True)

    return data_loader_train, data_loader_test
Example #13
    def __init__(self, args):
        kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
        self.train_loader = torch.utils.data.DataLoader(
            datasets.EMNIST('data/emnist', train=True, download=True, split='byclass',
                            transform=transforms.ToTensor()),
            batch_size=args.batch_size, shuffle=True, **kwargs)
        self.test_loader = torch.utils.data.DataLoader(
            datasets.EMNIST('data/emnist', train=False, split='byclass',
                            transform=transforms.ToTensor()),
            batch_size=args.batch_size, shuffle=True, **kwargs)
Example #14
def foo(Model, Name):
    print(Name)
    args = Args()

    torch.manual_seed(args.seed)

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {'num_workers': 16, 'pin_memory': True} if use_cuda else {}

    transformer = transforms.Compose([
        transforms.Lambda(to_tensor),
    ])

    model = Model().to(device)
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)

    test_loader = torch.utils.data.DataLoader(
        datasets.EMNIST(args.data_path, split='mnist', train=False, transform=transformer, download=True),
        batch_size=args.test_batch_size, shuffle=True, **kwargs)

    if len(sys.argv) > 1 and sys.argv[1] == 'test':
        model.load_state_dict(torch.load(args.save_path + Name, map_location=lambda storage, loc: storage))
        failed = {}
        test(args, model, device, test_loader, failed)
        for k, v in failed.items():
            im = Image.new(mode='L', size=(len(v) * 28, 28))
            for i, f in enumerate(map(lambda h: hashmap.get(h, None), v)):
                if f is not None:
                    im.paste(f, (i * 28, 0))
            im.save('./result/' + k + '.png')
    else:
        train_loader = torch.utils.data.DataLoader(
            datasets.EMNIST(args.data_path, split='mnist', train=True, transform=transformer, download=True),
            batch_size=args.batch_size, shuffle=True, **kwargs)

        accuracies = []
        for epoch in range(args.epochs):
            train(args, model, device, train_loader, optimizer, epoch)
            test(args, model, device, test_loader, accuracies)

        torch.save(model.state_dict(), args.save_path + Name)
        accuracieses.append((accuracies, Name))
        plt.plot(accuracies)
        plt.xlabel('epoch')
        plt.ylabel('accuracy')
#         plt.show()
        plt.title(Name)
        plt.savefig(Name)
        plt.clf()
Example #15
def get_emnist_semi(root, num_expand_x, num_expand_u, device_ids, server_idxs):
    root = './data'
    transform_labeled = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(size=28,
                              padding=int(28 * 0.125),
                              padding_mode='reflect'),
        transforms.ToTensor(),
        transforms.Normalize((0.1307, ), (0.3081, ))
    ])
    transform_val = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.1307, ), (0.3081, ))])
    base_dataset = datasets.EMNIST(root,
                                   train=True,
                                   split='balanced',
                                   download=True)

    train_labeled_idxs, train_unlabeled_idxs = x_u_split_semi(
        base_dataset.targets, num_expand_x, num_expand_u, device_ids,
        server_idxs)

    train_unlabeled_dataset_list = []
    train_labeled_dataset_list = []
    train_unlabeled_idxs_tmp = copy.deepcopy(train_unlabeled_idxs[0])

    for i in range(len(train_unlabeled_idxs)):
        train_unlabeled_dataset = EMNIST(root,
                                         train_unlabeled_idxs[i],
                                         train=True,
                                         transform=TransformFix(
                                             size=28,
                                             mean=(0.1307, ),
                                             std=(0.3081, )))
        train_unlabeled_dataset_list.append(train_unlabeled_dataset)

        train_labeled_dataset = EMNIST(root,
                                       train_labeled_idxs[i],
                                       train=True,
                                       transform=transform_labeled)
        train_labeled_dataset_list.append(train_labeled_dataset)

    test_dataset = datasets.EMNIST(root,
                                   train=False,
                                   split='balanced',
                                   transform=transform_val,
                                   download=True)

    return train_labeled_dataset_list, train_unlabeled_dataset_list, test_dataset
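`TransformFix` (used here and in Example #11) is also not shown. A minimal FixMatch-style sketch, assuming it returns a (weakly augmented, strongly augmented) pair per image; torchvision's RandAugment stands in for whatever strong augmentation the project actually uses:

from torchvision import transforms

class TransformFix:
    def __init__(self, mean, std, size=28):
        crop = transforms.RandomCrop(size, padding=int(size * 0.125), padding_mode='reflect')
        self.weak = transforms.Compose([transforms.RandomHorizontalFlip(), crop])
        self.strong = transforms.Compose([
            transforms.RandomHorizontalFlip(), crop, transforms.RandAugment()])
        self.normalize = transforms.Compose([
            transforms.ToTensor(), transforms.Normalize(mean=mean, std=std)])

    def __call__(self, x):
        # one weak and one strong view per sample, both normalized
        return self.normalize(self.weak(x)), self.normalize(self.strong(x))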
Example #16
def load_mnist_dataset(dataset, data_dir, training_data_ratio=1):

    # data_dir = '../data/mnist/'
    apply_transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.1307, ), (0.3081, ))])

    if dataset == "mnist":
        train_dataset = datasets.MNIST(data_dir,
                                       train=True,
                                       download=True,
                                       transform=apply_transform)

        test_dataset = datasets.MNIST(data_dir,
                                      train=False,
                                      download=True,
                                      transform=apply_transform)
    elif dataset == "fmnist":
        train_dataset = datasets.FashionMNIST(data_dir,
                                              train=True,
                                              download=True,
                                              transform=apply_transform)

        test_dataset = datasets.FashionMNIST(data_dir,
                                             train=False,
                                             download=True,
                                             transform=apply_transform)
    elif dataset == "emnist":
        train_dataset = datasets.EMNIST(data_dir,
                                        train=True,
                                        download=True,
                                        transform=apply_transform,
                                        split="balanced")

        test_dataset = datasets.EMNIST(data_dir,
                                       train=False,
                                       download=True,
                                       transform=apply_transform,
                                       split="balanced")
    else:
        raise NotImplementedError

    if training_data_ratio != 1:
        select_len = int(len(train_dataset) * training_data_ratio)
        train_dataset.data = train_dataset.data[:select_len]
        train_dataset.targets = train_dataset.targets[:select_len]

    return train_dataset, test_dataset
Example #17
def EMNIST(train=False, batch_size=None, augm_flag=False, val_size=None):
    if batch_size is None:
        if train:
            batch_size=train_batch_size
        else:
            batch_size=test_batch_size
    transform_base = [transforms.ToTensor(), pre.Transpose()] #EMNIST is rotated 90 degrees from MNIST
    transform_train = transforms.Compose([
        transforms.RandomCrop(28, padding=4),
    ] + transform_base)
    transform_test = transforms.Compose(transform_base)

    transform_train = transforms.RandomChoice([transform_train, transform_test])

    transform = transform_train if (augm_flag and train) else transform_test

    dataset = datasets.EMNIST(path, split='letters',
                              train=train, transform=transform, download=True)

    if train or val_size is None:
        loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                            shuffle=train, num_workers=1)
        return loader
    else:
        # Split into val and test sets
        test_size = len(dataset) - val_size
        dataset_val, dataset_test = data_utils.random_split(dataset, (val_size, test_size))
        val_loader = torch.utils.data.DataLoader(dataset_val, batch_size=batch_size,
                                                shuffle=train, num_workers=1)
        test_loader = torch.utils.data.DataLoader(dataset_test, batch_size=batch_size,
                                                shuffle=train, num_workers=1)
        return val_loader, test_loader
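`pre.Transpose()` is a project-local transform; a minimal sketch, assuming it swaps the H and W axes of the CxHxW tensor produced by ToTensor, which undoes EMNIST's rotation:

import torch

class Transpose:
    """Hypothetical stand-in for pre.Transpose: swap the last two (H, W) axes."""
    def __call__(self, x: torch.Tensor) -> torch.Tensor:
        return x.transpose(-2, -1)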
Example #18
def EMNIST(train=False, batch_size=None, augm_flag=False):
    if batch_size is None:
        if train:
            batch_size = train_batch_size
        else:
            batch_size = test_batch_size
    transform_base = [transforms.ToTensor(),
                      pre.Transpose()
                      ]  #EMNIST is rotated 90 degrees from MNIST
    transform_train = transforms.Compose([
        transforms.RandomCrop(28, padding=4),
    ] + transform_base)
    transform_test = transforms.Compose(transform_base)

    transform_train = transforms.RandomChoice(
        [transform_train, transform_test])

    transform = transform_train if (augm_flag and train) else transform_test

    dataset = datasets.EMNIST(path,
                              split='letters',
                              train=train,
                              transform=transform,
                              download=True)
    loader = torch.utils.data.DataLoader(dataset,
                                         batch_size=batch_size,
                                         shuffle=train,
                                         num_workers=1)
    return loader
Example #19
    def __init__(self, batch_size, augm_flag):
        super().__init__(batch_size, augm_flag)
        self.n_train, self.n_test = 60000, 10000
        # TODO: placeholder sizes; EMNIST letters actually has 124800 train / 20800 test images and 26 classes.
        self.n_classes = 10
        self.height, self.width, self.n_colors = 28, 28, 1
        self.data_dir = self.base_path + 'emnist/'

        transform_base = [transforms.Lambda(lambda x: np.array(x).T / 255.0)]
        transform_train = transforms.Compose([
            transforms.RandomCrop(self.height, padding=4),
        ] + transform_base)
        transform_test = transforms.Compose(transform_base)
        transform_train = transform_train if self.augm_flag else transform_test
        self.train_dataset = datasets.EMNIST(self.data_dir, split='letters', train=True, transform=transform_train, download=True)
        self.test_dataset = datasets.EMNIST(self.data_dir, split='letters', train=False, transform=transform_test, download=True)
Example #20
def get_dataset(data_name, data_root, image_size, train):
    transform = transforms.Compose(
        [transforms.Resize(image_size),
         transforms.ToTensor()])

    if data_name == "mnist":
        dataset = datasets.MNIST(root=data_root,
                                 train=train,
                                 transform=transform,
                                 download=True)

    elif data_name == "fushion-mnist":
        dataset = datasets.FashionMNIST(root=data_root,
                                        train=train,
                                        transform=transform,
                                        download=True)

    elif data_name == "kmnist":
        dataset = datasets.KMNIST(root=data_root,
                                  train=train,
                                  transform=transform,
                                  download=True)

    elif data_name == "emnist":
        dataset = datasets.EMNIST(root=data_root,
                                  split="byclass",
                                  train=train,
                                  transform=transform,
                                  download=True)

    else:
        dataset = None

    return dataset
Example #21
def get_loader(_dir,
               _batch_size=1,
               _train=True,
               _portion=None,
               _download=False,
               **_args):

    dataset = datasets.EMNIST(
        _dir,
        split='letters',
        train=_train,
        download=_download,
        transform=transforms.ToTensor(),
        target_transform=transforms.Lambda(
            lambda x: x - 1
        )  # Necessary because labels are mapped 1 to 26 instead of 0 to 25
    )

    indices = torch.randperm(len(dataset)).tolist()

    if _portion is not None:
        indices = indices[0:math.floor(_portion * len(dataset))]

    sampler = torch.utils.data.SubsetRandomSampler(indices)

    batch_sampler = torch.utils.data.BatchSampler(sampler,
                                                  batch_size=_batch_size,
                                                  drop_last=False)

    loader = torch.utils.data.DataLoader(dataset,
                                         batch_sampler=batch_sampler,
                                         **_args)

    return loader
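A usage sketch (argument values are illustrative): iterate over a random 10% of EMNIST letters in batches of 64.

loader = get_loader('./data', _batch_size=64, _train=True, _portion=0.1, _download=True)
images, labels = next(iter(loader))
print(images.shape, labels.min().item(), labels.max().item())  # labels now run 0..25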
Example #22
def rnn_train():

    train_data = dsets.EMNIST(
        root='./mnist',
        split='mnist',
        train=True,
        transform=transforms.ToTensor(),
        download=DOWNLOADS
    )
    train_load = Data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)

    rnn = RNN()
    optimizer = op.Adam(params=rnn.parameters(), lr=LR)
    loss_fun = nn.CrossEntropyLoss()

    for epoch in range(EPOCH):
        for step, (t_x, t_y) in enumerate(train_load):
            y = rnn(t_x)
            loss = loss_fun(y, t_y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            print("step", step, "| loss =", loss.item())

    torch.save(rnn, "./rnn.pkl")
Example #23
    def raw_dataset(self, data_dir: str, download: bool, train: bool, transform):
        if self.split == 'letters':
            # letters labels run 1..26; shift them to 0..25
            target_transform = (lambda x: x - 1)
        else:
            target_transform = None
        return datasets.EMNIST(data_dir, split=self.split, download=download, train=train,
                               transform=transform, target_transform=target_transform)
Example #24
    def __init__(self, options):
        transform_list = []
        if options.image_size is not None:
            transform_list.append(
                transforms.Resize((options.image_size, options.image_size)))
            # transform_list.append(transforms.CenterCrop(options.image_size))
        transform_list.append(transforms.ToTensor())
        if options.image_colors == 1:
            transform_list.append(transforms.Normalize(mean=[0.5], std=[0.5]))
        elif options.image_colors == 3:
            transform_list.append(
                transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5,
                                                                0.5]))
        transform = transforms.Compose(transform_list)

        if options.dataset == 'mnist':
            dataset = datasets.MNIST(options.data_dir,
                                     train=True,
                                     download=True,
                                     transform=transform)
        elif options.dataset == 'emnist':
            # Updated URL from https://www.westernsydney.edu.au/bens/home/reproducible_research/emnist
            datasets.EMNIST.url = 'https://cloudstor.aarnet.edu.au/plus/s/ZNmuFiuQTqZlu9W/download'
            dataset = datasets.EMNIST(options.data_dir,
                                      split=options.image_class,
                                      train=True,
                                      download=True,
                                      transform=transform)
        elif options.dataset == 'fashion-mnist':
            dataset = datasets.FashionMNIST(options.data_dir,
                                            train=True,
                                            download=True,
                                            transform=transform)
        elif options.dataset == 'lsun':
            training_class = options.image_class + '_train'
            dataset = datasets.LSUN(options.data_dir,
                                    classes=[training_class],
                                    transform=transform)
        elif options.dataset == 'cifar10':
            dataset = datasets.CIFAR10(options.data_dir,
                                       train=True,
                                       download=True,
                                       transform=transform)
        elif options.dataset == 'cifar100':
            dataset = datasets.CIFAR100(options.data_dir,
                                        train=True,
                                        download=True,
                                        transform=transform)
        else:
            dataset = datasets.ImageFolder(root=options.data_dir,
                                           transform=transform)

        self.dataloader = DataLoader(dataset,
                                     batch_size=options.batch_size,
                                     num_workers=options.loader_workers,
                                     shuffle=True,
                                     drop_last=True,
                                     pin_memory=options.pin_memory)
        self.iterator = iter(self.dataloader)
Example #25
def main():
    train_batch_size = 100

    dataset = datasets.EMNIST('../dataEMNIST',
                              split='balanced',
                              train=True,
                              download=True,
                              transform=transforms.ToTensor())

    shuffle_dataset = True
    val_split = 0.002
    dataset_size = len(dataset)
    indices = list(range(dataset_size))
    split = int(np.floor(val_split * dataset_size))

    if shuffle_dataset:
        np.random.seed(2)
        np.random.shuffle(indices)

    train_indices, val_indices = indices[split:], indices[:split]

    train_sampler = SubsetRandomSampler(train_indices)
    val_sampler = SubsetRandomSampler(val_indices)

    train_loader = torch.utils.data.DataLoader(dataset,
                                               sampler=train_sampler,
                                               batch_size=train_batch_size)

    val_loader = torch.utils.data.DataLoader(dataset,
                                             sampler=val_sampler,
                                             batch_size=train_batch_size)

    device = torch.device("cuda")
    model = model_root().to(device)
    model.load_state_dict(torch.load('root_emnist.pth'))
    model_one = model_1().to(device)
    model_one.load_state_dict(torch.load('emnist_1.pth'))
    model_one_zero = model_1_0().to(device)
    model_one_zero.load_state_dict(torch.load('emnist_1_0.pth'))

    learning_rate = 0.0001
    optimizer = torch.optim.Adam(model_one_zero.parameters(), lr=learning_rate)
    loss_fn = nn.CrossEntropyLoss()
    best = train(model, model_one, model_one_zero, optimizer, loss_fn,
                 train_loader, val_loader, device, 0)
    for i in range(2):
        # restart from the saved checkpoint with a 10x smaller learning rate
        model_one_zero.load_state_dict(torch.load('emnist_1_0.pth'))
        learning_rate /= 10
        optimizer = torch.optim.Adam(model_one_zero.parameters(), lr=learning_rate, weight_decay=5e-4)
        loss_fn = nn.CrossEntropyLoss()
        best = train(model, model_one, model_one_zero, optimizer, loss_fn,
                     train_loader, val_loader, device, best)
Example #26
    def __init__(self):
        super(CustomDataset, self).__init__()

        self.trans = transforms.Compose([
            transforms.Resize(64),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.5], std=[0.5])
        ])
        self.letter_images = datasets.EMNIST('data_letters', 'letters', train=True, download=False, transform=self.trans)
Example #27
def get_train_test_queues(args, train_transform, valid_transform):
  print("Getting",args.dataset,"data")
  if args.dataset == 'cifar10':
    print("Using CIFAR10")
    train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)
    valid_data = dset.CIFAR10(root=args.data, train=False, download=True, transform=valid_transform)
  elif args.dataset == 'mnist':
    print("Using MNIST")
    train_data = dset.MNIST(root=args.data, train=True, download=True, transform=train_transform)
    valid_data = dset.MNIST(root=args.data, train=False, download=True, transform=valid_transform)
  elif args.dataset == 'emnist':
    print("Using EMNIST")
    train_data = dset.EMNIST(root=args.data, split='balanced', train=True, download=True, transform=train_transform)
    valid_data = dset.EMNIST(root=args.data, split='balanced', train=False, download=True, transform=valid_transform)
  elif args.dataset == 'fashion':
    print("Using Fashion")
    train_data = dset.FashionMNIST(root=args.data, train=True, download=True, transform=train_transform)
    valid_data = dset.FashionMNIST(root=args.data, train=False, download=True, transform=valid_transform)
  elif args.dataset == 'svhn':
    print("Using SVHN")
    train_data = dset.SVHN(root=args.data, split='train', download=True, transform=train_transform)
    valid_data = dset.SVHN(root=args.data, split='test', download=True, transform=valid_transform)
  elif args.dataset == 'stl10':
    print("Using STL10")
    train_data = dset.STL10(root=args.data, split='train', download=True, transform=train_transform)
    valid_data = dset.STL10(root=args.data, split='test', download=True, transform=valid_transform)
  elif args.dataset == 'devanagari':
    print("Using DEVANAGARI")
    # Ensure dataset is present in the directory args.data. Does not support auto download
    print(args.data)
    train_data = dset.ImageFolder(root=os.path.join(args.data,"Train"), transform=train_transform, loader = grey_pil_loader)
    valid_data = dset.ImageFolder(root=os.path.join(args.data, "Test"), transform=valid_transform, loader = grey_pil_loader)
  else:
    assert False, "Cannot get training queue for dataset"

  train_queue = torch.utils.data.DataLoader(
      train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=1)

  valid_queue = torch.utils.data.DataLoader(
      valid_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=1)

  return train_queue, valid_queue
Example #28
def get_test_loader(data_dir, batch_size, num_workers=4, pin_memory=False):
    """
    Utility function for loading and returning a multi-process
    iterator over the EMNIST letters dataset (adapted from an MNIST
    test loader; note that it currently loads the train split).

    If using CUDA, num_workers should be set to 1 and pin_memory to True.

    Args
    ----
    - data_dir: path directory to the dataset.
    - batch_size: how many samples per batch to load.
    - num_workers: number of subprocesses to use when loading the dataset.
    - pin_memory: whether to copy tensors into CUDA pinned memory. Set it to
      True if using GPU.

    Returns
    -------
    - data_loader: test set iterator.
    """
    # define transforms
    normalize = transforms.Normalize((0.1307, ), (0.3081, ))
    trans = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])

    # load dataset (previously MNIST):
    #    dataset = datasets.MNIST(
    #        data_dir, train=False, download=False, transform=trans
    #    )
    # gb: switched to EMNIST letters below

    emnist_dir = "data"
    dataset = datasets.EMNIST(
        emnist_dir,
        download=True,
        split='letters',
        train=True,
        transform=transforms.Compose([
            lambda img: transforms.functional.rotate(img, -90),
            lambda img: transforms.functional.hflip(img),
            transforms.ToTensor()
        ]))

    data_loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=pin_memory,
    )

    return data_loader
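The rotate(-90) + hflip pair above is the standard fix for EMNIST's stored orientation; it is equivalent to transposing the pixel matrix (as the Transpose transforms in Examples #17-#18 do), which a quick self-contained check confirms:

import torch

x = torch.arange(9.).reshape(1, 3, 3)
rotated = torch.rot90(x, k=-1, dims=(1, 2))  # rotate 90 degrees clockwise
flipped = torch.flip(rotated, dims=(2,))     # then flip horizontally
assert torch.equal(flipped, x.transpose(1, 2))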
Example #29
    def __init__(self, train: bool = True,
                 max_seq_len: int = 1,
                 blank_label: int = 11,
                 pad_label: int = 10,
                 img_size: Tuple[int, int] = (28, 28)):
        self.dataset = datasets.EMNIST(root='./data',
                                       split='mnist',
                                       train=train,
                                       download=True)
        self.max_seq_len = max_seq_len
        self.img_size = img_size
Example #30
def make_dataset(dataset, dataroot, imageSize):
    """
    :param dataset: must be in 'cifar10 | lsun | imagenet | folder | lfw | emnist | celeba'
    :return: pytorch dataset for DataLoader to utilize
    """
    if dataset in ['imagenet', 'folder', 'lfw']:
        # folder dataset
        dataset = dset.ImageFolder(root=dataroot,
                                   transform=transforms.Compose([
                                       transforms.Resize(imageSize),
                                       transforms.CenterCrop(imageSize),
                                       transforms.ToTensor(),
                                       transforms.Normalize(
                                           (0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
                                   ]))
    elif dataset == 'lsun':
        dataset = dset.LSUN(db_path=dataroot, classes=['bedroom_train'],
                            transform=transforms.Compose([
                                transforms.Resize(imageSize),
                                transforms.CenterCrop(imageSize),
                                transforms.ToTensor(),
                                transforms.Normalize(
                                    (0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
                            ]))
    elif dataset == 'cifar10':
        dataset = dset.CIFAR10(root=dataroot, download=True,
                               transform=transforms.Compose([
                                   transforms.Resize(imageSize),
                                   transforms.CenterCrop(imageSize),
                                   transforms.ToTensor(),
                                   transforms.Normalize(
                                       (0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
                               ]))
    elif dataset == 'emnist':
        dataset = dset.EMNIST(root=dataroot, download=True, split='letters',
                              transform=transforms.Compose([
                                  transforms.Resize(imageSize),
                                  transforms.ToTensor(),
                                  # EMNIST images are single-channel
                                  transforms.Normalize((0.5,), (0.5,)),
                              ]))
    elif dataset == 'celeba':
        dataset = dset.ImageFolder(root=dataroot,
                                   transform=transforms.Compose([
                                       transforms.CenterCrop(138),
                                       transforms.Resize(imageSize),
                                       transforms.ToTensor(),
                                       transforms.Normalize(
                                           (0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
                                   ]))
    else:
        raise Exception('--dataset must be in cifar10 | lsun | imagenet | folder | lfw | emnist | celeba')
    assert dataset
    return dataset