def MNIST(mode="train", download=True, root="./data"):
    """Return the MNIST train or test split with a ToTensor transform.

    Different datasets distinguish their train/test splits in different
    ways, so datasets are exposed through this unified ``mode`` interface.

    Args:
        mode: "train" selects the training split; any other value selects
            the test split.
        download: forwarded to torchvision; fetch the data if missing.
        root: directory where the dataset is stored/downloaded.
            Fix: the original referenced an undefined global ``root``,
            which raised NameError at call time; it is now a parameter
            with a conventional default.

    Returns:
        A ``torchvision.datasets.MNIST`` instance.
    """
    from torchvision.datasets.mnist import MNIST
    from torchvision import transforms

    weak = transforms.ToTensor()
    if mode == "train":
        return MNIST(root=root, train=True, download=download, transform=weak)
    return MNIST(root=root, train=False, transform=weak, download=download)
def build(self):
    """Construct and return the (train, test) MNIST dataset pair."""
    train_set = MNIST(
        self.data_dir,
        transform=self.train_trans,
        download=True,
    )
    test_set = MNIST(
        self.data_dir,
        train=False,
        transform=self.test_trans,
        download=True,
    )
    return train_set, test_set
def main():
    """Train a SimpleCNN on MNIST, evaluate it on the test set, and save weights."""
    epoch = 10
    save_model_name = "mnist_model.pth"  # filename of the saved model
    # Prefer GPU when available.
    if torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    # 1. Datasets
    train_mnist = MNIST("./mnist", train=True, transform=trans.ToTensor(), download=True)
    test_mnist = MNIST("./mnist", train=False, transform=trans.ToTensor(), download=True)
    # Positional args: batch_size=32, shuffle=True.
    # NOTE(review): the test loader is shuffled too — confirm this is intended.
    train_loader = DataLoader(train_mnist, 32, True, pin_memory=True, num_workers=1)
    test_loader = DataLoader(test_mnist, 32, True, pin_memory=True, num_workers=1)
    # If the above raises an error (e.g. worker issues), use the versions below.
    # train_loader = DataLoader(train_mnist, 32, True, pin_memory=True)
    # test_loader = DataLoader(test_mnist, 32, True, pin_memory=True)
    # 2. Build the network, loss, and optimizer
    model = SimpleCNN().to(device)
    # Resume from a previous checkpoint if one exists on disk.
    if os.path.exists(save_model_name):
        load_params(model, save_model_name)
    loss_fn = nn.CrossEntropyLoss()
    optim = torch.optim.SGD(model.parameters(), 1e-2, momentum=0.9, weight_decay=1e-4)
    lr_scheduler = LRScheduler(optim, lr_func)
    writer = SummaryWriter()
    # optim = torch.optim.Adam(model.parameters(), 5e-4, weight_decay=1e-4)
    # 3. Train on the training set
    print("---------------------- Train")
    train(model, train_loader, loss_fn, optim, lr_scheduler, writer, epoch, device)
    # Evaluate on the test set
    print("---------------------- Test")
    test(model, test_loader, device)
    save_params(model, save_model_name)
    writer.close()
def __init__(self, train=True, root='../data'):
    """Wrap the requested MNIST split and prepare augmentation.

    Args:
        train: select the training split when True, the test split otherwise.
        root: directory containing (or receiving) the MNIST data.
    """
    self.dataset = (
        MNIST(root=root, train=True) if train else MNIST(root=root, train=False)
    )
    self.transform = transforms.Compose([transforms.ToTensor()])
    # Augmentor pipeline applying a mild elastic distortion half the time.
    self.p = Pipeline()
    self.p.random_distortion(
        probability=0.5, grid_width=7, grid_height=7, magnitude=1
    )
class Datasets(NamedTuple):
    # NOTE(review): these assignments have no type annotations, so NamedTuple
    # treats them as plain class attributes rather than tuple fields —
    # Datasets() is a zero-length tuple. Presumably used as a simple
    # namespace; confirm before adding annotations (that would change behavior).
    # Training split: random rotation up to 20 degrees, then tensor conversion.
    train = MNIST("data/mnist", download=True, train=True,
                  transform=T.Compose([T.RandomRotation(20), T.ToTensor()], ))
    # Test split: tensor conversion only (no augmentation).
    test = MNIST(
        "data/mnist",
        download=True,
        train=False,
        transform=T.ToTensor(),
    )
def cli_main():
    """Resume training of LitAutoEncoder on MNIST from a saved checkpoint."""
    import sys
    # Inject the resume flag as if it had been passed on the command line.
    sys.argv = [
        '',
        '--resume_from_checkpoint',
        'lightning_logs/version_0/checkpoints/epoch=288.ckpt'
    ]
    pl.seed_everything(1234)

    # ------------
    # args
    # ------------
    parser = ArgumentParser()
    parser.add_argument('--batch_size', default=32, type=int)
    parser.add_argument('--hidden_dim', type=int, default=128)
    parser = pl.Trainer.add_argparse_args(parser)
    args = parser.parse_args()

    # ------------
    # data
    # ------------
    dataset = MNIST('', train=True, download=True, transform=transforms.ToTensor())
    mnist_test = MNIST('', train=False, download=True, transform=transforms.ToTensor())
    # Fixed 55000/5000 train/validation split.
    mnist_train, mnist_val = random_split(dataset, [55000, 5000])

    train_loader = DataLoader(mnist_train, batch_size=args.batch_size)
    val_loader = DataLoader(mnist_val, batch_size=args.batch_size)
    test_loader = DataLoader(mnist_test, batch_size=args.batch_size)

    # ------------
    # model
    # ------------
    # Fix: the original constructed a fresh LitAutoEncoder() and immediately
    # overwrote it with the checkpoint load; the dead construction is removed.
    # NOTE(review): the weights checkpoint here (epoch=290) differs from the
    # trainer resume checkpoint above (epoch=288) — confirm which is intended.
    model = LitAutoEncoder.load_from_checkpoint(
        'lightning_logs/version_0/checkpoints/epoch=290.ckpt')

    # ------------
    # training
    # ------------
    trainer = pl.Trainer.from_argparse_args(args)
    trainer.fit(model, train_loader, val_loader)

    # ------------
    # testing
    # ------------
    result = trainer.test(test_dataloaders=test_loader)
    print(result)
def get_data_loaders(train_batch_size, val_batch_size):
    """Build MNIST train/val loaders restricted to fixed example counts.

    The validation split relies on the training download having already
    fetched the dataset files into the current directory.
    """
    preprocess = Compose([ToTensor(), Normalize((0.1307,), (0.3081,))])

    train_loader = DataLoader(
        Subset(
            MNIST(download=True, root=".", transform=preprocess, train=True),
            range(N_TRAIN_EXAMPLES),
        ),
        batch_size=train_batch_size,
        shuffle=True,
    )
    val_loader = DataLoader(
        Subset(
            MNIST(download=False, root=".", transform=preprocess, train=False),
            range(N_VALID_EXAMPLES),
        ),
        batch_size=val_batch_size,
        shuffle=False,
    )
    return train_loader, val_loader
def load_mnist(path=os.path.join(BASE_PATH, 'mnist')):
    """Return the MNIST dataset, downloading it if not already present.

    Args:
        path (str): Directory in which to find or download MNIST.

    Returns:
        Tuple of (training dataset, test dataset).
    """
    return (
        MNIST(path, train=True, download=True),
        MNIST(path, train=False, download=True),
    )
def get_dataloader(batch_size, root='mnist_data'):
    """Create shuffled MNIST train/test DataLoaders.

    Args:
        batch_size: batch size for both loaders.
        root: directory for the dataset; created (with parents) if missing.

    Returns:
        Tuple of (train_loader, test_loader).
    """
    root = Path(root).expanduser()
    # Fix: replaces the original exists()/mkdir() check-then-create sequence,
    # which raced with concurrent callers and failed when parent directories
    # were missing.
    root.mkdir(parents=True, exist_ok=True)
    root = str(root)

    transform = transforms.ToTensor()
    train_data = MNIST(root, train=True, download=True, transform=transform)
    test_data = MNIST(root, train=False, transform=transform)

    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    # NOTE(review): the test loader is shuffled as well; evaluation usually
    # does not need shuffling — confirm this is intentional.
    test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=True)
    return train_loader, test_loader
class MNISTDataset(NamedTuple):
    # NOTE(review): unannotated assignments are class attributes, not
    # NamedTuple fields — MNISTDataset() is an empty tuple. Presumably used
    # as a simple namespace; confirm before adding annotations.
    # Training split; train_transform is defined elsewhere in the module.
    trainset = MNIST(
        root="./dataset",
        train=True,
        download=True,
        transform=train_transform,
    )
    # Validation split; valid_transform is defined elsewhere in the module.
    validset = MNIST(
        root="./dataset",
        train=False,
        download=True,
        transform=valid_transform,
    )
def cli_main():
    """Train and test LitAutoEncoder on MNIST with TensorBoard logging."""
    pl.seed_everything(1234)

    # --- argument parsing ---
    parser = ArgumentParser()
    parser.add_argument("--batch_size", default=32, type=int)
    parser.add_argument("--hidden_dim", type=int, default=128)
    parser.add_argument("--logdir", type=str, default="./logs")
    parser = pl.Trainer.add_argparse_args(parser)
    args = parser.parse_args()

    # --- data: 55k/5k train/val split plus the held-out test set ---
    full_train = MNIST("", train=True, download=True, transform=transforms.ToTensor())
    held_out = MNIST("", train=False, download=True, transform=transforms.ToTensor())
    train_split, val_split = random_split(full_train, [55000, 5000])

    loaders = {
        name: DataLoader(split, batch_size=args.batch_size)
        for name, split in (
            ("train", train_split),
            ("val", val_split),
            ("test", held_out),
        )
    }

    # --- model ---
    model = LitAutoEncoder()

    # --- logging ---
    tb_logger = TensorBoardLogger(args.logdir)

    # --- training ---
    trainer = pl.Trainer.from_argparse_args(args, logger=tb_logger)
    trainer.fit(model, loaders["train"], loaders["val"])

    # --- testing ---
    result = trainer.test(test_dataloaders=loaders["test"])
    print(result)
class MNISTDataset(NamedTuple):
    # NOTE(review): unannotated assignments are class attributes, not
    # NamedTuple fields — MNISTDataset() is an empty tuple. Presumably a
    # simple namespace; confirm before adding annotations.
    # Training split: random rotation up to 25 degrees, then tensor conversion.
    train = MNIST(
        root="./data",
        train=True,
        transform=T.Compose([T.RandomRotation(25), T.ToTensor()]),
        download=True,
    )
    # Test split: tensor conversion only.
    test = MNIST(
        root="./data",
        train=False,
        transform=T.ToTensor(),
        download=True,
    )
class MNISTDataset:
    """Namespace bundling the MNIST train/test splits.

    Both datasets are instantiated (and downloaded if absent) when the
    class body executes at import time.
    """

    # Training split with the module-level training transforms.
    train = MNIST(
        root="./dataset",
        train=True,
        download=True,
        transform=train_transform,
    )
    # Held-out split with the module-level evaluation transforms.
    test = MNIST(
        root="./dataset",
        train=False,
        download=True,
        transform=test_transform,
    )
def cli_main(): pl.seed_everything(1234) # ------------ # args # ------------ parser = ArgumentParser() parser.add_argument('--batch_size', default=32, type=int) parser = pl.Trainer.add_argparse_args(parser) parser = LitClassifier.add_model_specific_args(parser) args = parser.parse_args() # ------------ # data # ------------ dataset = MNIST('', train=True, download=True, transform=transforms.ToTensor()) mnist_test = MNIST('', train=False, download=True, transform=transforms.ToTensor()) mnist_train, mnist_val = random_split(dataset, [55000, 5000]) train_loader = DataLoader(mnist_train, batch_size=args.batch_size) val_loader = DataLoader(mnist_val, batch_size=args.batch_size) test_loader = DataLoader(mnist_test, batch_size=args.batch_size) # ------------ # model # ------------ model = LitClassifier(args.hidden_dim, args.learning_rate) # ------------ # training # ------------ aim_logger = AimLogger( experiment='pt_lightning_exp', train_metric_prefix='train_', test_metric_prefix='test_', val_metric_prefix='val_', ) trainer = pl.Trainer(logger=aim_logger) trainer.fit(model, train_loader, val_loader) # ------------ # testing # ------------ trainer.test(test_dataloaders=test_loader)
def cli_main():
    """Train LitClassifier on MNIST using NVIDIA DALI input pipelines."""
    # DALI is optional; bail out when it is not installed.
    if not DALI_AVAILABLE:
        return

    pl.seed_everything(1234)

    # ------------
    # args
    # ------------
    parser = ArgumentParser()
    parser.add_argument('--batch_size', default=32, type=int)
    parser = pl.Trainer.add_argparse_args(parser)
    # NOTE(review): args.hidden_dim / args.learning_rate used below are
    # presumably registered by add_model_specific_args — confirm.
    parser = LitClassifier.add_model_specific_args(parser)
    args = parser.parse_args()

    # ------------
    # data
    # ------------
    dataset = MNIST('', train=True, download=True, transform=transforms.ToTensor())
    mnist_test = MNIST('', train=False, download=True, transform=transforms.ToTensor())
    # Fixed 55000/5000 train/validation split.
    mnist_train, mnist_val = random_split(dataset, [55000, 5000])

    # Wrap each split in an external-source iterator feeding a DALI pipeline;
    # only the training loader fills/pads its last batch.
    eii_train = ExternalMNISTInputIterator(mnist_train, args.batch_size)
    eii_val = ExternalMNISTInputIterator(mnist_val, args.batch_size)
    eii_test = ExternalMNISTInputIterator(mnist_test, args.batch_size)

    pipe_train = ExternalSourcePipeline(batch_size=args.batch_size, eii=eii_train, num_threads=2, device_id=0)
    train_loader = DALIClassificationLoader(pipe_train, size=len(mnist_train), auto_reset=True, fill_last_batch=True)

    pipe_val = ExternalSourcePipeline(batch_size=args.batch_size, eii=eii_val, num_threads=2, device_id=0)
    val_loader = DALIClassificationLoader(pipe_val, size=len(mnist_val), auto_reset=True, fill_last_batch=False)

    pipe_test = ExternalSourcePipeline(batch_size=args.batch_size, eii=eii_test, num_threads=2, device_id=0)
    test_loader = DALIClassificationLoader(pipe_test, size=len(mnist_test), auto_reset=True, fill_last_batch=False)

    # ------------
    # model
    # ------------
    model = LitClassifier(args.hidden_dim, args.learning_rate)

    # ------------
    # training
    # ------------
    trainer = pl.Trainer.from_argparse_args(args)
    trainer.fit(model, train_loader, val_loader)

    # ------------
    # testing
    # ------------
    trainer.test(test_dataloaders=test_loader)
def cli_main(): pl.seed_everything(1234) # ------------ # args # ------------ parser = ArgumentParser() parser.add_argument('--batch_size', type=int, default=32) parser.add_argument('--hidden_dim', type=int, default=128) parser.add_argument('--learning_rate', type=float, default=0.0001) parser = pl.Trainer.add_argparse_args(parser) args = parser.parse_args() # init neptune experiment import os from utils import init_neptune api_key = os.environ['NEPTUNE_API_TOKEN'] project_name = 'zzragida/examples-mnist' experiment_name = 'pytorch-lightning-mnist' experiment_tags = ['lightning', 'mnist'] neptune_logger = init_neptune(args, api_key, project_name, experiment_name, experiment_tags) # ------------ # data # ------------ dataset = MNIST('', train=True, download=True, transform=transforms.ToTensor()) mnist_test = MNIST('', train=False, download=True, transform=transforms.ToTensor()) mnist_train, mnist_val = random_split(dataset, [55000, 5000]) train_loader = DataLoader(mnist_train, batch_size=args.batch_size) val_loader = DataLoader(mnist_val, batch_size=args.batch_size) test_loader = DataLoader(mnist_test, batch_size=args.batch_size) # ------------ # model # ------------ model = LitClassifier(args.hidden_dim, args.learning_rate) # ------------ # training # ------------ trainer = pl.Trainer.from_argparse_args(args, max_epochs=10, logger=neptune_logger) trainer.fit(model, train_loader, val_loader) # ------------ # testing # ------------ trainer.test(test_dataloaders=test_loader)
def setup(self, stage=None):
    """Instantiate the MNIST splits required for the given Lightning stage."""
    # Train/val data: split the full training set into 55k train / 5k val.
    if stage in ('fit', None):
        full = MNIST(self.data_dir, train=True, transform=self.transform)
        self.mnist_train, self.mnist_val = random_split(full, [55000, 5000])

    # Test data.
    if stage in ('test', None):
        self.mnist_test = MNIST(
            self.data_dir, train=False, transform=self.transform)
def cli_main():
    """Train/test LitAutoEncoder on MNIST, configured for AzureML multi-node DDP."""
    pl.seed_everything(1234)

    # --- args ---
    parser = ArgumentParser()
    parser.add_argument("--batch_size", default=32, type=int)
    parser.add_argument("--hidden_dim", type=int, default=128)
    parser = pl.Trainer.add_argparse_args(parser)
    args = parser.parse_args()

    # --- set azureml env vars for multi-node ddp ---
    set_environment_variables()

    # --- data: 55k/5k train/val split plus the held-out test set ---
    to_tensor = transforms.ToTensor()
    full_train = MNIST("", train=True, download=True, transform=to_tensor)
    held_out = MNIST("", train=False, download=True, transform=to_tensor)
    train_split, val_split = random_split(full_train, [55000, 5000])

    batch = args.batch_size
    train_loader = DataLoader(train_split, batch_size=batch)
    val_loader = DataLoader(val_split, batch_size=batch)
    test_loader = DataLoader(held_out, batch_size=batch)

    # --- model ---
    model = LitAutoEncoder()

    # --- training ---
    trainer = pl.Trainer.from_argparse_args(args)
    trainer.fit(model, train_loader, val_loader)

    # --- testing ---
    result = trainer.test(test_dataloaders=test_loader)
    print(result)
def download_mnist(split="train"):
    """
    Loads split from the MNIST dataset and returns data.
    """
    train = split == "train"

    # If we need to download and uncompress the MNIST dataset, it is
    # necessary to create a separate directory for each process.
    mnist_exists = os.path.exists(
        os.path.join(
            # NOTE(review): training_file/test_file are deprecated class
            # attributes in newer torchvision releases — confirm the pinned
            # version still exposes them.
            "/tmp/MNIST/processed", MNIST.training_file if train else MNIST.test_file
        )
    )

    if mnist_exists:
        mnist_root = "/tmp"
    else:
        # One directory per distributed rank to avoid concurrent-download races.
        rank = "0" if "RANK" not in os.environ else os.environ["RANK"]
        # NOTE(review): "tmp" here is a relative path (no leading slash),
        # unlike the absolute "/tmp" probed above — confirm this is intended.
        mnist_root = os.path.join("tmp", "bandits", rank)
        os.makedirs(mnist_root, exist_ok=True)

    # download the MNIST dataset:
    with NoopContextManager():
        mnist = MNIST(mnist_root, download=not mnist_exists, train=train)

    return mnist
def get_mnist_loader(data_dir='dataset', batch_size=128, num_workers=4):
    """Return a shuffled DataLoader over the MNIST training split.

    Applies a random rotation in [-30, 30] degrees before tensor conversion.
    """
    augment = transforms.Compose([
        transforms.RandomRotation((-30, 30)),
        transforms.ToTensor(),
    ])
    train_set = MNIST(
        root=data_dir,
        train=True,
        transform=augment,
        download=True,
    )
    return DataLoader(
        dataset=train_set,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
    )
def get_data_loader(data_path, batch_size):
    """Return the (train, test) MNIST datasets, resized to 32x32 and normalized.

    Note: despite its name this returns datasets, not DataLoaders, and the
    ``batch_size`` argument is accepted but never used (kept for interface
    compatibility with callers).
    """
    preprocess = transforms.Compose([
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
        transforms.Normalize((0.1307, ), (0.3081, )),
    ])
    train_set, test_set = (
        MNIST(data_path, train=is_train, download=True, transform=preprocess)
        for is_train in (True, False)
    )
    return train_set, test_set
def get_iterator(mode):
    """Build a torchnet parallel iterator over the raw MNIST tensors.

    Args:
        mode: truthy selects the training split (and enables shuffling);
            falsy selects the test split.
    """
    dataset = MNIST(root='./data', download=True, train=mode)
    # NOTE(review): train_data/test_data (and *_labels below) are deprecated
    # in newer torchvision in favor of .data/.targets — confirm the pinned
    # torchvision version still exposes them.
    data = getattr(dataset, 'train_data' if mode else 'test_data')
    labels = getattr(dataset, 'train_labels' if mode else 'test_labels')
    tensor_dataset = tnt.dataset.TensorDataset([data, labels])

    return tensor_dataset.parallel(batch_size=BATCH_SIZE, num_workers=4, shuffle=mode)
def main():
    """Fit a vanilla VAE on the dataset selected by the CLI arguments."""
    # Put imports here in order to speed up getting help.
    from torch import Tensor, float32
    from torchvision.datasets.mnist import MNIST

    from .datasets import FreyFace
    from .estimators import VAE

    # All business starts here: load and scale the requested dataset to [0, 1].
    if args.dataset[0] == 'frey-face':
        ff = FreyFace(args.datadir[0], download=True)
        dataset = ff.data.type(float32) / 255
    elif args.dataset[0] == 'mnist':
        mnist = MNIST(args.datadir[0], download=True)
        # NOTE(review): .train_data is deprecated in newer torchvision
        # (use .data) — confirm the pinned version.
        dataset = mnist.train_data.reshape(-1, 28 * 28)  # type: Tensor
        dataset = dataset.type(float32) / 255
    else:
        raise ValueError('Unknown dataset identifier was given.')

    # Echo the run configuration.
    print('Estimator: ', 'vanila vae')
    print('Decoder: ', args.decoder)
    print('Dataset: ', args.dataset[0])
    print('Num of epochs: ', args.noepochs[0])
    print('Batch size: ', args.batch_size[0])
    print('Output directory: ', abspath(args.outdir[0]))
    print()

    vae = VAE(nohiddens=args.nohiddens[0],
              nolatents=args.nolatents[0],
              nosamples=args.nosamples[0],
              noepochs=args.noepochs[0],
              batch_size=args.batch_size[0],
              show_every=args.show_every[0],
              decoder=args.decoder,
              outdir=args.outdir[0])
    vae.fit(dataset)
def main():
    """Load a trained Net from disk and print its output on one test image."""
    model = Net()
    model.load_state_dict(torch.load('save/mnist_cnn.pt'))
    model.cuda()

    # Fix: the original had a stray trailing comma after this call, which
    # made `dataset` a 1-element tuple instead of an MNIST instance (it is
    # only used by the commented-out loader below, but the binding is now
    # the dataset itself).
    dataset = MNIST('./data',
                    train=True,
                    download=True,
                    transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307, ), (0.3081, ))
                    ]))

    # load the testing dataset
    test_loader = torch.utils.data.DataLoader(datasets.MNIST(
        './data',
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])),
        batch_size=1,
        shuffle=True,
        pin_memory=True)
    # test_loader = torch.utils.data.DataLoader(dataset, batch_size=1)  # retrieve items 1 at a time

    seed = 0  # NOTE(review): unused; presumably meant to seed the RNG — confirm.
    for data, target in test_loader:
        # Print the model prediction for a single test image, then stop.
        # print(data)
        # domain = generate_domain(data[0][0], 0.001)
        # print(domain)
        print(model(data.cuda()))
        break
def get_iterator(mode):
    """Build a torchnet parallel iterator over the raw MNIST tensors.

    Args:
        mode: truthy selects the training split (and enables shuffling);
            falsy selects the test split.
    """
    mnist_dir = get_dataset_dir("mnist", create=True)
    ds = MNIST(root=mnist_dir, download=True, train=mode)
    # NOTE(review): train_data/test_data (and *_labels below) are deprecated
    # in newer torchvision in favor of .data/.targets — confirm the pinned
    # torchvision version still exposes them.
    data = getattr(ds, 'train_data' if mode else 'test_data')
    labels = getattr(ds, 'train_labels' if mode else 'test_labels')
    tds = tnt.dataset.TensorDataset([data, labels])
    return tds.parallel(batch_size=128, num_workers=4, shuffle=mode)
def train_encoder():
    """Train a single-channel Autoencoder on MNIST and log reconstructions.

    After training, writes one figure per epoch (originals vs. reconstructions)
    to TensorBoard and saves the model weights to disk.
    """
    data = MNIST("MNIST", train=True, download=True, transform=transforms.ToTensor())
    model = Autoencoder(1).to(device)
    epochs = 5
    outputs = train(model, epochs, 16, 1e-3, data)

    writer = SummaryWriter("runs/autoencodermnist")
    for step in range(epochs):
        fig = plt.figure(figsize=(9, 2))
        originals = outputs[step][1].cpu().detach().numpy()
        reconstructions = outputs[step][2].cpu().detach().numpy()

        # Top row: up to nine inputs; bottom row: their reconstructions.
        for col, img in enumerate(originals[:9]):
            plt.subplot(2, 9, col + 1)
            plt.imshow(img[0])
        for col, img in enumerate(reconstructions[:9]):
            plt.subplot(2, 9, 9 + col + 1)
            plt.imshow(img[0])

        writer.add_figure("Autoencoder performance", fig, global_step=step)

    model.to("cpu")
    torch.save(model.state_dict(), "autoencoderMNIST.pth")
def datasets(self, params: Params):
    """Register train/test MNIST dataloaders on this trainer."""
    from torch.utils.data.dataloader import DataLoader
    from thexp.torch.data.collate import AutoCollate

    # self.device = torch.device(param.device)
    train_dl = DataLoader(
        MNIST(mode='train'),
        **params.dataloader,
        collate_fn=AutoCollate(self.device),
    )
    test_dl = DataLoader(
        MNIST(mode='test'),
        **params.dataloader,
        collate_fn=AutoCollate(self.device),
    )
    self.regist_databundler(train=train_dl, test=test_dl)
def datasets(self):
    """Configure the get_datasets used for training"""
    from torch.utils.data import DataLoader
    from torchvision.datasets.mnist import MNIST
    import torchvision.transforms as T

    def to_target(y):
        # One-hot encode the class index into a (cy, 1, 1) tensor so it can
        # be used as a spatial conditioning map.
        Y = torch.zeros([self.cy])
        Y[y] = 1.
        return Y.reshape([self.cy, 1, 1])

    # Resize/crop to self.hw and normalize to [-1, 1].
    transform = T.Compose([
        T.Resize(self.hw),
        T.CenterCrop(self.hw),
        T.ToTensor(),
        T.Normalize([0.5], [0.5])
    ])

    # Fixed conditioning grid for inference: ns samples for each of cy classes.
    y = torch.stack([to_target(i) for i in range(self.cy)])
    y = y.unsqueeze(0).expand([self.ns, self.cy, self.cy, 1, 1]).reshape(  # Expand across last dim
        [-1, self.cy, 1, 1])  # Batch
    # One standard-normal latent vector per conditioning label.
    z = Normal(0., 1.).sample([y.shape[0], self.cz, 1, 1])

    return {
        'train':
        DataLoader(MNIST(self.data_root,
                         download=True,
                         transform=transform,
                         target_transform=to_target),
                   batch_size=self.b * self.ngpus,
                   shuffle=True,
                   num_workers=self.ncpus),
        'inference': list(zip(y.unsqueeze(0), z.unsqueeze(0)))
    }  # Turn into batches
def main():
    """Extract penultimate-layer (fc/f7) features for clean and adversarial MNIST images."""
    global model_nm
    global it
    global ims_pth
    model_nm = args.model
    it = args.it

    # Build the model and restore its checkpoint.
    if args.model == 'MNIST':
        model = LeNet5()
        prefix = 'MnistNet'
        print(model)
    modelTrain = TrainNN(prefix, net=model)
    modelTrain.load_checkpoint(args.resume)

    # Optionally extract features for the full (clean) MNIST training set.
    if args.train_data == "True":
        if model_nm == "MNIST":
            data_train = MNIST('./data/mnist',
                               download=True,
                               transform=transforms.Compose([
                                   transforms.Resize((32, 32)),
                                   transforms.ToTensor()
                               ]))
        dl = DataLoader(data_train, batch_size=128, shuffle=False)
        # Hook the fc/f7 layer to capture its activations.
        target_layer = (modelTrain.net._modules['fc'])._modules['f7']
        hooker = Hook(target_layer, 1)
        fn = '{}_train_ims_out'.format(model_nm)
        extract_feature(dl, modelTrain.net, hooker, fn)

    # Load the pre-crafted adversarial images for this attack iteration.
    ims_pth = './attackIms/{}_it_{}.pth'.format(model_nm, it)
    print("loading dataset from ims: ", ims_pth)
    adv_ds = torch.load(ims_pth)
    # Clean images are only stored alongside the first iteration's file.
    if it == 1:
        clean_ims = adv_ds['im'].cpu().squeeze(axis=1)
        print('load clean ims of shape:', clean_ims.shape)
    adv_ims = adv_ds['adv'].cpu().squeeze(axis=1)
    print('load crafted images of shape:', adv_ims.shape)
    labels = adv_ds['label'].cpu().squeeze(axis=1)
    print('load lable of shape:', labels.shape)

    # For the first iteration also extract features of the clean images.
    if it == 1:
        ds = torch.utils.data.TensorDataset(clean_ims, labels)
        dl = DataLoader(ds, batch_size=128, shuffle=False)
        target_layer = (modelTrain.net._modules['fc'])._modules['f7']
        hooker = Hook(target_layer, 1)
        fn = '{}_clean_ims_out'.format(model_nm)
        extract_feature(dl, modelTrain.net, hooker, fn)

    # Extract features of the adversarial images.
    ds = torch.utils.data.TensorDataset(adv_ims, labels)
    dl = DataLoader(ds, batch_size=128, shuffle=False)
    target_layer = (modelTrain.net._modules['fc'])._modules['f7']
    hooker = Hook(target_layer, 1)
    fn = '{}_adv_ims_it_{}_out'.format(model_nm, it)
    extract_feature(dl, modelTrain.net, hooker, fn)
    pass
def __init__(self, data_root: str, transform=None, train=True):
    """Wrap a torchvision MNIST split and record its length.

    Args:
        data_root: directory for the dataset (downloaded if missing).
        transform: optional image transform forwarded to torchvision.
        train: select the training split when True, test otherwise.
    """
    super().__init__()
    mnist = MNIST(
        root=data_root,
        train=train,
        transform=transform,
        download=True,
    )
    self.dataset = mnist
    self.size = len(mnist)