def datasets(self, params: Params):
    """Build train/test dataloaders for the dataset named in ``params.dataset``.

    Supports 'mnist' and 'fmnist'; any other value raises ValueError.
    Registers the resulting loaders on the trainer via ``regist_databundler``.
    """
    from torch.utils.data.dataloader import DataLoader
    from thexp.torch.data.collate import AutoCollate
    from datasets import MNIST, FMNIST

    dataset_cls = {'mnist': MNIST, 'fmnist': FMNIST}.get(params.dataset)
    if dataset_cls is None:
        # Raise instead of `assert False`: asserts are stripped under -O,
        # which would leave the loaders undefined and crash later with a
        # confusing NameError.
        raise ValueError('unknown dataset: {!r}'.format(params.dataset))

    # Both datasets share the same loader construction; dedupe the branches.
    train_loader = DataLoader(dataset_cls(mode='train'), **params.dataloader,
                              collate_fn=AutoCollate(self.device))
    test_loader = DataLoader(dataset_cls(mode='test'), **params.dataloader,
                             collate_fn=AutoCollate(self.device))

    self.regist_databundler(
        train=train_loader,
        test=test_loader,
    )
def main_margin_with_vector_loss(used_labels=None, feature_size=2, s=8.0,
                                 m1=2.00, m2=0.5, m3=0.35, m4=0.5, lda=0.2,
                                 subdir=None):
    """Train NetworkMargin with MarginLoss plus a vector loss weighted by lda."""
    trainset = MNIST('train', used_labels)
    validset = MNIST('valid', used_labels)
    net = NetworkMargin(num_classes=trainset.n_classes,
                        feature_size=feature_size)

    # The cosine layer's weights get a stronger weight decay than the backbone.
    cosine_weights = net.cosine_layer.weights
    backbone = [p for p in net.parameters() if p is not cosine_weights]
    params = [
        {'params': backbone, 'weight_decay': 4e-5},
        {'params': cosine_weights, 'weight_decay': 4e-4},
    ]

    criterion = MarginLoss(s, m1, m2, m3, m4)
    trainer = MarginTrainerWithVectorLoss(configer, net, params, trainset,
                                          validset, criterion, lda,
                                          optim.Adam, MultiStepLR,
                                          num_to_keep=5, resume=False,
                                          valid_freq=1, show_embedding=True,
                                          subdir=subdir)
    trainer.train()
    del trainer
def main():
    """Entry point: set up the MNIST GLO experiment and run training."""
    args = parse_args()
    args.log_dir = os.path.join(args.log_dir, args.name)

    # Refuse to silently clobber a previous run with the same name.
    if os.path.exists(args.log_dir):
        if not query_yes_no('You already have a run called {}, override?'.format(
                args.name)):
            exit(0)
        shutil.rmtree(args.log_dir)
    delattr(args, 'name')

    dataset = MNIST(num_samples=args.num_samples)
    loader = DataLoader(dataset, batch_size=args.batch_size, shuffle=True)
    generator = GeneratorForMnistGLO(args.content_dim + args.class_dim)
    writer = SummaryWriter(log_dir=args.log_dir)

    train(data_loader=loader,
          glo_generator=generator,
          writer=writer,
          lr=args.lr,
          content_dim=args.content_dim,
          class_dim=args.class_dim,
          epochs=args.epochs,
          stddev=args.stddev,
          num_samples=args.num_samples)
def create_numpy_dataset(opt):
    '''
    @ Original 28x28 is rescaled to 32x32 to meet 2^P size
    @ batch_size and workders can be increased for faster loading

    Returns (images, labels) as numpy arrays, each truncated to opt.loadSize.
    '''
    print(torch.__version__)
    kwargs = {}

    # FIX: transforms.Scale was deprecated and later removed from torchvision;
    # transforms.Resize is the drop-in replacement with identical behavior.
    transform = transforms.Compose([
        transforms.Resize(32),
        transforms.ToTensor(),
    ])
    train_loader = data_utils.DataLoader(
        MNIST(root='./data', train=True, process=False, transform=transform),
        batch_size=opt.train_batch_size, shuffle=True, **kwargs)
    # NOTE(review): test_loader is built but never used in this function —
    # kept for parity with the original (possibly for its download/setup
    # side effect); confirm before removing.
    test_loader = data_utils.DataLoader(
        MNIST(root='./data', train=False, process=False, transform=transform),
        batch_size=opt.test_batch_size, shuffle=True, **kwargs)

    # Materialize the whole training set into one big numpy array.
    datasets = []
    labels = []
    for data, label in train_loader:
        datasets.append(data.numpy())
        labels.append(label.numpy())
    datasets = np.concatenate(datasets, axis=0)
    labels = np.concatenate(labels, axis=0)
    print('Create numpy dataset done, size: {}'.format(datasets.shape))
    return datasets[:opt.loadSize], labels[:opt.loadSize]
def main_adaptivemargin(used_labels=None, feature_size=2, s=8.0, subdir=None):
    """Train NetworkMargin with a margin loss that has learnable parameters."""
    trainset = MNIST('train', used_labels)
    validset = MNIST('valid', used_labels)
    net = NetworkMargin(num_classes=trainset.n_classes,
                        feature_size=feature_size)
    criterion = MarginLossWithParameter(trainset.n_classes, s)

    # Heavier weight decay on the cosine layer and on the loss parameters.
    cosine_weights = net.cosine_layer.weights
    backbone = [p for p in net.parameters() if p is not cosine_weights]
    params = [
        {'params': backbone, 'weight_decay': 4e-5},
        {'params': cosine_weights, 'weight_decay': 4e-4},
        {'params': criterion.parameters(), 'weight_decay': 4e-4},
    ]

    trainer = MarginTrainerWithParameter(configer, net, params, trainset,
                                         validset, criterion, optim.Adam,
                                         MultiStepLR, num_to_keep=5,
                                         resume=False, valid_freq=1,
                                         show_embedding=True, subdir=subdir)
    trainer.train()
    del trainer
def _init_dataset(self):
    """Instantiate the dataset selected by ``self.args.dataset``."""
    name = self.args.dataset
    if name == 'MNIST':
        self.data = MNIST(self.args)
    elif name == 'EMNIST':
        self.data = EMNIST(self.args)
    elif name == 'FashionMNIST':
        self.data = FashionMNIST(self.args)
    else:
        # Unknown dataset name: report and stop, as before.
        print("Dataset not supported")
        sys.exit()
def get_dataset(dataset_name, num_epochs, batch_size):
    """Return the dataset wrapper matching ``dataset_name``.

    Raises ValueError for any name other than 'mnist' / 'cifar10'.
    """
    if dataset_name == 'mnist':
        return MNIST(num_epochs, batch_size)
    if dataset_name == 'cifar10':
        # Plain CIFAR10 kept for reference:
        # dataset = CIFAR10(num_epochs, batch_size, validation_size=5000)
        return CIFAR10_GCN_WHITENED(num_epochs, batch_size)
    raise ValueError('Dataset option not valid.')
def main_pca(used_labels=None, subdir='PCA'):
    """Project validation images to 3-D with PCA and log a TensorBoard embedding."""
    from sklearn.decomposition import PCA

    validset = MNIST('valid', used_labels)
    flat = validset.images.reshape(validset.images.shape[0], -1)
    embedded = PCA(n_components=3).fit_transform(flat)

    if subdir is not None:
        logdir = os.path.join(configer.logdir, subdir)
    else:
        logdir = configer.logdir
    with SummaryWriter(logdir) as w:
        w.add_embedding(embedded, validset.labels)
def main_unsupervised_sigma_i(feature_size, n_clusters=50, batchnorm=False,
                              lamb=1.0, entropy_type='shannon', lr_m=1.0,
                              used_labels=None, show_embedding=True,
                              subdir=None):
    """Train the unsupervised network with the sigma-I loss.

    show_embedding: whether the trainer logs embeddings during validation.
    """
    trainset = MNIST('train', used_labels)
    validset = MNIST('valid', used_labels)
    net = NetworkUnsupervised(feature_size, batchnorm=batchnorm)
    criterion = LossUnsupervisedSigmaI(n_clusters, feature_size, lamb,
                                       entropy_type)

    # The loss module's own parameters train with a scaled learning rate.
    params = [
        {'params': net.parameters()},
        {'params': criterion.parameters(), 'lr': lr_m * configer.lrbase},
    ]

    trainer = UnsupervisedTrainer(configer, net, params, trainset, validset,
                                  criterion, optim.SGD, MultiStepLR,
                                  num_to_keep=5, resume=False, valid_freq=1,
                                  # BUG FIX: forward the caller's flag instead
                                  # of hard-coding True, which silently ignored
                                  # the show_embedding parameter.
                                  show_embedding=show_embedding,
                                  subdir=subdir)
    trainer.train()
    del trainer
def main_crossent(feature_size, used_labels=None):
    """Train the plain classifier with cross-entropy."""
    trainset = MNIST('train', used_labels)
    validset = MNIST('valid', used_labels)
    net = Network(trainset.n_classes, feature_size=feature_size)
    criterion = nn.CrossEntropyLoss()

    trainer = SupervisedTrainer(configer, net, net.parameters(), trainset,
                                validset, criterion, optim.SGD, MultiStepLR,
                                num_to_keep=5, resume=False, valid_freq=1,
                                show_embedding=True)
    trainer.train()
    del trainer
def test_mnist():
    # type: () -> None
    """Run the one-class classification benchmark on MNIST."""
    # Dataset and autoencoder model (moved to GPU, evaluation mode).
    dataset = MNIST(path='data/MNIST')
    model = LSAMNIST(input_shape=dataset.shape, code_length=64,
                     cpd_channels=100)
    model = model.cuda().eval()

    # Run the benchmark; results are written to mnist.txt.
    helper = OneClassResultHelper(dataset, model,
                                  checkpoints_dir='checkpoints/mnist/',
                                  output_file='mnist.txt')
    helper.test_one_class_classification()
def test(models, device):
    """Evaluate ``models`` on the MNIST test split and print accuracy."""
    dataset = MNIST(config.dataset_dir, split='test')
    loader = DataLoader(dataset, batch_size=config.batch_size,
                        num_workers=config.num_workers)

    correct, total = 0, 0
    with torch.no_grad():
        for data, labels in loader:
            data, labels = data.to(device), labels.to(device)
            preds = models(data).argmax(dim=1)
            total += labels.size(0)
            correct += torch.sum(preds == labels).item()
    print("Accuracy: {}%".format(correct / total * 100))
# --- continuation of LinearSVM.train: one SGD iteration (the enclosing
# --- `def train(...)` and its loop header are outside this view) ---
            # Sample a minibatch (with replacement) from the training set.
            idx = np.random.choice(num_train, batch_size)
            X_batch = X[idx]
            y_batch = y[idx]
            # Loss and gradient on the minibatch.
            loss, grad = self.loss(X_batch, y_batch, reg)
            loss_history.append(loss)
            # Vanilla gradient-descent update.
            self.W = self.W - learning_rate * grad
            if verbose and i % 100 == 0:
                print('iteration %d / %d: loss %f' % (i, num_iters, loss))
        return loss_history

    def predict(self, X):
        """Predict class labels for the rows of X."""
        y_pred = np.zeros(X.shape[0])
        scores = X.dot(self.W)
        # Compute scores (N, C); the argmax of each row is the predicted class.
        y_pred = np.argmax(scores, axis=1)
        return y_pred


if __name__ == '__main__':
    # Train a linear SVM on MNIST and report test accuracy.
    dirname = os.path.dirname(__file__)
    mnist = MNIST(os.path.join(dirname, 'mnist'))
    # mnist.show()
    svm = LinearSVM()
    svm.train(mnist.train_data, mnist.train_labels)
    y_pred = svm.predict(mnist.test_data)
    correct_num = np.sum(y_pred == mnist.test_labels)
    accuracy = 100 * correct_num/mnist.test_data.shape[0]
    print(f"分类准确率 {accuracy=:.2f}%")
def main(**kwargs):
    """Run the EWC continual-learning experiment described by ``kwargs``.

    Each entry of ``tasks`` is a dataset name prefixed with '+' (fit and
    consolidate on that dataset) or '-' (score on it).
    """
    kwargs.setdefault('data_size', 500)
    kwargs.setdefault('epochs', 600)
    kwargs.setdefault('learning_rate', 0.001)
    kwargs.setdefault('patience', None)
    kwargs.setdefault('ewc', 0)
    kwargs.setdefault('batch_size', 128)
    kwargs.setdefault('cuda', None)
    kwargs.setdefault('dry_run', False)
    kwargs.setdefault('name', None)
    kwargs.setdefault('seed', 1337)
    kwargs.setdefault('verbose', 'WARN')
    # BUG FIX: the default was registered under the key 'task', but the loop
    # below reads args.tasks — running without an explicit task list raised
    # AttributeError. Register the default under 'tasks'.
    kwargs.setdefault('tasks', ['+mnist', '-mnist'])
    args = SimpleNamespace(**kwargs)

    logging.basicConfig(
        level=args.verbose,
        style='{',
        format='[{levelname:.4}][{asctime}][{name}:{lineno}] {msg}',
    )
    logger.debug('parameters of this experiment')
    for key, val in args.__dict__.items():
        logger.debug(f' {key:.15}: {val}')

    seed(args.seed)

    datasets = {
        'mnist': MNIST(),
        'fashion': FashionMNIST(),
    }

    if args.name is None:
        now = np.datetime64('now')
        args.name = f'exp-{now}'
        logger.info(f'experiment name not given, defaulting to {args.name}')

    # In some cases, we must move the network to it's cuda device before
    # constructing the optimizer. This is annoying, and this logic is
    # duplicated in the estimator class. Ideally, I'd like the estimator to
    # handle cuda allocation _after_ the optimizer has been constructed...
    net = AlexNet(10, shape=(1, 27, 27))
    if args.cuda is None:
        args.cuda = 0 if torch.cuda.is_available() else False
    if args.cuda is not False:
        net = net.cuda(args.cuda)
    opt = O.Adagrad(net.parameters(), lr=args.learning_rate,
                    weight_decay=0.004)
    loss = N.CrossEntropyLoss()
    model = EwcClassifier(net, opt, loss, name=args.name, cuda=args.cuda,
                          dry_run=args.dry_run)

    for task in args.tasks:
        data = datasets[task[1:]]
        train, test = data.load()

        if task[0] == '+':
            print(f'-------- Fitting {task[1:]} --------')
            model.fit(train, epochs=args.epochs, patience=args.patience,
                      batch_size=args.batch_size)
            model.consolidate(train, alpha=args.ewc,
                              batch_size=args.batch_size)
            print()

        if task[0] == '-':
            print(f'-------- Scoring {task[1:]} --------')
            scores = {
                'accuracy': Accuracy(),
                'true positives': TruePositives(),
                'false positives': FalsePositives(),
                'true negatives': TrueNegatives(),
                'false negatives': FalseNegatives(),
                'precision': Precision(),
                'recall': Recall(),
                'f-score': FScore(),
            }
            for metric, criteria in scores.items():
                score = model.test(test, criteria,
                                   batch_size=args.batch_size)
                print(f'{metric:15}: {score}')
            print()
# --- continuation of create_probe_image (the enclosing `def` is outside
# --- this view): build the blue channel and assemble an HWC image ---
    blue = np.eye(size)

    img = np.array([red, green, blue])
    #img = np.ravel(img, order='C')
    img = np.reshape(img, (-1, size, size))
    img = np.reshape(img, (size, size, -1))
    if show:
        plt.imshow(img)
        plt.show()
    return img


def connectivity_experiment():
    """Probe the connectivity of a saved GroupNetRGB model with a test image."""
    image = create_probe_image(32)
    image = torch.from_numpy(image).contiguous().float()
    #sys.exit(1)
    # NOTE(review): `path` is assigned but never used below — verify intent.
    path = 'saved-models/groupnetrgb.torch'
    model = GroupNetRGB(1, (3, 32, 32), 3)
    model.test_connectivity(Variable(image.view(1, 3, 32, 32)))


if __name__ == '__main__':
    connectivity_experiment()
    sys.exit(1)
    # NOTE(review): everything below is unreachable (dead after sys.exit(1));
    # kept byte-identical.
    torch.manual_seed(2)
    model = OtherNet(8, (1, 28, 28), kernel_size=3, maxpool=1)
    dataset = MNIST(8)
    img = dataset.get_random_examples(1)
    #model_path = 'models/' + model.__class__.__name__ + '.torch'
    model_path = 'models/OtherNet_kernel3_nopool_epoch_100.torch'
    model.load_state_dict(torch.load(model_path, map_location='cpu'))
    get_activations(model, img)
# Standard / third-party imports.
import torch
import setka
import torchvision.datasets
import setka.base
import setka.pipes
import os
import numpy
import sys

# Make the project root importable before pulling in local modules.
sys.path.append(os.getcwd())

from datasets import MNIST
from models import ThreeLayersFullyConnected, LeNetFullyConvolutional

ds = MNIST()

## Trying fully connected
net = ThreeLayersFullyConnected(n=[100, 100, 10])


def loss(predictions, targets):
    # Cross-entropy between the first prediction head and the first target.
    return torch.nn.functional.cross_entropy(predictions[0], targets[0])


def acc(predictions, targets):
    # Returns (number of correct predictions, total count) for the first head.
    return (predictions[0].argmax(
        dim=-1) == targets[0]).sum(), targets[0].numel()


# NOTE(review): this statement is truncated in this view — the Trainer
# arguments continue beyond this chunk.
trainer = setka.base.Trainer(
def main():
    """Train/evaluate the open-set MNIST model configured by module-level args."""
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(device)
    best_acc = 0  # best test accuracy
    start_epoch = 0  # start from epoch 0 or last checkpoint epoch

    # checkpoint
    args.checkpoint = './checkpoints/mnist/' + args.arch
    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # folder to save figures
    args.plotfolder = './checkpoints/mnist/' + args.arch + '/plotter'
    if not os.path.isdir(args.plotfolder):
        mkdir_p(args.plotfolder)

    # Data
    print('==> Preparing data..')
    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.1307, ), (0.3081, ))])
    trainset = MNIST(root='../../data',
                     train=True,
                     download=True,
                     transform=transform,
                     train_class_num=args.train_class_num,
                     test_class_num=args.test_class_num,
                     includes_all_train_class=args.includes_all_train_class)
    testset = MNIST(root='../../data',
                    train=False,
                    download=True,
                    transform=transform,
                    train_class_num=args.train_class_num,
                    test_class_num=args.test_class_num,
                    includes_all_train_class=args.includes_all_train_class)
    # data loader
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=args.bs,
                                              shuffle=True,
                                              num_workers=4)
    testloader = torch.utils.data.DataLoader(testset,
                                             batch_size=args.bs,
                                             shuffle=False,
                                             num_workers=4)

    # Model
    net = Network(backbone=args.arch,
                  num_classes=args.train_class_num,
                  embed_dim=args.embed_dim)
    # NOTE(review): fea_dim is never used in this function — candidate for
    # removal once confirmed nothing else depends on the attribute access.
    fea_dim = net.classifier.in_features
    net = net.to(device)
    if device == 'cuda':
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = True

    if args.resume:
        # Load checkpoint.
        if os.path.isfile(args.resume):
            print('==> Resuming from checkpoint..')
            checkpoint = torch.load(args.resume)
            net.load_state_dict(checkpoint['net'])
            # best_acc = checkpoint['acc']
            # print("BEST_ACCURACY: "+str(best_acc))
            start_epoch = checkpoint['epoch']
            logger = Logger(os.path.join(args.checkpoint, 'log.txt'),
                            resume=True)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'))
        logger.set_names([
            'Epoch', 'Learning Rate', 'Train Loss', 'Train Acc.', 'Test Loss',
            'Test Acc.'
        ])

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(),
                          lr=args.lr,
                          momentum=0.9,
                          weight_decay=5e-4)

    # test(0, net, trainloader, testloader, criterion, device)
    epoch = 0
    if not args.evaluate:
        for epoch in range(start_epoch, args.es):
            print('\nEpoch: %d Learning rate: %f' %
                  (epoch + 1, optimizer.param_groups[0]['lr']))
            adjust_learning_rate(optimizer, epoch, args.lr, step=20)
            train_loss, train_acc = train(net, trainloader, optimizer,
                                          criterion, device)
            save_model(net, None, epoch,
                       os.path.join(args.checkpoint, 'last_model.pth'))
            # Per-epoch test metrics are not computed here; zeros are logged.
            test_loss, test_acc = 0, 0
            logger.append([
                epoch + 1, optimizer.param_groups[0]['lr'], train_loss,
                train_acc, test_loss, test_acc
            ])
            plot_feature(net,
                         trainloader,
                         device,
                         args.plotfolder,
                         epoch=epoch,
                         plot_class_num=args.train_class_num,
                         maximum=args.plot_max,
                         plot_quality=args.plot_quality)
            test(epoch, net, trainloader, testloader, criterion, device)
    # Final evaluation and open-set feature plot on the test split.
    test(99999, net, trainloader, testloader, criterion, device)
    plot_feature(net,
                 testloader,
                 device,
                 args.plotfolder,
                 epoch="test",
                 plot_class_num=args.train_class_num + 1,
                 maximum=args.plot_max,
                 plot_quality=args.plot_quality)
    logger.close()
__author__ = 'marechaux'

# Wildcard imports supply Subnet, Network, Onehot, MNIST, FillSubnet,
# Execute, etc. from the project packages.
from subnet.network import *
from datasets.encoder.onehot import *
from datasets.MNIST import *
from fill_subnet import *
from execute import *

# Training and test databases with their input/output data sinks.
learn_db = MNIST("training")
test_db = MNIST("testing")
input_datasink = learn_db.input_datasink
output_datasink = learn_db.output_datasink

# Build a subnet wired between the dataset sinks (FillSubnet.MLP2 —
# presumably a two-layer MLP; confirm against fill_subnet).
test_subnet = Subnet()
FillSubnet.MLP2(test_subnet, input_datasink, output_datasink)
n = Network(test_subnet)

# One-hot encode the outputs and run a single mean-evaluation pass.
encoder = Onehot(output_datasink)
execute = Execute(learn_db, test_db, encoder)
execute.mean(n, 1)
def main():
    """Cross-validate MSDA hyperparameters over source domains (digits)."""
    # N.B.: parameters defined in cv_cfg.ini override args!
    parser = argparse.ArgumentParser(
        description='Cross-validation over source domains for the digits datasets.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-m', '--model', default='MODAFM', type=str,
                        metavar='',
                        help='model type (\'MDAN\' / \'MODA\' / \'MODAFM\'')
    parser.add_argument('-d', '--data_path', default='/ctm-hdd-pool01/DB/',
                        type=str, metavar='', help='data directory path')
    parser.add_argument('-t', '--target', default='MNIST', type=str,
                        metavar='',
                        help='target domain (\'MNIST\' / \'MNIST_M\' / \'SVHN\' / \'SynthDigits\')')
    parser.add_argument('-o', '--output', default='msda_hyperparams.ini',
                        type=str, metavar='',
                        help='model file (output of train)')
    parser.add_argument('-n', '--n_iter', default=20, type=int, metavar='',
                        help='number of CV iterations')
    parser.add_argument('--n_images', default=20000, type=int, metavar='',
                        help='number of images from each domain')
    parser.add_argument('--mu', type=float, default=1e-2,
                        help="hyperparameter of the coefficient for the domain adversarial loss")
    parser.add_argument('--beta', type=float, default=0.2,
                        help="hyperparameter of the non-sparsity regularization")
    parser.add_argument('--lambda', type=float, default=1e-1,
                        help="hyperparameter of the FixMatch loss")
    parser.add_argument('--n_rand_aug', type=int, default=2,
                        help="N parameter of RandAugment")
    parser.add_argument('--m_min_rand_aug', type=int, default=3,
                        help="minimum M parameter of RandAugment")
    parser.add_argument('--m_max_rand_aug', type=int, default=10,
                        help="maximum M parameter of RandAugment")
    parser.add_argument('--weight_decay', default=0., type=float, metavar='',
                        help='hyperparameter of weight decay regularization')
    parser.add_argument('--lr', default=1e-1, type=float, metavar='',
                        help='learning rate')
    parser.add_argument('--epochs', default=30, type=int, metavar='',
                        help='number of training epochs')
    parser.add_argument('--batch_size', default=8, type=int, metavar='',
                        help='batch size (per domain)')
    parser.add_argument('--checkpoint', default=0, type=int, metavar='',
                        help='number of epochs between saving checkpoints (0 disables checkpoints)')
    parser.add_argument('--use_cuda', default=True, type=int, metavar='',
                        help='use CUDA capable GPU')
    parser.add_argument('--use_visdom', default=False, type=int, metavar='',
                        help='use Visdom to visualize plots')
    parser.add_argument('--visdom_env', default='digits_train', type=str,
                        metavar='', help='Visdom environment name')
    parser.add_argument('--visdom_port', default=8888, type=int, metavar='',
                        help='Visdom port')
    parser.add_argument('--verbosity', default=2, type=int, metavar='',
                        help='log verbosity level (0, 1, 2)')
    parser.add_argument('--seed', default=42, type=int, metavar='',
                        help='random seed')
    args = vars(parser.parse_args())

    # override args with cv_cfg.ini; remember which keys are CV'd over
    cfg = args.copy()
    cv_parser = ConfigParser()
    cv_parser.read('cv_cfg.ini')
    cv_param_names = []
    for key, val in cv_parser.items('main'):
        cfg[key] = ast.literal_eval(val)
        cv_param_names.append(key)

    # use a fixed random seed for reproducibility purposes
    if cfg['seed'] > 0:
        random.seed(cfg['seed'])
        np.random.seed(seed=cfg['seed'])
        torch.manual_seed(cfg['seed'])
        torch.cuda.manual_seed(cfg['seed'])

    device = 'cuda' if (cfg['use_cuda'] and torch.cuda.is_available()) else 'cpu'
    log = Logger(cfg['verbosity'])
    log.print('device:', device, level=0)

    if 'FM' in cfg['model']:
        # weak data augmentation (small rotation + small translation)
        data_aug = T.Compose([
            T.RandomAffine(5, translate=(0.125, 0.125)),
            T.ToTensor(),
        ])
    else:
        data_aug = T.ToTensor()
    cfg['test_transform'] = T.ToTensor()

    # define all datasets, then drop the target (CV runs on sources only)
    datasets = {}
    datasets['MNIST'] = MNIST(train=True,
                              path=os.path.join(cfg['data_path'], 'MNIST'),
                              transform=data_aug)
    datasets['MNIST_M'] = MNIST_M(train=True,
                                  path=os.path.join(cfg['data_path'],
                                                    'MNIST_M'),
                                  transform=data_aug)
    datasets['SVHN'] = SVHN(train=True,
                            path=os.path.join(cfg['data_path'], 'SVHN'),
                            transform=data_aug)
    datasets['SynthDigits'] = SynthDigits(train=True,
                                          path=os.path.join(cfg['data_path'],
                                                            'SynthDigits'),
                                          transform=data_aug)
    del datasets[cfg['target']]

    # get a subset of cfg['n_images'] from each dataset
    for ds_name in datasets:
        if ds_name == cfg['target']:
            # NOTE(review): dead check — the target was deleted above.
            continue
        indices = random.sample(range(len(datasets[ds_name])),
                                cfg['n_images'])
        datasets[ds_name] = Subset(datasets[ds_name],
                                   indices[0:cfg['n_images']])

    # cfg['model'] is replaced in-place by the instantiated network; the
    # matching train routine is stored alongside it.
    if cfg['model'] == 'MDAN':
        cfg['model'] = MDANet(len(datasets)-1).to(device)
        cfg['train_routine'] = lambda model, optimizer, train_loader, cfg: \
            mdan_train_routine(model, optimizer, train_loader, dict(), cfg)
    elif cfg['model'] == 'MODA':
        cfg['model'] = MODANet().to(device)
        cfg['train_routine'] = lambda model, optimizer, train_loader, cfg: \
            moda_train_routine(model, optimizer, train_loader, dict(), cfg)
    elif cfg['model'] == 'MODAFM':
        cfg['model'] = MODANet().to(device)
        cfg['excl_transf'] = [Flip]
        cfg['train_routine'] = lambda model, optimizer, train_loader, cfg: \
            moda_fm_train_routine(model, optimizer, train_loader, dict(), cfg)
    else:
        raise ValueError('Unknown model {}'.format(cfg['model']))

    best_params, _ = cross_validation(datasets, cfg, cv_param_names)
    log.print('best_params:', best_params, level=1)

    # save the best hyperparameters to the output .ini file
    results = ConfigParser()
    results.add_section('main')
    for key, value in best_params.items():
        results.set('main', key, str(value))
    with open(cfg['output'], 'w') as f:
        results.write(f)
def main():
    """Train a multi-source domain-adaptation model on the digits datasets.

    The model variant, target domain and hyperparameters come from the
    command line (optionally overridden by an .ini config via --icfg).
    The trained state dict is saved to --output (one file per source for
    DANNS).
    """
    parser = argparse.ArgumentParser(
        description='Domain adaptation experiments with digits datasets.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-m', '--model', default='MODAFM', type=str,
                        metavar='',
                        help='model type (\'FS\' / \'DANNS\' / \'DANNM\' / \'MDAN\' / \'MODA\' / \'FM\' / \'MODAFM\'')
    parser.add_argument('-d', '--data_path', default='/ctm-hdd-pool01/DB/',
                        type=str, metavar='', help='data directory path')
    parser.add_argument('-t', '--target', default='MNIST', type=str,
                        metavar='',
                        help='target domain (\'MNIST\' / \'MNIST_M\' / \'SVHN\' / \'SynthDigits\')')
    parser.add_argument('-o', '--output', default='msda.pth', type=str,
                        metavar='', help='model file (output of train)')
    parser.add_argument('--icfg', default=None, type=str, metavar='',
                        help='config file (overrides args)')
    parser.add_argument('--n_src_images', default=20000, type=int, metavar='',
                        help='number of images from each source domain')
    parser.add_argument('--n_tgt_images', default=20000, type=int, metavar='',
                        help='number of images from the target domain')
    parser.add_argument('--mu_d', type=float, default=1e-2,
                        help="hyperparameter of the coefficient for the domain discriminator loss")
    parser.add_argument('--mu_s', type=float, default=0.2,
                        help="hyperparameter of the non-sparsity regularization")
    parser.add_argument('--mu_c', type=float, default=1e-1,
                        help="hyperparameter of the FixMatch loss")
    parser.add_argument('--n_rand_aug', type=int, default=2,
                        help="N parameter of RandAugment")
    parser.add_argument('--m_min_rand_aug', type=int, default=3,
                        help="minimum M parameter of RandAugment")
    parser.add_argument('--m_max_rand_aug', type=int, default=10,
                        help="maximum M parameter of RandAugment")
    parser.add_argument('--weight_decay', default=0., type=float, metavar='',
                        help='hyperparameter of weight decay regularization')
    parser.add_argument('--lr', default=1e-1, type=float, metavar='',
                        help='learning rate')
    parser.add_argument('--epochs', default=30, type=int, metavar='',
                        help='number of training epochs')
    parser.add_argument('--batch_size', default=8, type=int, metavar='',
                        help='batch size (per domain)')
    parser.add_argument('--checkpoint', default=0, type=int, metavar='',
                        help='number of epochs between saving checkpoints (0 disables checkpoints)')
    parser.add_argument('--eval_target', default=False, type=int, metavar='',
                        help='evaluate target during training')
    parser.add_argument('--use_cuda', default=True, type=int, metavar='',
                        help='use CUDA capable GPU')
    parser.add_argument('--use_visdom', default=False, type=int, metavar='',
                        help='use Visdom to visualize plots')
    parser.add_argument('--visdom_env', default='digits_train', type=str,
                        metavar='', help='Visdom environment name')
    parser.add_argument('--visdom_port', default=8888, type=int, metavar='',
                        help='Visdom port')
    parser.add_argument('--verbosity', default=2, type=int, metavar='',
                        help='log verbosity level (0, 1, 2)')
    parser.add_argument('--seed', default=42, type=int, metavar='',
                        help='random seed')
    args = vars(parser.parse_args())

    # override args with icfg (if provided)
    cfg = args.copy()
    if cfg['icfg'] is not None:
        cv_parser = ConfigParser()
        cv_parser.read(cfg['icfg'])
        cv_param_names = []
        for key, val in cv_parser.items('main'):
            cfg[key] = ast.literal_eval(val)
            cv_param_names.append(key)

    # dump cfg to a txt file for your records
    with open(cfg['output'] + '.txt', 'w') as f:
        f.write(str(cfg) + '\n')

    # use a fixed random seed for reproducibility purposes
    if cfg['seed'] > 0:
        random.seed(cfg['seed'])
        np.random.seed(seed=cfg['seed'])
        torch.manual_seed(cfg['seed'])
        torch.cuda.manual_seed(cfg['seed'])

    device = 'cuda' if (cfg['use_cuda'] and torch.cuda.is_available()) else 'cpu'
    log = Logger(cfg['verbosity'])
    log.print('device:', device, level=0)

    if ('FS' in cfg['model']) or ('FM' in cfg['model']):
        # weak data augmentation (small rotation + small translation)
        data_aug = T.Compose([
            T.RandomAffine(5, translate=(0.125, 0.125)),
            T.ToTensor(),
        ])
    else:
        data_aug = T.ToTensor()

    # define all datasets
    datasets = {}
    datasets['MNIST'] = MNIST(train=True,
                              path=os.path.join(cfg['data_path'], 'MNIST'),
                              transform=data_aug)
    datasets['MNIST_M'] = MNIST_M(train=True,
                                  path=os.path.join(cfg['data_path'],
                                                    'MNIST_M'),
                                  transform=data_aug)
    datasets['SVHN'] = SVHN(train=True,
                            path=os.path.join(cfg['data_path'], 'SVHN'),
                            transform=data_aug)
    datasets['SynthDigits'] = SynthDigits(train=True,
                                          path=os.path.join(
                                              cfg['data_path'],
                                              'SynthDigits'),
                                          transform=data_aug)
    if ('FS' in cfg['model']) or ('FM' in cfg['model']):
        test_set = deepcopy(datasets[cfg['target']])
        test_set.transform = T.ToTensor()  # no data augmentation in test
    else:
        test_set = datasets[cfg['target']]

    # get a subset of images from each dataset
    # define public and private test sets: the private one is not used at
    # training time to learn invariant representations
    for ds_name in datasets:
        if ds_name == cfg['target']:
            indices = random.sample(range(len(datasets[ds_name])),
                                    cfg['n_tgt_images'] + cfg['n_src_images'])
            test_pub_set = Subset(test_set, indices[0:cfg['n_tgt_images']])
            test_priv_set = Subset(test_set, indices[cfg['n_tgt_images']::])
            datasets[cfg['target']] = Subset(datasets[cfg['target']],
                                             indices[0:cfg['n_tgt_images']])
        else:
            indices = random.sample(range(len(datasets[ds_name])),
                                    cfg['n_src_images'])
            datasets[ds_name] = Subset(datasets[ds_name],
                                       indices[0:cfg['n_src_images']])

    # build the dataloaders
    train_loader = MSDA_Loader(datasets, cfg['target'],
                               batch_size=cfg['batch_size'], shuffle=True,
                               device=device)
    test_pub_loader = DataLoader(test_pub_set,
                                 batch_size=4 * cfg['batch_size'])
    test_priv_loader = DataLoader(test_priv_set,
                                  batch_size=4 * cfg['batch_size'])
    valid_loaders = ({
        'target pub': test_pub_loader,
        'target priv': test_priv_loader
    } if cfg['eval_target'] else None)
    log.print('target domain:', cfg['target'], '| source domains:',
              train_loader.sources, level=1)

    if cfg['model'] == 'FS':
        model = SimpleCNN().to(device)
        optimizer = optim.Adadelta(model.parameters(), lr=cfg['lr'],
                                   weight_decay=cfg['weight_decay'])
        if valid_loaders is not None:
            del valid_loaders['target pub']
        fs_train_routine(model, optimizer, test_pub_loader, valid_loaders, cfg)
    elif cfg['model'] == 'FM':
        model = SimpleCNN().to(device)
        optimizer = optim.Adadelta(model.parameters(), lr=cfg['lr'],
                                   weight_decay=cfg['weight_decay'])
        cfg['excl_transf'] = [Flip]
        fm_train_routine(model, optimizer, train_loader, valid_loaders, cfg)
    elif cfg['model'] == 'DANNS':
        # one single-source DANN per source domain; each model saved separately
        for src in train_loader.sources:
            model = MODANet().to(device)
            optimizer = optim.Adadelta(model.parameters(), lr=cfg['lr'],
                                       weight_decay=cfg['weight_decay'])
            dataset_ss = {
                src: datasets[src],
                cfg['target']: datasets[cfg['target']]
            }
            train_loader = MSDA_Loader(dataset_ss, cfg['target'],
                                       batch_size=cfg['batch_size'],
                                       shuffle=True, device=device)
            dann_train_routine(model, optimizer, train_loader, valid_loaders,
                               cfg)
            torch.save(model.state_dict(), cfg['output'] + '_' + src)
    elif cfg['model'] == 'DANNM':
        model = MODANet().to(device)
        optimizer = optim.Adadelta(model.parameters(), lr=cfg['lr'],
                                   weight_decay=cfg['weight_decay'])
        dann_train_routine(model, optimizer, train_loader, valid_loaders, cfg)
    elif cfg['model'] == 'MDAN':
        model = MDANet(len(train_loader.sources)).to(device)
        optimizer = optim.Adadelta(model.parameters(), lr=cfg['lr'],
                                   weight_decay=cfg['weight_decay'])
        mdan_train_routine(model, optimizer, train_loader, valid_loaders, cfg)
    elif cfg['model'] == 'MDANU':
        model = MDANet(len(train_loader.sources)).to(device)
        model.grad_reverse = nn.ModuleList([
            nn.Identity() for _ in range(len(model.domain_class))
        ])  # remove grad reverse
        task_optim = optim.Adadelta(list(model.feat_ext.parameters()) +
                                    list(model.task_class.parameters()),
                                    lr=cfg['lr'],
                                    weight_decay=cfg['weight_decay'])
        adv_optim = optim.Adadelta(model.domain_class.parameters(),
                                   lr=cfg['lr'],
                                   weight_decay=cfg['weight_decay'])
        optimizers = (task_optim, adv_optim)
        mdan_unif_train_routine(model, optimizers, train_loader, valid_loaders,
                                cfg)
    elif cfg['model'] == 'MDANFM':
        model = MDANet(len(train_loader.sources)).to(device)
        optimizer = optim.Adadelta(model.parameters(), lr=cfg['lr'],
                                   weight_decay=cfg['weight_decay'])
        mdan_fm_train_routine(model, optimizer, train_loader, valid_loaders,
                              cfg)
    elif cfg['model'] == 'MDANUFM':
        model = MDANet(len(train_loader.sources)).to(device)
        task_optim = optim.Adadelta(list(model.feat_ext.parameters()) +
                                    list(model.task_class.parameters()),
                                    lr=cfg['lr'],
                                    weight_decay=cfg['weight_decay'])
        adv_optim = optim.Adadelta(model.domain_class.parameters(),
                                   lr=cfg['lr'],
                                   weight_decay=cfg['weight_decay'])
        optimizers = (task_optim, adv_optim)
        cfg['excl_transf'] = [Flip]
        # BUG FIX: this branch previously passed `optimizer`, a name never
        # defined within it (NameError at runtime). The unif routine takes
        # the (task, adversarial) optimizer pair, as in the MDANU branch.
        mdan_unif_fm_train_routine(model, optimizers, train_loader,
                                   valid_loaders, cfg)
    elif cfg['model'] == 'MODA':
        model = MODANet().to(device)
        optimizer = optim.Adadelta(model.parameters(), lr=cfg['lr'],
                                   weight_decay=cfg['weight_decay'])
        moda_train_routine(model, optimizer, train_loader, valid_loaders, cfg)
    elif cfg['model'] == 'MODAFM':
        model = MODANet().to(device)
        optimizer = optim.Adadelta(model.parameters(), lr=cfg['lr'],
                                   weight_decay=cfg['weight_decay'])
        cfg['excl_transf'] = [Flip]
        moda_fm_train_routine(model, optimizer, train_loader, valid_loaders,
                              cfg)
    else:
        raise ValueError('Unknown model {}'.format(cfg['model']))

    # DANNS already saved one model per source domain above
    if cfg['model'] != 'DANNS':
        torch.save(model.state_dict(), cfg['output'])
def train(models, writer, device):
    """Two-phase training driver: greedy layer-wise denoising-autoencoder
    pretraining, then supervised fine-tuning of the whole stack.

    `models` is expected to expose (per the attribute accesses below):
    `.layers` (each with an `.encoder`), `.da_optimizers` (one per layer),
    `.sda_optimizer`, `.mse_criterion`, `.ce_criterion`, and to be callable
    for the supervised forward pass. Relies on a module-level `config`
    object for all hyperparameters.

    Args:
        models: the layered model container described above.
        writer: a TensorBoard SummaryWriter for train/val loss scalars.
        device: torch device the batches are moved to.
    """
    train_dataset = MNIST(config.dataset_dir, split='train')
    train_loader = DataLoader(train_dataset, batch_size=config.batch_size,
                              shuffle=True, num_workers=config.num_workers)
    val_dataset = MNIST(config.dataset_dir, split='val')
    val_loader = DataLoader(val_dataset, batch_size=config.batch_size,
                            num_workers=config.num_workers)

    # ---- Phase 1: unsupervised layer-wise pretraining -------------------
    train_iter = iter(train_loader)
    for i in range(len(config.hidden_features)):
        for step in range(config.train_iters):
            # Manual iterator so the loader can be re-wound mid-phase
            # without restarting the step counter.
            try:
                data, _ = next(train_iter)
            except StopIteration:
                train_iter = iter(train_loader)
                data, _ = next(train_iter)
            data = data.to(device)

            # Encode the batch through the already-trained layers 0..i-1
            # to obtain the input representation for layer i.
            x = data
            for k in range(i):
                x = models.layers[k].encoder(x)

            # Masking noise: zero a random fraction (config.w_v) of the
            # feature columns before reconstruction.
            idx = list(range(x.size(1)))
            random.shuffle(idx)
            x_noise = x.clone()
            x_noise[:, idx[:int(x.size(1) * config.w_v)]] = 0

            # Reconstruct the clean representation from the corrupted one;
            # detach so gradients stay inside layer i only.
            x_rec = models.layers[i](x_noise.detach())
            loss = models.mse_criterion(x_rec, x.detach())
            models.da_optimizers[i].zero_grad()
            loss.backward()
            models.da_optimizers[i].step()

            if (step + 1) % config.print_step == 0:
                print("[{}] step: {}/{}, loss: {:.4f}".format(
                    i, step + 1, config.train_iters, loss.item()))

    # ---- Phase 2: supervised fine-tuning of the full stack --------------
    train_iter = iter(train_loader)
    for step in range(config.train_iters):
        try:
            data, labels = next(train_iter)
        except StopIteration:
            train_iter = iter(train_loader)
            data, labels = next(train_iter)
        data = data.to(device)
        labels = labels.to(device)

        logits = models(data)
        loss = models.ce_criterion(logits, labels)
        models.sda_optimizer.zero_grad()
        loss.backward()
        models.sda_optimizer.step()

        if (step + 1) % config.print_step == 0:
            print("step: {}/{}, loss: {:.4f}".format(step + 1,
                                                     config.train_iters,
                                                     loss.item()))
        if (step + 1) % config.tensorboard_step == 0:
            writer.add_scalar('Loss/train', loss.item(), step + 1)
            # Quick validation probe: a single batch, no gradients.
            # NOTE(review): iter(val_loader) is rebuilt each time, so this
            # always scores the first val batch — confirm that is intended.
            with torch.no_grad():
                data, labels = next(iter(val_loader))
                data = data.to(device)
                labels = labels.to(device)
                logits = models(data)
                loss = models.ce_criterion(logits, labels)
                writer.add_scalar('Loss/val', loss.item(), step + 1)

    # Save only the layer weights, named by the (fixed) iteration budget.
    # NOTE(review): assumes config.checkpoint_dir/config.name already exists.
    checkpoint_path = os.path.join(config.checkpoint_dir,
                                   config.name,
                                   '{:05d}.ckpt'.format(config.train_iters))
    torch.save(models.layers.state_dict(), checkpoint_path)
# create the dataloaders dataloader = {} if args.source == 'svhn': dataloader['source_train'] = DataLoader(SVHN(os.path.join( args.data_root, args.source), split='train', transform=dset_transforms, domain_label=0, download=True), batch_size=args.batch_size, shuffle=True, drop_last=True) elif args.source == 'mnist': dataloader['source_train'] = DataLoader(MNIST( os.path.join(args.data_root, args.source), train=True, transform=gray_transforms, domain_label=0, download=True), batch_size=args.batch_size, shuffle=True, drop_last=True) elif args.source == 'mnistm': dataloader['source_train'] = DataLoader(MNISTM( os.path.join(args.data_root, args.source), train=True, transform=mnistm_transforms, domain_label=0, download=True), batch_size=args.batch_size, shuffle=True, drop_last=True)
args.plotfolder = os.path.join(args.checkpoint, "plotter") if not os.path.isdir(args.plotfolder): mkdir_p(args.plotfolder) # folder to save histogram args.histfolder = os.path.join(args.checkpoint, "histogram") if not os.path.isdir(args.histfolder): mkdir_p(args.histfolder) print('==> Preparing data..') transform = transforms.Compose( [transforms.ToTensor(), transforms.Normalize((0.1307, ), (0.3081, ))]) trainset = MNIST(root='../../data', train=True, download=True, transform=transform, train_class_num=args.train_class_num, test_class_num=args.test_class_num, includes_all_train_class=args.includes_all_train_class) testset = MNIST(root='../../data', train=False, download=True, transform=transform, train_class_num=args.train_class_num, test_class_num=args.test_class_num, includes_all_train_class=args.includes_all_train_class) # data loader trainloader = torch.utils.data.DataLoader(trainset, batch_size=args.stage1_bs, shuffle=True, num_workers=4)
from datasets import CIFAR10, MNIST import torch import torch.nn as nn import torch.nn.functional as F import torch.optim as optim import numpy as np from models import OtherNet, GroupNet, Net dataset = MNIST(8) model = OtherNet(dataset.batch_size, dataset.shape, kernel_size=3, maxpool=2) path = './' + model.__class__.__name__ + '.torch' optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5) model.train_with_loader(dataset.train_loader, dataset.test_loader, optimizer, num_epochs=100) torch.save(model.state_dict(), path)
def train(batchsize=512, feature_size=200, lr_g=3e-3, lr_d=3e-3,
          n_epoches=5000, milestones=[3500, 4000,]):
    """Train a vanilla GAN on MNIST with BCE loss and SGD for both nets.

    Args:
        batchsize: samples per batch (loader drops the last partial batch).
        feature_size: dimensionality of the generator's noise input.
        lr_g, lr_d: SGD learning rates for generator / discriminator.
        n_epoches: number of training epochs.
        milestones: epochs at which both LR schedulers decay.
            NOTE(review): mutable default argument — shared across calls;
            consider a tuple.
    """
    ## data
    mnistdata = MNIST()
    mnistloader = DataLoader(mnistdata, batchsize, True, drop_last=True)
    ## networks
    GNet = GeneratorNet(feature_size)
    DNet = DiscriminatorNet()
    if cuda.is_available():
        GNet.cuda()
        DNet.cuda()
    ## loss
    criterion = nn.BCELoss()
    ## optimizers (one per network, with matching step-decay schedulers)
    optimizerG = optim.SGD(GNet.parameters(), lr_g)
    optimizerD = optim.SGD(DNet.parameters(), lr_d)
    schedulerG = MultiStepLR(optimizerG, milestones)
    schedulerD = MultiStepLR(optimizerD, milestones)
    ## logging
    writer = SummaryWriter('../log/GAN_MNIST')
    bar = ProcessBar(n_epoches)

    for i_epoch in range(n_epoches):
        bar.step()
        # NOTE(review): scheduler.step() before the optimizer steps is the
        # pre-1.1 PyTorch ordering; newer PyTorch warns about this.
        schedulerG.step()
        schedulerD.step()
        lossG = []; lossD = []
        for i_batch, (realImg, _) in enumerate(mnistloader):
            ## flatten each image into a long vector
            realImg = realImg.view(batchsize, -1)
            ## target labels: 1 for real, 0 for fake
            realLabels = torch.ones (batchsize).float()
            fakeLabels = torch.zeros(batchsize).float()
            ## generate fake images from noise
            # NOTE(review): uniform noise here vs. torch.randn (normal)
            # for the generator update below — confirm the mismatch is
            # intentional.
            noise = torch.rand(batchsize, feature_size)
            if cuda.is_available():
                noise = noise.cuda()
                realImg = realImg.cuda()
                realLabels = realLabels.cuda()
                fakeLabels = fakeLabels.cuda()
            fakeImg = GNet(noise)
            ## discriminator loss on real images: want prediction `1`
            pred_real = DNet(realImg)
            lossD_real = criterion(pred_real, realLabels)
            ## discriminator loss on fake images: want prediction `0`
            pred_fake = DNet(fakeImg)
            lossD_fake = criterion(pred_fake, fakeLabels)
            ## combine and update the discriminator
            lossD_i = (lossD_real + lossD_fake) / 2
            optimizerD.zero_grad()
            lossD_i.backward()
            optimizerD.step()
            ## regenerate images for the generator update
            noise = torch.randn(batchsize, feature_size)
            if cuda.is_available():
                noise = noise.cuda()
            fakeImg = GNet(noise)
            ## generator loss: want the discriminator to output `1`
            pred_fake = DNet(fakeImg)
            lossG_i = criterion(pred_fake, realLabels )
            optimizerG.zero_grad()
            lossG_i.backward()
            optimizerG.step()
            lossG += [lossG_i.detach().cpu().numpy()]
            lossD += [lossD_i.detach().cpu().numpy()]
        ## per-epoch logging: mean losses and a grid of generated images
        lossG = np.mean(lossG); lossD = np.mean(lossD)
        writer.add_scalars('lossDss', {'G': lossG, 'D': lossD}, i_epoch)
        # Replicate the single channel to 3 so TensorBoard renders RGB.
        writer.add_images('image',
                          fakeImg.view(batchsize, 1, 28, 28).repeat(1, 3, 1, 1),
                          i_epoch)
    writer.close()
brick.biases_init = Constant(0) brick.initialize() rnn.weights_init = Identity() rnn.biases_init = Constant(0) rnn.initialize() print 'Bulding training process...' algorithm = GradientDescent(cost=cost, parameters=ComputationGraph(cost).parameters, step_rule=learning_algorithm( learning_rate=1e-6, momentum=0.0, clipping_threshold=1.0, algorithm='adam')) train_stream, valid_stream = MNIST(batch_size=batch_size) monitor_train_cost = TrainingDataMonitoring([cost, error_rate], prefix="train", after_epoch=True) monitor_valid_cost = DataStreamMonitoring([cost, error_rate], data_stream=valid_stream, prefix="train", after_epoch=True) model = Model(cost) main_loop = MainLoop(data_stream=train_stream, algorithm=algorithm, extensions=[ monitor_train_cost, monitor_valid_cost,
def main():
    """Driver for open-set MNIST training with softmax + center loss.

    Builds the data pipeline, network, losses and optimizers from the
    module-level `args`, optionally resumes from a checkpoint, trains for
    `args.es` epochs (unless `args.evaluate`), then runs the final test.
    """
    args.checkpoint = './checkpoints/mnist/' + args.arch
    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)
    # folder to save figures
    args.plotfolder = './checkpoints/mnist/' + args.arch + '/plotter'
    if not os.path.isdir(args.plotfolder):
        mkdir_p(args.plotfolder)
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(device)
    start_epoch = 0  # start from epoch 0 or last checkpoint epoch

    print('==> Preparing data..')
    # Standard MNIST normalization constants.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])
    # Project-local MNIST wrapper with known/unknown class splits for
    # open-set recognition.
    trainset = MNIST(root='../../data', train=True, download=True,
                     transform=transform,
                     train_class_num=args.train_class_num,
                     test_class_num=args.test_class_num,
                     includes_all_train_class=args.includes_all_train_class)
    testset = MNIST(root='../../data', train=False, download=True,
                    transform=transform,
                    train_class_num=args.train_class_num,
                    test_class_num=args.test_class_num,
                    includes_all_train_class=args.includes_all_train_class)
    # data loader
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=args.bs,
                                              shuffle=True, num_workers=4)
    testloader = torch.utils.data.DataLoader(testset, batch_size=args.bs,
                                             shuffle=False, num_workers=4)

    print('==> Building model..')
    net = Network(backbone=args.arch, num_classes=args.train_class_num,
                  embed_dim=args.embed_dim)
    # Feature dimension feeding the classifier; used by CenterLoss below.
    fea_dim = net.classifier.in_features
    net = net.to(device)
    if device == 'cuda':
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = True

    # NOTE: "softamx" typo is kept as-is for consistency with callers/logs.
    criterion_softamx = nn.CrossEntropyLoss()
    criterion_centerloss = CenterLoss(num_classes=args.train_class_num,
                                      feat_dim=fea_dim).to(device)
    # Separate optimizers: one for the network, one for the learnable
    # class centers (which have their own learning rate).
    optimizer_softmax = optim.SGD(net.parameters(), lr=args.lr,
                                  momentum=0.9, weight_decay=5e-4)
    optimizer_centerloss = torch.optim.SGD(criterion_centerloss.parameters(),
                                           lr=args.center_lr,
                                           momentum=0.9, weight_decay=5e-4)

    if args.resume:
        # Load checkpoint.
        if os.path.isfile(args.resume):
            print('==> Resuming from checkpoint..')
            checkpoint = torch.load(args.resume)
            net.load_state_dict(checkpoint['net'])
            criterion_centerloss.load_state_dict(checkpoint['centerloss'])
            # best_acc = checkpoint['acc']
            # print("BEST_ACCURACY: "+str(best_acc))
            start_epoch = checkpoint['epoch']
            logger = Logger(os.path.join(args.checkpoint, 'log.txt'),
                            resume=True)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    else:
        # Fresh run: new log file with column headers.
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'))
        logger.set_names(['Epoch', 'Total Loss','Softmax Loss',
                          'Center Loss', 'train Acc.'])

    if not args.evaluate:
        # Step-decay LR on the network optimizer only (centers keep theirs).
        scheduler = lr_scheduler.StepLR(optimizer_softmax, step_size=20,
                                        gamma=0.1)
        for epoch in range(start_epoch, start_epoch + args.es):
            print('\nEpoch: %d   Learning rate: %f'
                  % (epoch + 1, optimizer_softmax.param_groups[0]['lr']))
            train_loss, softmax_loss, center_loss, train_acc = train(
                net, trainloader, optimizer_softmax, optimizer_centerloss,
                criterion_softamx, criterion_centerloss, device)
            # Overwrites the same file every epoch: only the latest model
            # is kept.
            save_model(net, criterion_centerloss, epoch,
                       os.path.join(args.checkpoint, 'last_model.pth'))
            # plot the training data
            if args.plot:
                plot_feature(net, criterion_centerloss, trainloader, device,
                             args.plotfolder, epoch=epoch,
                             plot_class_num=args.train_class_num,
                             maximum=args.plot_max,
                             plot_quality=args.plot_quality)
            logger.append([epoch + 1, train_loss, softmax_loss,
                           center_loss, train_acc])
            scheduler.step()

    # Final evaluation on the test split (includes the unknown class).
    test(net, testloader, device)
    if args.plot:
        # plot_class_num + 1: the extra slot is presumably the "unknown"
        # open-set class — verify against plot_feature.
        plot_feature(net, criterion_centerloss, testloader, device,
                     args.plotfolder, epoch="test",
                     plot_class_num=args.train_class_num+1,
                     maximum=args.plot_max,
                     plot_quality=args.plot_quality)
    logger.close()
model_parser.add_argument('--otherrgb', action='store_true', default=False) dataset_parser = parser.add_mutually_exclusive_group(required=True) dataset_parser.add_argument('--mnist', action='store_true', default=False) dataset_parser.add_argument('--cifar10', action='store_true', default=False) args = parser.parse_args() num_epochs = args.epochs save_step = args.save_step kernel_size = args.kernel_size maxpool = 1 if args.disable_pool else 2 dropout = not args.disable_dropout if args.cifar10: dataset = CIFAR10(args.batch_size) else: dataset = MNIST(args.batch_size) comment = args.comment if args.other: model_class = OtherNet if args.group: model_class = GroupNet if args.net: model_class = Net if args.grouprgb: model_class = GroupNetRGB if args.otherrgb: model_class = OtherNetRGB print(args) model = model_class(dataset.batch_size, dataset.shape, kernel_size=kernel_size,
def test_mnist(on_cloud=0):
    """Train an autoencoder on MNIST.

    This function will train an autoencoder on MNIST and also save many image
    files during the training process, demonstrating the latent space of the
    inner most dimension of the encoder, as well as reconstructions of the
    decoder.

    Args:
        on_cloud: when non-zero, image montages are written under /output
            (cloud job output directory) instead of the working directory.
    """
    # load MNIST
    n_code = 2
    mnist = MNIST(split=[0.8, 0.1, 0.1])
    # Fully-connected variational autoencoder on flattened 28x28 images.
    ae = VAE(input_shape=[None, 784],
             n_filters=[512, 256],
             n_hidden=64,
             n_code=n_code,
             activation=tf.nn.sigmoid,
             convolutional=False,
             variational=True)
    n_examples = 100
    # Fixed latent grid used to visualize the manifold across training.
    zs = np.random.uniform(-1.0, 1.0, [4, n_code]).astype(np.float32)
    zs = utils.make_latent_manifold(zs, n_examples)
    learning_rate = 0.02
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(
        ae['cost'])
    # We create a session to use the graph
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    # Fit all training data
    t_i = 0
    batch_i = 0
    batch_size = 200
    n_epochs = 10
    # Fixed test batch whose reconstructions are montaged every 10 batches.
    test_xs = mnist.test.images[:n_examples]
    if (on_cloud == 0):
        utils.montage(test_xs.reshape((-1, 28, 28)), 'test_xs.png')
    else:
        utils.montage(test_xs.reshape((-1, 28, 28)), '/output/test_xs.png')
    for epoch_i in range(n_epochs):
        train_i = 0
        train_cost = 0
        for batch_xs, _ in mnist.train.next_batch(batch_size):
            # sess.run returns [cost, optimizer_op]; keep only the cost.
            train_cost += sess.run([ae['cost'], optimizer],
                                   feed_dict={
                                       ae['x']: batch_xs,
                                       ae['train']: True,
                                       ae['keep_prob']: 1.0
                                   })[0]
            train_i += 1
            if batch_i % 10 == 0:
                # Plot example reconstructions from latent layer
                recon = sess.run(ae['y'],
                                 feed_dict={
                                     ae['z']: zs,
                                     ae['train']: False,
                                     ae['keep_prob']: 1.0
                                 })
                m = utils.montage(recon.reshape((-1, 28, 28)),
                                  'manifold_%08d.png' % t_i)
                # Plot example reconstructions
                recon = sess.run(ae['y'],
                                 feed_dict={
                                     ae['x']: test_xs,
                                     ae['train']: False,
                                     ae['keep_prob']: 1.0
                                 })
                m = utils.montage(recon.reshape((-1, 28, 28)),
                                  'reconstruction_%08d.png' % t_i)
                t_i += 1
            batch_i += 1
        # Per-epoch validation pass (no training op in the fetches).
        valid_i = 0
        valid_cost = 0
        for batch_xs, _ in mnist.valid.next_batch(batch_size):
            valid_cost += sess.run([ae['cost']],
                                   feed_dict={
                                       ae['x']: batch_xs,
                                       ae['train']: False,
                                       ae['keep_prob']: 1.0
                                   })[0]
            valid_i += 1
        print('train:', train_cost / train_i,
              'valid:', valid_cost / valid_i)
"""Tutorial on how to build a convnet w/ modern changes, e.g. Batch Normalization, Leaky rectifiers, and strided convolution. Parag K. Mital, Jan 2016. """ # %% import tensorflow as tf from batch_norm import batch_norm from activations import lrelu from connections import conv2d, linear from datasets import MNIST # %% Setup input to the network and true output label. These are # simply placeholders which we'll fill in later. mnist = MNIST() x = tf.placeholder(tf.float32, [None, 784]) y = tf.placeholder(tf.float32, [None, 10]) x_tensor = tf.reshape(x, [-1, 28, 28, 1]) # %% Define the network: bn1 = batch_norm(-1, name='bn1') bn2 = batch_norm(-1, name='bn2') bn3 = batch_norm(-1, name='bn3') h_1 = lrelu(bn1(conv2d(x_tensor, 32, name='conv1')), name='lrelu1') h_2 = lrelu(bn2(conv2d(h_1, 64, name='conv2')), name='lrelu2') h_3 = lrelu(bn3(conv2d(h_2, 64, name='conv3')), name='lrelu3') h_3_flat = tf.reshape(h_3, [-1, 64 * 4 * 4]) h_4 = linear(h_3_flat, 10) y_pred = tf.nn.softmax(h_4)