def __init__(self,
             loaders,
             batch_size,
             learning_rate,
             num_routing=3,
             lr_decay=0.9,
             device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
             multi_gpu=(torch.cuda.device_count() > 1)):
    self.device = device
    self.multi_gpu = multi_gpu
    self.loaders = loaders
    img_shape = self.loaders['train'].dataset[0][0].numpy().shape

    self.net = CapsuleNetwork(img_shape=img_shape,
                              channels=256,
                              primary_dim=8,
                              num_classes=10,
                              out_dim=16,
                              num_routing=num_routing,
                              device=self.device).to(self.device)
    if self.multi_gpu:
        self.net = nn.DataParallel(self.net)

    self.criterion = CapsuleLoss(loss_lambda=0.5, recon_loss_scale=5e-4)
    self.optimizer = optim.Adam(self.net.parameters(), lr=learning_rate)
    self.scheduler = optim.lr_scheduler.ExponentialLR(self.optimizer, gamma=lr_decay)

    print(8 * '#', 'PyTorch Model built'.upper(), 8 * '#')
    print('Num params:', sum(prod(p.size()) for p in self.net.parameters()))
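# A minimal sketch of the CapsuleLoss assumed above: the margin loss of
# Sabour et al. (2017) plus a scaled MSE reconstruction term. The keyword
# names mirror the constructor call; the 0.9/0.1 margins are the paper's
# defaults and the forward signature is an assumption, not this repo's API.
class CapsuleLossSketch(nn.Module):
    def __init__(self, loss_lambda=0.5, recon_loss_scale=5e-4):
        super().__init__()
        self.loss_lambda = loss_lambda
        self.recon_loss_scale = recon_loss_scale

    def forward(self, lengths, targets, reconstructions, images):
        # lengths: (batch, num_classes) capsule norms; targets: one-hot labels
        present = torch.relu(0.9 - lengths) ** 2
        absent = torch.relu(lengths - 0.1) ** 2
        margin = (targets * present
                  + self.loss_lambda * (1.0 - targets) * absent).sum(dim=1).mean()
        recon = nn.functional.mse_loss(reconstructions,
                                       images.view(reconstructions.size()))
        return margin + self.recon_loss_scale * recon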
def main():
    global args, train_writer, test_writer
    args = parser.parse_args()

    args.cuda = not args.no_cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)
    device = torch.device("cuda" if args.cuda else "cpu")

    # tensorboard logging
    train_writer = SummaryWriter(comment='train')
    test_writer = SummaryWriter(comment='test')

    # dataset
    num_class, img_dim, train_loader, test_loader = get_setting(args)

    # model
    # A, B, C, D = 64, 8, 16, 16
    A, B, C, D = 32, 32, 32, 32
    model = capsules(A=A, B=B, C=C, D=D, E=num_class,
                     iters=args.em_iters,
                     add_decoder=args.add_decoder,
                     img_dim=img_dim).to(device)
    # count only parameters that are actually trainable, to match the message
    print("Number of trainable parameters: {}".format(
        sum(param.numel() for param in model.parameters() if param.requires_grad)))

    criterion = CapsuleLoss(alpha=args.alpha, mode='bce',
                            num_class=num_class, add_decoder=args.add_decoder)
    optimizer = optim.Adam(model.parameters(), lr=args.lr,
                           weight_decay=args.weight_decay)

    best_loss, best_score = test(test_loader, model, criterion, 0, device)
    for epoch in range(1, args.epochs + 1):
        scores = train(train_loader, model, criterion, optimizer, epoch, device)
        if epoch % args.test_intvl == 0:
            test_loss, test_score = test(test_loader, model, criterion,
                                         epoch * len(train_loader), device)
            if test_loss < best_loss or test_score > best_score:
                snapshot(model, args.snapshot_folder, epoch)
            best_loss = min(best_loss, test_loss)
            best_score = max(best_score, test_score)
    print('best test score: {:.6f}'.format(best_score))

    train_writer.close()
    test_writer.close()

    # save end model
    snapshot(model, args.snapshot_folder, 'end_{}'.format(args.epochs))
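# A minimal sketch of the `snapshot` helper called above, assuming it simply
# serializes the model's state dict into the snapshot folder; the filename
# scheme is an assumption, not taken from this repo.
import os

def snapshot(model, folder, tag):
    os.makedirs(folder, exist_ok=True)
    torch.save(model.state_dict(),
               os.path.join(folder, 'model_{}.pth'.format(tag)))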
def __init__(self, train_loader, test_loader, writer=None,
             device="cpu", lr=0.1, lr_decay=0.9, **kwargs):
    super().__init__(train_loader, test_loader, writer, device, lr, lr_decay, **kwargs)
    self.model = CapsNetMnist(device).to(device)
    self.loss = CapsuleLoss().to(device)
    self.optimizer = Adam(self.model.parameters(), lr=self.lr)
    self.scheduler = ExponentialLR(self.optimizer, gamma=self.lr_decay)
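# For reference, a minimal sketch of the squash nonlinearity that capsule
# networks such as CapsNetMnist typically apply to capsule outputs,
# v = (||s||^2 / (1 + ||s||^2)) * (s / ||s||); this helper is illustrative,
# not taken from this repo.
def squash(s, dim=-1, eps=1e-8):
    squared_norm = (s ** 2).sum(dim=dim, keepdim=True)
    scale = squared_norm / (1.0 + squared_norm)
    return scale * s / torch.sqrt(squared_norm + eps)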
def main():
    # get general config
    conf, parser = get_conf()

    # get logger and log config
    log = get_logger(__name__)
    log.info(parser.format_values())

    # seed must be set before any stochastic operation in torch or numpy
    if conf.seed:
        torch.manual_seed(conf.seed)
        np.random.seed(conf.seed)

    # get data set
    transform = transforms.ToTensor()
    data_train, data_test, data_shape, label_shape = get_dataset(
        conf.dataset, transform=transform)

    assert conf.architecture.final.caps == label_shape, \
        "Number of final capsules should match the number of labels."

    # init basic capsnet
    model = BasicCapsNet(in_channels=data_shape[0],
                         routing_iters=conf.routing_iters,
                         in_height=data_shape[1],
                         in_width=data_shape[2],
                         stdev_W=conf.stdev_W,
                         bias_routing=conf.bias_routing,
                         arch=conf.architecture,
                         recon=conf.use_recon)

    # init capsule loss
    capsule_loss = CapsuleLoss(conf.m_plus, conf.m_min, conf.alpha,
                               include_recon=conf.use_recon)

    # init adam optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=conf.learning_rate)

    # init Trainer that supports the ignite training process
    trainer = CapsuleTrainer(model, capsule_loss, optimizer, data_train,
                             data_test, conf)

    # start trainer
    trainer.run()
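# A minimal sketch of what `get_conf` might provide, assuming it is built on
# configargparse (whose parsers expose the `format_values()` used above for
# logging). The options shown are illustrative; the real parser would also
# define the architecture, margins (m_plus, m_min), alpha, and the other
# fields read from `conf` above.
import configargparse

def get_conf():
    parser = configargparse.ArgParser(default_config_files=['default.conf'])
    parser.add_argument('--seed', type=int, default=42)
    parser.add_argument('--dataset', type=str, default='mnist')
    parser.add_argument('--learning_rate', type=float, default=1e-3)
    conf = parser.parse_args()
    return conf, parser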
def main():
    SAVE_MODEL_PATH = 'trained/'
    if not os.path.exists(SAVE_MODEL_PATH):
        os.mkdir(SAVE_MODEL_PATH)
    DATA_PATH = '.data/'

    parser = argparse.ArgumentParser()
    # dataset
    parser.add_argument('dataset', nargs='?', type=str, default='MNIST',
                        help="'MNIST' or 'CIFAR' (case insensitive).")
    # whether or not to use GPU
    parser.add_argument('--device', type=str,
                        default=("cuda" if torch.cuda.is_available() else "cpu"),
                        choices=['cuda', 'cpu'],
                        help='Device to use. Choose "cuda" for GPU or "cpu".')
    # batch size
    parser.add_argument('--batch_size', type=int, default=16,
                        help='Batch size, default is 16')
    # training epochs
    parser.add_argument('--epoch', type=int, default=50,
                        help='Number of training epochs')
    # primary capsule dimension
    parser.add_argument('--dim_caps', type=int, default=8,
                        help='Dimension of each capsule, default is 8')
    # output capsule dimension
    parser.add_argument('--out_caps', type=int, default=16)
    # conv in_channels
    parser.add_argument('--in_conv_channels', type=int, default=1)
    # conv out_channels
    parser.add_argument('--out_conv_channels', type=int, default=256)
    # number of routing iterations
    parser.add_argument('--num_routing', type=int, default=3)
    # learning rate
    parser.add_argument('--learning_rate', type=float, default=1e-3)
    # exponential learning rate decay
    parser.add_argument('--lr_decay', type=float, default=0.96,
                        help='Exponential learning rate decay.')
    # data path
    parser.add_argument('--data_path', type=str, default=DATA_PATH)
    # channels in primary capsule layer
    parser.add_argument('--primary_channels', type=int, default=32,
                        help='Number of channels in the PrimaryCapsule layer')
    args = parser.parse_args()

    # GPU or CPU
    device = torch.device(args.device)

    assert (args.dataset.upper() == 'MNIST'
            or args.dataset.upper() == 'CIFAR'), 'dataset must be MNIST or CIFAR'

    print('===> Data loading')
    if args.dataset.upper() == 'MNIST':
        args.data_path = os.path.join(args.data_path, 'MNIST')
        size = 28
        classes = list(range(10))
        mean, std = ((0.1307,), (0.3081,))
        transform = transforms.Compose(
            [transforms.ToTensor(),
             transforms.Normalize(mean, std)])
        trainset = torchvision.datasets.MNIST(root=args.data_path, train=True,
                                              download=True, transform=transform)
        trainloader = torch.utils.data.DataLoader(trainset,
                                                  batch_size=args.batch_size,
                                                  shuffle=True, num_workers=2)
        testset = torchvision.datasets.MNIST(root=args.data_path, train=False,
                                             download=True, transform=transform)
        testloader = torch.utils.data.DataLoader(testset,
                                                 batch_size=args.batch_size,
                                                 shuffle=False, num_workers=2)
        in_conv_channels = 1
        out_conv_channels = 256
    elif args.dataset.upper() == 'CIFAR':
        args.data_path = os.path.join(args.data_path, 'CIFAR')
        size = 32
        classes = ['plane', 'car', 'bird', 'cat', 'deer',
                   'dog', 'frog', 'horse', 'ship', 'truck']
        mean, std = ((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        transform = transforms.Compose(
            [transforms.ToTensor(),
             transforms.Normalize(mean, std)])
        trainset = torchvision.datasets.CIFAR10(root=args.data_path, train=True,
                                                download=True, transform=transform)
        trainloader = torch.utils.data.DataLoader(trainset,
                                                  batch_size=args.batch_size,
                                                  shuffle=True, num_workers=2)
        testset = torchvision.datasets.CIFAR10(root=args.data_path, train=False,
                                               download=True, transform=transform)
        testloader = torch.utils.data.DataLoader(testset,
                                                 batch_size=args.batch_size,
                                                 shuffle=False, num_workers=2)
        in_conv_channels = 3
        out_conv_channels = 256

    print('===> Data loaded')
    print('===> Building model')
    img_shape = trainloader.dataset[0][0].numpy().shape
    model = CapsuleNet(img_shape, in_conv_channels, out_conv_channels,
                       args.primary_channels, args.dim_caps, len(classes),
                       args.out_caps,
                       args.num_routing, device)
    # set model to device [CPU or GPU]
    model = model.to(device)
    # which device are we using?
    print('\nDevice: {}'.format(device))

    # print model architecture and parameters
    print('Model architecture:\n{}\n'.format(model))
    print('Parameters and size:')
    for name, param in model.named_parameters():
        print('{}: {}'.format(name, list(param.size())))

    criterion = CapsuleLoss()
    learning_rate = args.learning_rate
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=args.lr_decay)

    print('===> Training')
    train(model, classes, args.epoch, criterion, optimizer, scheduler,
          trainloader, device)

    print('===> Testing')
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for (images, labels) in testloader:
            # move the batch to the same device as the model,
            # otherwise inference fails when running on GPU
            images, labels = images.to(device), labels.to(device)
            outputs, reconstructions = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print('Accuracy on {}: {}'.format(args.dataset.upper(),
                                      100 * correct / total))
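# Example invocations (assuming this script is saved as main.py; the flags
# come from the argparse definitions above):
#   python main.py MNIST --device cuda --batch_size 128 --epoch 50
#   python main.py CIFAR --learning_rate 1e-3 --lr_decay 0.96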
train_loss_logger = VisdomPlotLogger('line', opts={'title': 'Train Loss'})
train_accuracy_logger = VisdomPlotLogger('line', opts={'title': 'Train Accuracy'})
test_loss_logger = VisdomPlotLogger('line', opts={'title': 'Test Loss'})
test_accuracy_logger = VisdomPlotLogger('line', opts={'title': 'Test Accuracy'})
confusion_logger = VisdomLogger('heatmap', opts={
    'title': 'Confusion Matrix',
    'columnnames': list(range(config.NUM_CLASSES)),
    'rownames': list(range(config.NUM_CLASSES))
})
ground_truth_logger = VisdomLogger('image', opts={'title': 'Ground Truth'})
reconstruction_logger = VisdomLogger('image', opts={'title': 'Reconstruction'})

capsule_loss = CapsuleLoss()

engine.hooks['on_sample'] = on_sample
engine.hooks['on_forward'] = on_forward
engine.hooks['on_start_epoch'] = on_start_epoch
engine.hooks['on_end_epoch'] = on_end_epoch
engine.train(processor, utils.get_iterator(True),
             maxepoch=config.NUM_EPOCHS, optimizer=optimizer)
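# A minimal sketch of what the `on_forward` hook wired above might do,
# following the usual torchnet pattern: `state['output']`, `state['loss']`,
# and `state['sample']` are supplied by the engine, while `meter_accuracy`,
# `confusion_meter`, and `meter_loss` are assumed torchnet meters defined
# elsewhere (illustrative, not taken from this repo).
def on_forward(state):
    target = torch.LongTensor(state['sample'][1])
    meter_accuracy.add(state['output'].data, target)
    confusion_meter.add(state['output'].data, target)
    meter_loss.add(state['loss'].item())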
print("\nSizes of parameters: ") for name, param in model.named_parameters(): print(f"{name}: {list(param.size())}") n_params = sum([p.nelement() for p in model.parameters()]) # The coupling coefficients b_ij are not included in the parameter list, # we need to add them mannually, which is 1152 * 10 = 11520. print('\nTotal number of parameters: %d \n' % (n_params+11520)) GPU_AVAILABLE = torch.cuda.is_available() if(GPU_AVAILABLE): print("Training on GPU") model = model.cuda() else: print("Only CPU available, training on CPU") criterion = CapsuleLoss() optimizer = optim.Adam(model.parameters()) def train(model, criterion, optimizer, n_epochs, print_every=300): ''' Trains a capsule network and prints out training batch loss statistics. Saves model parameters if *validation* loss has decreased. param model: trained capsule network param criterion: capsule loss function param optimizer: optimizer for updating network weights param n_epochs: number of epochs to train for param print_every: batches to print and save training loss, default = 100 return: list of recorded training losses '''