def __init__(self, model, optimizer=None, loss=None, name="m", lr=0.001,
             alpha=0.001, risk_factors=None, regularization=None,
             reg_parameters=None):
    '''
    Build a trainer around *model*.

    optimizer: optimization method; defaults to Adam over model.parameters()
    loss: base criterion; defaults to nn.NLLLoss()
    alpha: regularization strength (only used when a regularizer is given)
    regularization: callable that wraps the base loss (e.g., eye_loss)
    reg_parameters: parameters to regularize on
    risk_factors: forwarded to the regularization wrapper
    '''
    self.model = model
    # NOTE(review): return value discarded — presumably moves model in
    # place (like nn.Module.cuda()); confirm against to_cuda's definition.
    to_cuda(model)

    # Fall back to sensible defaults when the caller supplies nothing.
    self.optimizer = (torch.optim.Adam(self.model.parameters(), lr=lr)
                      if optimizer is None else optimizer)
    loss = nn.NLLLoss() if loss is None else loss
    self.loss = loss
    self.name = name

    # Wrap the base criterion (e.g., eye_loss) only when every required
    # piece — wrapper, strength, and target parameters — is present.
    if (regularization is not None and alpha is not None
            and reg_parameters is not None):
        self.loss = regularization(loss, alpha, reg_parameters, risk_factors)
def train(self, train_loader):
    '''
    Run one epoch over *train_loader*, stepping the optimizer per batch
    and bumping self.epoch at the end.  Progress is printed every 500
    batches.
    '''
    self.model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        # Some datasets yield dicts; the tensor lives under 'image'.
        if type(data) is dict:
            data = data['image']
        if self.use_cuda:
            data, target = to_cuda(data), to_cuda(target)
        data, target = to_var(data), to_var(target)

        self.optimizer.zero_grad()
        # train_step / train_step2 are expected to call backward()
        # themselves; only the optimizer step happens here.
        loss = (self.train_step2(data, target) if self.twoImage
                else self.train_step(data, target))
        self.optimizer.step()

        if batch_idx % 500 == 0:
            # Paired-image batches arrive as a list; report one side's size.
            len_data = len(data[0]) if type(data) is list else len(data)
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                self.epoch, batch_idx * len_data,
                len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss))
    self.epoch += 1
def __init__(self, switchNet, weightNet, apply_f, lr=0.001, alpha=0.001,
             beta=0.001, max_grad=None, log_name=None, silence=True,
             mtl=False, max_time=30, n_early_stopping=100, print_every=100,
             plot=True, weight_decay=0, switch_update_every=1,
             weight_update_every=1):
    '''
    Trainer for the switch/weight network pair.

    alpha: z entropy weight
    beta: y entropy weight
    max_grad: gradient clipping max (None disables clipping)
    silence: don't output graph and statement
    mtl: multi-task learning
    max_time: max gpu training time
    print_every: print every few iterations; 0 disables printing
    switch_update_every: update switch every few steps
    weight_update_every: update weight every few steps
    '''
    # Networks (moved to GPU when available).
    self.switchNet = to_cuda(switchNet)
    self.weightNet = to_cuda(weightNet)
    self.switch_size = self.switchNet.switch_size
    self.apply_f = apply_f

    # Training schedule / bookkeeping.
    self.max_time = max_time  # max gpu training time
    self.n_early_stopping = n_early_stopping
    self.print_every = print_every
    self.draw_plot = plot
    self.weight_update_every = weight_update_every
    self.switch_update_every = switch_update_every
    self.mtl = mtl
    self.silence = silence
    self.setLogName(log_name)

    # One Adam optimizer per network.
    self.optSwitch = torch.optim.Adam(self.switchNet.parameters(),
                                      lr=lr, weight_decay=weight_decay)
    self.optWeight = torch.optim.Adam(self.weightNet.parameters(),
                                      lr=lr, weight_decay=weight_decay)

    # Logit (soft-margin) loss, plus its elementwise form.
    self.loss = nn.SoftMarginLoss()
    self.elementwise_loss = logit_elementwise_loss

    # Entropy weights and gradient clipping.
    self.max_grad = max_grad
    self.alpha = alpha
    self.beta = beta
    self.z = None
def __init__(self, model, optimizer=None, loss=None, name="m", lr=0.001,
             alpha=0, risk_factors=None, regularization=no_reg):
    '''
    Build a trainer around *model* and put it in training mode.

    optimizer: optimization method; defaults to Adam over model.parameters()
    loss: base criterion; defaults to nn.CrossEntropyLoss()
    alpha: regularization strength
    regularization: loss wrapper; defaults to no_reg (identity regularizer)
    risk_factors: forwarded to the regularization wrapper
    '''
    self.model = model
    # NOTE(review): return value discarded — presumably moves model in place.
    to_cuda(model)
    model.train()

    self.optimizer = (torch.optim.Adam(self.model.parameters(), lr=lr)
                      if optimizer is None else optimizer)

    base_loss = nn.CrossEntropyLoss() if loss is None else loss
    self.loss = base_loss
    self.name = name
    # Unconditionally wrap the criterion; no_reg leaves it unchanged.
    self.loss = regularization(base_loss, alpha, risk_factors)
def test(self, test_loader):
    '''
    Evaluate the model on *test_loader*.

    Prints the average NLL loss and accuracy over the whole test set and
    returns the accuracy as a percentage.
    '''
    self.model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        if type(data) is dict:
            data = data['image']
        if self.use_cuda:
            data, target = to_cuda(data), to_cuda(target)
        data, target = to_var(data, volatile=True), to_var(target)
        output = self.model(data)
        # Sum up batch loss.  reduction='sum' replaces the deprecated
        # size_average=False argument (same behavior, supported API).
        test_loss += F.nll_loss(output, target, reduction='sum').item()
        # The index of the max log-probability is the predicted class.
        pred = output.data.max(1, keepdim=True)[1]
        # .item() keeps `correct` a plain Python int rather than
        # accumulating a 0-dim tensor.
        correct += pred.eq(target.data.view_as(pred)).cpu().sum().item()

    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'
          .format(test_loss, correct, len(test_loader.dataset),
                  100. * correct / len(test_loader.dataset)))
    return 100. * correct / len(test_loader.dataset)
def __init__(self, autoencoder, lr=0.001, print_every=100, log_name=None,
             max_time=30, n_early_stopping=100, weight_decay=0):
    '''
    Reconstruction (MSE) trainer for an autoencoder.

    print_every: print every few iterations; 0 disables printing
    '''
    # Identity transform by default; the combined trainer replaces this.
    self.transform_function = lambda x: x

    self.autoencoder = to_cuda(autoencoder)
    self.optimizer = torch.optim.Adam(self.autoencoder.parameters(),
                                      lr=lr, weight_decay=weight_decay)
    self.loss = nn.MSELoss()  # l2 reconstruction loss

    self.max_time = max_time
    self.n_early_stopping = n_early_stopping
    self.print_every = print_every
    self.setLogName(log_name)
def __init__(self, weightNet, apply_f, lr=0.001, print_every=100,
             log_name=None, max_time=30, n_early_stopping=100,
             weight_decay=0):
    '''
    Trainer for a weight network scored with the soft-margin (logit) loss.

    print_every: print every few iterations; 0 disables printing
    '''
    # Identity transform by default; the combined trainer replaces this.
    self.transform_function = lambda x: x

    self.weightNet = to_cuda(weightNet)
    self.apply_f = apply_f
    self.optimizer = torch.optim.Adam(self.weightNet.parameters(),
                                      lr=lr, weight_decay=weight_decay)
    self.loss = nn.SoftMarginLoss()  # logit loss

    self.max_time = max_time
    self.n_early_stopping = n_early_stopping
    self.print_every = print_every
    self.setLogName(log_name)
model = STN2(padding_mode=args.p, init_mode=args.i) elif args.e == 'one': model = STN3(padding_mode=args.p, init_mode=args.i) elif args.m == 'cnn': if args.e == 'mnist' or args.e == 'same': model = CNN() elif args.e == 'double': model = CNN2() elif args.e == 'one': model = CNN3() elif args.m == 'fcn': model = FCN() use_cuda = torch.cuda.is_available() if use_cuda: model = to_cuda(model) # training optimizer = optim.Adam(model.parameters()) t = Trainer(model, optimizer, use_cuda=use_cuda, twoImage=(args.e == 'same')) for epoch in range(1, 10 + 1): t.train(train_loader) t.test(test_loader) # save the model loc = os.path.join(args.l, args.e) os.system('mkdir -p %s' % loc)