class Trainer(object):
    def __init__(self, model=None, criterion=None, optim=None):
        self.model = model
        self.criterion = criterion
        self.optim = optim
        self.engine = Engine()
        # These parameters should be defined in a subclass.
        self.meters = []
        self.batch_size = NotImplemented
        self.batch_workers = NotImplemented

    def set_up(self):
        raise NotImplementedError

    def _print_information(self, prefix):
        raise NotImplementedError

    def get_loss_and_output(self, sample):
        raise NotImplementedError

    def reset_meters(self):
        for meter in self.meters:
            meter.reset()

    def get_iterator(self, is_train):
        raise NotImplementedError

    @staticmethod
    def on_sample(state):
        # state['sample'].append(state['train'])
        pass

    def on_forward(self, state):
        raise NotImplementedError

    def on_start_epoch(self, state):
        # self.reset_meters()
        # state['iterator'] = tqdm(state['iterator'])
        pass

    def on_end_epoch(self, state):
        raise NotImplementedError

    def on_update(self, state):
        raise NotImplementedError

    def run(self, epochs):
        self.engine.hooks['on_sample'] = self.on_sample
        self.engine.hooks['on_forward'] = self.on_forward
        self.engine.hooks['on_start_epoch'] = self.on_start_epoch
        self.engine.hooks['on_end_epoch'] = self.on_end_epoch
        self.engine.train(self.get_loss_and_output, self.get_iterator(True),
                          maxepoch=epochs, optimizer=self.optim)
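# A minimal, self-contained sketch (not from the original code) of the raw
# torchnet Engine loop that Trainer.run wires up above: the network closure
# returns (loss, output), and hooks observe the shared `state` dict. The toy
# model, data, and hyperparameters here are illustrative placeholders.
import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from torchnet.engine import Engine

toy_model = torch.nn.Linear(10, 2)
toy_optimizer = torch.optim.SGD(toy_model.parameters(), lr=0.1)
toy_loader = DataLoader(TensorDataset(torch.randn(64, 10),
                                      torch.randint(0, 2, (64,))),
                        batch_size=8)

def toy_step(sample):
    inputs, targets = sample
    outputs = toy_model(inputs)
    return F.cross_entropy(outputs, targets), outputs

toy_engine = Engine()
# Missing hooks are simply skipped, so only the events of interest need wiring.
toy_engine.hooks['on_forward'] = lambda state: print(float(state['loss']))
toy_engine.train(toy_step, toy_loader, maxepoch=1, optimizer=toy_optimizer)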
def main():
    meter_loss = tnt.meter.AverageValueMeter()
    classerr = tnt.meter.ClassErrorMeter(accuracy=True)

    params = {
        'conv0.weight': conv_init(1, 50, 5), 'conv0.bias': torch.zeros(50),
        'conv1.weight': conv_init(50, 50, 5), 'conv1.bias': torch.zeros(50),
        'linear2.weight': linear_init(800, 512), 'linear2.bias': torch.zeros(512),
        'linear3.weight': linear_init(512, 10), 'linear3.bias': torch.zeros(10),
    }
    for k, v in params.items():
        params[k] = Variable(v, requires_grad=True)

    def h(sample):
        inputs = Variable(sample[0].float() / 255.0)
        targets = Variable(torch.LongTensor(sample[1]))
        o = f(params, inputs, sample[2])
        return F.cross_entropy(o, targets), o

    def on_sample(state):
        state['sample'].append(state['train'])

    def on_forward(state):
        classerr.add(state['output'].data, torch.LongTensor(state['sample'][1]))
        meter_loss.add(state['loss'].data[0])

    def on_start_epoch(state):
        classerr.reset()
        state['iterator'] = tqdm(state['iterator'])

    def on_end_epoch(state):
        print(classerr.value())

    optimizer = torch.optim.SGD(params.values(), lr=0.01, momentum=0.9,
                                weight_decay=0.0005)

    engine = Engine()
    engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch
    engine.train(h, get_iterator(True), 10, optimizer)
    engine.test(h, get_iterator(False))
def main():
    params = {
        'conv0.weight': conv_init(1, 50, 5), 'conv0.bias': torch.zeros(50),
        'conv1.weight': conv_init(50, 50, 5), 'conv1.bias': torch.zeros(50),
        'linear2.weight': linear_init(800, 512), 'linear2.bias': torch.zeros(512),
        'linear3.weight': linear_init(512, 10), 'linear3.bias': torch.zeros(10),
    }
    params = {k: Variable(v, requires_grad=True) for k, v in params.items()}

    optimizer = torch.optim.SGD(params.values(), lr=0.01, momentum=0.9,
                                weight_decay=0.0005)

    engine = Engine()
    mlog = MeterLogger(nclass=10, title="mnist_meterlogger")

    def h(sample):
        inputs = Variable(sample[0].float() / 255.0)
        targets = Variable(torch.LongTensor(sample[1]))
        o = f(params, inputs, sample[2])
        return F.cross_entropy(o, targets), o

    def on_sample(state):
        state['sample'].append(state['train'])

    def on_forward(state):
        loss = state['loss']
        output = state['output']
        target = state['sample'][1]
        # online plotter
        mlog.update_loss(loss, meter='loss')
        mlog.update_meter(output, target, meters={'accuracy', 'map', 'confusion'})

    def on_start_epoch(state):
        mlog.timer.reset()
        state['iterator'] = tqdm(state['iterator'])

    def on_end_epoch(state):
        mlog.print_meter(mode="Train", iepoch=state['epoch'])
        mlog.reset_meter(mode="Train", iepoch=state['epoch'])
        # do validation at the end of each epoch
        engine.test(h, get_iterator(False))
        mlog.print_meter(mode="Test", iepoch=state['epoch'])
        mlog.reset_meter(mode="Test", iepoch=state['epoch'])

    engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch
    engine.train(h, get_iterator(True), maxepoch=10, optimizer=optimizer)
def main():
    params = {
        "conv0.weight": conv_init(1, 50, 5), "conv0.bias": torch.zeros(50),
        "conv1.weight": conv_init(50, 50, 5), "conv1.bias": torch.zeros(50),
        "linear2.weight": linear_init(800, 512), "linear2.bias": torch.zeros(512),
        "linear3.weight": linear_init(512, 10), "linear3.bias": torch.zeros(10),
    }
    params = {k: Variable(v, requires_grad=True) for k, v in params.items()}

    optimizer = torch.optim.SGD(params.values(), lr=0.01, momentum=0.9,
                                weight_decay=0.0005)

    engine = Engine()
    meter_loss = tnt.meter.AverageValueMeter()
    classerr = tnt.meter.ClassErrorMeter(accuracy=True)

    def h(sample):
        inputs = Variable(sample[0].float() / 255.0)
        targets = Variable(torch.LongTensor(sample[1]))
        o = f(params, inputs, sample[2])
        return F.cross_entropy(o, targets), o

    def reset_meters():
        classerr.reset()
        meter_loss.reset()

    def on_sample(state):
        state["sample"].append(state["train"])

    def on_forward(state):
        classerr.add(state["output"].data, torch.LongTensor(state["sample"][1]))
        meter_loss.add(state["loss"].data[0])

    def on_start_epoch(state):
        reset_meters()
        state["iterator"] = tqdm(state["iterator"])

    def on_end_epoch(state):
        print("Training loss: %.4f, accuracy: %.2f%%" %
              (meter_loss.value()[0], classerr.value()[0]))
        # do validation at the end of each epoch
        reset_meters()
        engine.test(h, get_iterator(False))
        print("Testing loss: %.4f, accuracy: %.2f%%" %
              (meter_loss.value()[0], classerr.value()[0]))

    engine.hooks["on_sample"] = on_sample
    engine.hooks["on_forward"] = on_forward
    engine.hooks["on_start_epoch"] = on_start_epoch
    engine.hooks["on_end_epoch"] = on_end_epoch
    engine.train(h, get_iterator(True), maxepoch=10, optimizer=optimizer)
def train_and_track(train_dataloader, test_dataloader):
    criterion = nn.CrossEntropyLoss()
    model = resnext18()
    learning_rate = 0.001
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    engine = Engine()

    port = 8097
    train_loss_logger = VisdomPlotLogger(
        'line', port=port, opts={'title': 'Train CrossEntropyLoss'})
    train_err_logger = VisdomPlotLogger(
        'line', port=port, opts={'title': 'Train Class Accuracy'})
    test_loss_logger = VisdomPlotLogger(
        'line', port=port, opts={'title': 'Test CrossEntropyLoss'})
    test_err_logger = VisdomPlotLogger(
        'line', port=port, opts={'title': 'Test Class Accuracy'})

    meter_loss = tnt.meter.AverageValueMeter()
    classerr = tnt.meter.ClassErrorMeter(accuracy=True)

    def run_model(sample):
        images, labels = sample
        outputs = model(images)
        loss = criterion(outputs, labels)
        return loss, outputs

    def reset_meters():
        classerr.reset()
        meter_loss.reset()

    def on_forward(state):
        classerr.add(state['output'].data, torch.LongTensor(state['sample'][1]))
        meter_loss.add(state['loss'].data.item())

    def on_start_epoch(state):
        reset_meters()
        state['iterator'] = tqdm(state['iterator'])

    def on_end_epoch(state):
        train_loss_logger.log(state['epoch'], meter_loss.value()[0])
        train_err_logger.log(state['epoch'], classerr.value()[0])
        # Check accuracy on test after each epoch.
        reset_meters()
        engine.test(run_model, test_dataloader)
        test_loss_logger.log(state['epoch'], meter_loss.value()[0])
        test_err_logger.log(state['epoch'], classerr.value()[0])

    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch
    engine.train(run_model, train_dataloader, maxepoch=10, optimizer=optimizer)
def train(batch_size=512, epochs=100):
    from torch.autograd import Variable
    import torchnet as tnt
    from torchnet.engine import Engine
    from tensorboardX import SummaryWriter

    from autoencoders.models.sampling import sample_vae
    import autoencoders.data.mnist as mnist
    from autoencoders.utils.tensorboard import run_path

    use_gpu = torch.cuda.is_available()

    writer = SummaryWriter(run_path('conv_vae'))
    engine = Engine()
    meter_loss = tnt.meter.AverageValueMeter()

    model = ConvolutionalVariationalAutoencoder()
    optimizer = torch.optim.Adam(model.parameters(), 3e-4)
    dataloader = mnist(batch_size=batch_size)

    if use_gpu:
        model.cuda()
        vae_loss.cuda()

    def h(sample):
        inputs, _ = sample
        inputs = Variable(inputs)
        if use_gpu:
            inputs = inputs.cuda()
        output, mu, logvar = model(inputs)
        loss = vae_loss(output, inputs, mu, logvar)
        return loss, output

    def on_forward(state):
        meter_loss.add(state['loss'].data[0])

    def on_start_epoch(state):
        meter_loss.reset()

    def on_end_epoch(state):
        writer.add_scalar('loss', meter_loss.value()[0], state['epoch'])
        writer.add_image('image', sample_vae(model, dataloader), state['epoch'])
        meter_loss.reset()

    # engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch
    engine.train(h, dataloader, maxepoch=epochs, optimizer=optimizer)
def __init__(self):
    self._engine: Engine = Engine()
    # All possible hooks that are called by engine.train and engine.test.
    self._hooks: Dict[str, Callable] = {
        "on_start": self.on_start,              # start of the procedure
        "on_start_epoch": self.on_start_epoch,  # train exclusive
        "on_sample": self.on_sample,            # get a data point from the data loader
        "on_forward": self.on_forward,          # the only phase that applies to both train and test
        "on_update": self.on_update,            # train exclusive, after the "step" of backward updating
        "on_end_epoch": self.on_end_epoch,      # train exclusive; test is usually invoked here
        "on_end": self.on_end,                  # end of the procedure
    }
    self._engine.hooks = self._hooks
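# For reference, a runnable sketch (an assumption, not the original class) of
# the full class this __init__ implies: every Engine event maps to a method,
# and subclasses override only the events they care about, since the Engine
# calls each hook with the shared state dict.
from typing import Callable, Dict
from torchnet.engine import Engine

class HookedRunner:
    def __init__(self):
        self._engine: Engine = Engine()
        self._hooks: Dict[str, Callable] = {
            name: getattr(self, name)
            for name in ("on_start", "on_start_epoch", "on_sample",
                         "on_forward", "on_update", "on_end_epoch", "on_end")
        }
        self._engine.hooks = self._hooks

    # Default no-op implementations; override in subclasses.
    def on_start(self, state): pass
    def on_start_epoch(self, state): pass
    def on_sample(self, state): pass
    def on_forward(self, state): pass
    def on_update(self, state): pass
    def on_end_epoch(self, state): pass
    def on_end(self, state): pass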
def calibrate(model, testloader, loss_function, device):
    """Calibrates the weight and activation quantization parameters.

    Executes forward passes using the input data from `testloader`. For every
    forward pass, collects the statistics and calibrates the quantization
    parameters for all the weight and activation quant modules.

    Arguments:
        model (:class:`QuantizedNet`): nn model to train
        testloader (:class:`torch.utils.data.DataLoader`): dataloader to
            iterate through the sample data for calibration
        loss_function (:class:`torch.nn._Loss`): function to compute loss
        device (:class:`torch.device`): the device to run calibration on

    Returns:
        Module: calibrated quantized model
    """
    model.eval()
    model.to(device)
    engine = Engine()

    # Enable profiling to collect statistics and calibrate quant params
    model._profile()
    # Quantize weights
    model._quantize_weights()

    def compute_loss(data):
        """Computes the loss from a given nn model."""
        inputs = data[0]
        labels = data[1]
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        return loss_function(outputs, labels), outputs

    def on_start(state):
        print("Calibrating quantized network ...")
        state['iterator'] = tqdm(state['iterator'], leave=False)

    def on_forward(state):
        loss = state['loss'].item()
        accuracy = get_accuracy(state['output'].cpu(), state['sample'][1].cpu())
        state['iterator'].write('batch %d loss %.3f accuracy %.3f ' %
                                (state['t'], loss, accuracy), end='\n')

    engine.hooks['on_start'] = on_start
    engine.hooks['on_forward'] = on_forward
    engine.test(compute_loss, testloader)
    return model
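# Hypothetical usage of calibrate() together with the test() helper defined
# below. The loader names and the two-loader split are assumptions for
# illustration, not from the original code:
#
#     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
#     model = calibrate(model, calib_loader, torch.nn.CrossEntropyLoss(), device)
#     accuracy = test(model, test_loader, torch.nn.CrossEntropyLoss(), device)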
def test(model, testloader, loss_function, device):
    r"""Computes the accuracy and loss of the model for a given dataset.

    Arguments:
        model (:class:`torch.nn.Module`): nn model
        testloader (:class:`torch.utils.data.DataLoader`): dataloader to
            iterate through the data
        loss_function (:class:`torch.nn._Loss`): function to compute loss
        device (:class:`torch.device`): the device to run inference on

    Returns:
        accuracy (float): accuracy of the network on the given dataset
    """
    model.eval()
    model.to(device)
    engine = Engine()

    def compute_loss(data):
        """Computes the loss from a given nn model."""
        inputs = data[0]
        labels = data[1]
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        return loss_function(outputs, labels), outputs

    def on_start(state):
        print("Running inference ...")
        state['iterator'] = tqdm(state['iterator'], leave=False)

    class Accuracy:
        # Accumulators for the batch-size-weighted running accuracy.
        _accuracy = 0.
        _sample_size = 0.

    def on_forward(state):
        batch_size = state['sample'][1].shape[0]
        Accuracy._sample_size += batch_size
        Accuracy._accuracy += batch_size * get_accuracy(state['output'].cpu(),
                                                        state['sample'][1].cpu())

    engine.hooks['on_start'] = on_start
    engine.hooks['on_forward'] = on_forward
    engine.test(compute_loss, testloader)
    return Accuracy._accuracy / Accuracy._sample_size
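# An equivalent alternative to the ad-hoc Accuracy accumulator above (a
# sketch, not from the original code): torchnet's AverageValueMeter.add(value, n)
# accepts a weight, so the same batch-size-weighted mean can live in a meter.
# get_accuracy is the per-batch helper assumed by the surrounding code.
import torchnet as tnt

acc_meter = tnt.meter.AverageValueMeter()

def on_forward(state):
    batch_size = state['sample'][1].shape[0]
    acc_meter.add(get_accuracy(state['output'].cpu(),
                               state['sample'][1].cpu()),
                  n=batch_size)

# After engine.test(...), acc_meter.value()[0] is the dataset-level accuracy.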
def main():
    opt = parser.parse_args()
    print('parsed options:', vars(opt))
    # convert json data to python object
    epoch_step = json.loads(opt.epoch_step)
    num_classes = 10 if opt.dataset == 'CIFAR10' else 100

    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu_id  # 0
    # to prevent opencv from initializing CUDA in workers
    torch.randn(8).cuda()
    os.environ['CUDA_VISIBLE_DEVICES'] = ''  # set it to empty string

    def create_iterator(mode):
        ds = create_dataset(opt, mode)
        return ds.parallel(batch_size=opt.batchSize, shuffle=mode,
                           num_workers=opt.nthread, pin_memory=True)

    train_loader = create_iterator(True)
    test_loader = create_iterator(False)

    # deal with student first
    f_s, params_s, stats_s = resnet(opt.depth, opt.width, num_classes)

    # deal with teacher
    if opt.teacher_id != '':
        with open(os.path.join('logs', opt.teacher_id, 'log.txt'), 'r') as ff:
            line = ff.readline()
            r = line.find('json_stats')
            info = json.loads(line[r + 12:])
        f_t = resnet(info['depth'], info['width'], num_classes)[0]
        model_data = torch.load(os.path.join('logs', opt.teacher_id, 'model.pt7'))
        params_t = model_data['params']
        stats_t = model_data['stats']

        # merge teacher and student params and stats
        params = {'student.' + k: v for k, v in params_s.items()}
        for k, v in params_t.items():
            v.requires_grad = False
            params['teacher.' + k] = v
        stats = {'student.' + k: v for k, v in stats_s.items()}
        stats.update({'teacher.' + k: v for k, v in stats_t.items()})

        def f(inputs, params, stats, mode):
            y_s, g_s = f_s(inputs, params, stats, mode, 'student.')
            y_t, g_t = f_t(inputs, params, stats, False, 'teacher.')
            return y_s, y_t, [at_loss(x, y) for x, y in zip(g_s, g_t)]
    else:
        f, params, stats = f_s, params_s, stats_s

    optimizable = [v for v in params.values() if v.requires_grad]

    def create_optimizer(opt, lr):
        print('creating optimizer with lr = ', lr)
        if opt.optim_method == 'SGD':
            return torch.optim.SGD(optimizable, lr, 0.9,
                                   weight_decay=opt.weightDecay)
        elif opt.optim_method == 'Adam':
            return torch.optim.Adam(optimizable, lr)

    optimizer = create_optimizer(opt, opt.lr)

    epoch = 0
    if opt.resume != '':
        state_dict = torch.load(opt.resume)
        epoch = state_dict['epoch']
        params, stats = state_dict['params'], state_dict['stats']
        optimizer.load_state_dict(state_dict['optimizer'])

    print('\nParameters:')
    print(pd.DataFrame([(key, v.size(), torch.typename(v.data))
                        for key, v in params.items()]))
    print('\nAdditional buffers:')
    print(pd.DataFrame([(key, v.size(), torch.typename(v))
                        for key, v in stats.items()]))

    n_parameters = sum(p.numel() for p in
                       list(params_s.values()) + list(stats_s.values()))
    print('\nTotal number of parameters:', n_parameters)

    meter_loss = tnt.meter.AverageValueMeter()
    classacc = tnt.meter.ClassErrorMeter(accuracy=True)
    timer_train = tnt.meter.TimeMeter('s')
    timer_test = tnt.meter.TimeMeter('s')
    meters_at = [tnt.meter.AverageValueMeter() for i in range(3)]

    if not os.path.exists(opt.save):
        os.mkdir(opt.save)

    def h(sample):
        inputs = Variable(cast(sample[0], opt.dtype))
        targets = Variable(cast(sample[1], 'long'))
        if opt.teacher_id != '':  # if there is a teacher id
            y_s, y_t, loss_groups = data_parallel(f, inputs, params, stats,
                                                  sample[2], np.arange(opt.ngpu))
            loss_groups = [v.sum() for v in loss_groups]
            [m.add(v.data[0]) for m, v in zip(meters_at, loss_groups)]
            return distillation(y_s, y_t, targets, opt.temperature, opt.alpha) \
                + opt.beta * sum(loss_groups), y_s
        else:
            y = data_parallel(f, inputs, params, stats, sample[2],
                              np.arange(opt.ngpu))[0]
            return F.cross_entropy(y, targets), y

    def log(t):
        torch.save(dict(params=params, stats=stats,
                        optimizer=optimizer.state_dict(), epoch=t['epoch']),
                   open(os.path.join(opt.save, 'model.pt7'), 'wb'))
        z = vars(opt).copy()
        z.update(t)
        logname = os.path.join(opt.save, 'log.txt')
        with open(logname, 'a') as f:
            f.write('json_stats: ' + json.dumps(z) + '\n')
        print(z)

    def on_sample(state):
        state['sample'].append(state['train'])

    def on_forward(state):
        classacc.add(state['output'].data, torch.LongTensor(state['sample'][1]))
        meter_loss.add(state['loss'].data[0])

    def on_start(state):
        state['epoch'] = epoch

    def on_start_epoch(state):
        classacc.reset()
        meter_loss.reset()
        timer_train.reset()
        [meter.reset() for meter in meters_at]
        state['iterator'] = tqdm(train_loader)

        epoch = state['epoch'] + 1
        if epoch in epoch_step:
            lr = state['optimizer'].param_groups[0]['lr']
            state['optimizer'] = create_optimizer(opt, lr * opt.lr_decay_ratio)

    def on_end_epoch(state):
        train_loss = meter_loss.value()
        train_acc = classacc.value()
        train_time = timer_train.value()
        meter_loss.reset()
        classacc.reset()
        timer_test.reset()

        engine.test(h, test_loader)

        test_acc = classacc.value()[0]
        print(log({
            "train_loss": train_loss[0],
            "train_acc": train_acc[0],
            "test_loss": meter_loss.value()[0],
            "test_acc": test_acc,
            "epoch": state['epoch'],
            "num_classes": num_classes,
            "n_parameters": n_parameters,
            "train_time": train_time,
            "test_time": timer_test.value(),
            "at_losses": [m.value() for m in meters_at],
        }))
        print('==> id: %s (%d/%d), test_acc: \33[91m%.2f\033[0m' %
              (opt.save, state['epoch'], opt.epochs, test_acc))

    engine = Engine()
    engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch
    engine.hooks['on_start'] = on_start
    engine.train(h, train_loader, opt.epochs, optimizer)
def main():
    global CONST_STEP_FLAG
    CONST_STEP_FLAG = 0
    # opt = parser.parse_args()  # option note
    assert not (opt.beta and opt.gamma), \
        "Can't support attention-transfer and rocket-launching together"
    print('parsed options:', vars(opt))
    epoch_step = json.loads(opt.epoch_step)
    sigma_refine_step = json.loads(opt.sigma_refine_step)
    num_classes = 10 if opt.dataset == 'CIFAR10' else 100

    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu_id
    # to prevent opencv from initializing CUDA in workers
    torch.randn(8).cuda()
    os.environ['CUDA_VISIBLE_DEVICES'] = ''

    def create_iterator(mode):
        ds = create_dataset(opt, mode)
        return ds.parallel(batch_size=opt.batchSize, shuffle=mode,
                           num_workers=opt.nthread, pin_memory=True)

    train_loader = create_iterator(True)
    test_loader = create_iterator(False)

    # deal with student first
    f_s, params_s, stats_s = resnet(opt.depth, opt.width, num_classes,
                                    opt.student_depth)

    # deal with teacher
    if opt.teacher_id != '':
        with open(os.path.join('logs', opt.teacher_id, 'log.txt'), 'r') as ff:
            line = ff.readline()
            r = line.find('json_stats')
            info = json.loads(line[r + 12:])
        f_t = resnet(info['depth'], info['width'], num_classes)[0]
        model_data = torch.load(os.path.join('logs', opt.teacher_id, 'model.pt7'))
        params_t = model_data['params']
        stats_t = model_data['stats']

        # merge teacher and student params and stats
        params = {'student.' + k: v for k, v in params_s.items()}
        for k, v in params_t.items():
            params['teacher.' + k] = Variable(v)
        stats = {'student.' + k: v for k, v in stats_s.items()}
        stats.update({'teacher.' + k: v for k, v in stats_t.items()})

        def f(inputs, params, stats, mode):
            if opt.gamma:
                y_s, y_t_auto, g_s = f_s(inputs, params, stats, mode, 'student.')
                y_t, g_t = f_t(inputs, params, stats, False, 'teacher.')
                return y_s, y_t_auto, y_t
            else:
                y_s, g_s = f_s(inputs, params, stats, mode, 'student.')
                y_t, g_t = f_t(inputs, params, stats, False, 'teacher.')
                return y_s, y_t, [at_loss(x, y) for x, y in zip(g_s, g_t)]
    else:
        f, params, stats = f_s, params_s, stats_s

    optimizable = [v for v in params.values() if v.requires_grad]

    def create_optimizer(opt, lr):
        print('creating optimizer with lr = ', lr)
        if opt.optim_method == 'SGD':
            return torch.optim.SGD(optimizable, lr, 0.9,
                                   weight_decay=opt.weightDecay)
        elif opt.optim_method == 'Adam':
            return torch.optim.Adam(optimizable, lr)

    optimizer = create_optimizer(opt, opt.lr)

    epoch = 0
    if opt.resume != '':
        state_dict = torch.load(opt.resume)
        epoch = state_dict['epoch']
        params_tensors, stats = state_dict['params'], state_dict['stats']
        for k, v in params.items():
            v.data.copy_(params_tensors[k])
        optimizer.load_state_dict(state_dict['optimizer'])

    print('\nParameters:')
    print(pd.DataFrame([(key, v.size(), torch.typename(v.data))
                        for key, v in params.items()]))
    print('\nAdditional buffers:')
    print(pd.DataFrame([(key, v.size(), torch.typename(v))
                        for key, v in stats.items()]))

    n_parameters = sum(p.numel() for p in params_s.values())
    print('\nTotal number of parameters:', n_parameters)

    if opt.gamma:
        meter_loss_s = tnt.meter.AverageValueMeter()
        meter_loss_t = tnt.meter.AverageValueMeter()
        meter_loss_c = tnt.meter.AverageValueMeter()
        meter_loss_d = tnt.meter.AverageValueMeter()
        classacc_s = tnt.meter.ClassErrorMeter(accuracy=True)
        classacc_t = tnt.meter.ClassErrorMeter(accuracy=True)
    else:
        classacc = tnt.meter.ClassErrorMeter(accuracy=True)
    meter_loss = tnt.meter.AverageValueMeter()
    timer_train = tnt.meter.TimeMeter('s')
    timer_test = tnt.meter.TimeMeter('s')
    meters_at = [tnt.meter.AverageValueMeter() for i in range(3)]

    if not os.path.exists(opt.save):
        os.mkdir(opt.save)

    def h(sample):
        inputs = Variable(cast(sample[0], opt.dtype))
        targets = Variable(cast(sample[1], 'long'))
        if opt.teacher_id != '':
            if opt.gamma:
                ys, y_t_auto, y_t = data_parallel(f, inputs, params, stats,
                                                  sample[2],
                                                  np.arange(opt.ngpu))[:3]
                loss_l2 = torch.nn.MSELoss()
                T = 4
                loss_student = F.cross_entropy(ys, targets)
                loss_teacher = F.cross_entropy(y_t_auto, targets)
                loss_course = opt.beta * \
                    ((y_t_auto - ys) * (y_t_auto - ys)).sum() / opt.batchSize
                y_tech_temp = torch.autograd.Variable(y_t_auto.data,
                                                      requires_grad=False)
                log_kd = rocket_distillation(ys, y_t, targets,
                                             opt.temperature, opt.alpha)
                return rocket_distillation(ys, y_t, targets, opt.temperature,
                                           opt.alpha) \
                    + F.cross_entropy(y_t_auto, targets) \
                    + F.cross_entropy(ys, targets) \
                    + opt.beta * ((y_tech_temp - ys) *
                                  (y_tech_temp - ys)).sum() / opt.batchSize, \
                    (ys, y_t_auto, loss_student, loss_teacher, loss_course, log_kd)
            else:
                y_s, y_t, loss_groups = data_parallel(f, inputs, params, stats,
                                                      sample[2],
                                                      np.arange(opt.ngpu))
                loss_groups = [v.sum() for v in loss_groups]
                [m.add(v.data[0]) for m, v in zip(meters_at, loss_groups)]
                return distillation(y_s, y_t, targets, opt.temperature,
                                    opt.alpha) \
                    + opt.beta * sum(loss_groups), y_s
        else:
            if opt.gamma:
                ys, y = data_parallel(f, inputs, params, stats, sample[2],
                                      np.arange(opt.ngpu))[:2]
                loss_l2 = torch.nn.MSELoss()
                T = 4
                loss_student = F.cross_entropy(ys, targets)
                loss_teacher = F.cross_entropy(y, targets)
                loss_course = opt.beta * \
                    ((y - ys) * (y - ys)).sum() / opt.batchSize
                if opt.grad_block:
                    y_course = torch.autograd.Variable(y.data,
                                                       requires_grad=False)
                else:
                    y_course = y
                return F.cross_entropy(y, targets) + F.cross_entropy(ys, targets) \
                    + opt.beta * ((y_course - ys) *
                                  (y_course - ys)).sum() / opt.batchSize, \
                    (ys, y, loss_student, loss_teacher, loss_course)
            else:
                y = data_parallel(f, inputs, params, stats, sample[2],
                                  np.arange(opt.ngpu))[0]
                return F.cross_entropy(y, targets), y

    def log(t, state):
        torch.save(dict(params={k: v.data for k, v in params.items()},
                        stats=stats,
                        optimizer=state['optimizer'].state_dict(),
                        epoch=t['epoch']),
                   open(os.path.join(opt.save, 'model.pt7'), 'wb'))
        z = vars(opt).copy()
        z.update(t)
        logname = os.path.join(opt.save, 'log.txt')
        with open(logname, 'a') as f:
            f.write('json_stats: ' + json.dumps(z) + '\n')
        print(z)

    def on_sample(state):
        state['sample'].append(state['train'])

    if opt.gamma:
        def on_forward(state):
            classacc_s.add(state['output'][0].data,
                           torch.LongTensor(state['sample'][1]))
            classacc_t.add(state['output'][1].data,
                           torch.LongTensor(state['sample'][1]))
            meter_loss.add(state['loss'].data[0])
            meter_loss_s.add(state['output'][2].data[0])
            meter_loss_t.add(state['output'][3].data[0])
            meter_loss_c.add(state['output'][4].data[0])

        def on_start_epoch(state):
            classacc_s.reset()
            classacc_t.reset()
            meter_loss.reset()
            meter_loss_s.reset()
            meter_loss_t.reset()
            meter_loss_c.reset()
            timer_train.reset()
            [meter.reset() for meter in meters_at]
            # state['iterator'] = tqdm(train_loader)

            epoch = state['epoch'] + 1
            if epoch in sigma_refine_step:
                opt.running_sigma += opt.beta
            if epoch in epoch_step:
                lr = state['optimizer'].param_groups[0]['lr']
                state['optimizer'] = create_optimizer(opt,
                                                      lr * opt.lr_decay_ratio)

        def on_end_epoch(state):
            train_loss = meter_loss.value()
            train_loss_s = meter_loss_s.value()
            train_loss_t = meter_loss_t.value()
            train_loss_c = meter_loss_c.value()
            train_acc_s = classacc_s.value()
            train_acc_t = classacc_t.value()
            train_time = timer_train.value()
            meter_loss.reset()
            meter_loss_s.reset()
            meter_loss_t.reset()
            meter_loss_c.reset()
            classacc_s.reset()
            classacc_t.reset()
            timer_test.reset()

            engine.test(h, test_loader)

            test_acc_s = classacc_s.value()[0]
            test_acc_t = classacc_t.value()[0]
            print(log({
                "train_loss": train_loss[0],
                "train_acc_student": train_acc_s[0],
                "train_acc_teacher": train_acc_t[0],
                "test_loss": meter_loss.value()[0],
                "test_loss_student": meter_loss_s.value()[0],
                "test_loss_teacher": meter_loss_t.value()[0],
                "test_loss_course": meter_loss_c.value()[0],
                "test_acc_student": test_acc_s,
                "test_acc_teacher": test_acc_t,
                "epoch": state['epoch'],
                "num_classes": num_classes,
                "n_parameters": n_parameters,
                "train_time": train_time,
                "test_time": timer_test.value(),
                "at_losses": [m.value() for m in meters_at],
            }, state))
            print('==> id: %s (%d/%d), test_acc: \33[91m%.2f\033[0m' %
                  (opt.save, state['epoch'], opt.epochs, test_acc_s))
    else:
        def on_forward(state):
            classacc.add(state['output'].data,
                         torch.LongTensor(state['sample'][1]))
            meter_loss.add(state['loss'].data[0])

        def on_start_epoch(state):
            classacc.reset()
            meter_loss.reset()
            timer_train.reset()
            [meter.reset() for meter in meters_at]
            # state['iterator'] = tqdm(train_loader)

            epoch = state['epoch'] + 1
            if epoch in epoch_step:
                lr = state['optimizer'].param_groups[0]['lr']
                state['optimizer'] = create_optimizer(opt,
                                                      lr * opt.lr_decay_ratio)

        def on_end_epoch(state):
            train_loss = meter_loss.value()
            train_acc = classacc.value()
            train_time = timer_train.value()
            meter_loss.reset()
            classacc.reset()
            timer_test.reset()

            engine.test(h, test_loader)

            test_acc = classacc.value()[0]
            print(log({
                "train_loss": train_loss[0],
                "train_acc": train_acc[0],
                "test_loss": meter_loss.value()[0],
                "test_acc": test_acc,
                "epoch": state['epoch'],
                "num_classes": num_classes,
                "n_parameters": n_parameters,
                "train_time": train_time,
                "test_time": timer_test.value(),
                "at_losses": [m.value() for m in meters_at],
            }, state))
            print('==> id: %s (%d/%d), test_acc: \33[91m%.2f\033[0m' %
                  (opt.save, state['epoch'], opt.epochs, test_acc))

    def on_start(state):
        state['epoch'] = epoch

    engine = Engine()
    engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch
    engine.hooks['on_start'] = on_start
    engine.train(h, train_loader, opt.epochs, optimizer)
if __name__ == "__main__": from torch.autograd import Variable from torch.optim import Adam from torchnet.engine import Engine from torchvision.utils import make_grid from torchvision.datasets.mnist import MNIST from tqdm import tqdm import torchnet as tnt import h5py import os from collections import OrderedDict model = CapsuleNet() engine = Engine() meter_loss = tnt.meter.AverageValueMeter() mymeter = Mymeter(NUM_CLASSES) loss_func = F.binary_cross_entropy train_path = '/home/LAB/penghao/mars/metadata/test' train_dir = os.listdir(train_path) train_num = len(train_dir) index = 0 def get_iterator(mode): if mode: train_path = '/home/LAB/penghao/mars/metadata/train' dir = os.listdir(train_path) data = None labels = None
def main():
    opt = parser.parse_args()
    print('parsed options:', vars(opt))
    epoch_step = json.loads(opt.epoch_step)
    num_classes = 10 if opt.dataset == 'CIFAR10' else 100

    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu_id

    def create_iterator(train):
        return DataLoader(create_dataset(opt, train), batch_size=opt.batch_size,
                          shuffle=train, num_workers=opt.nthread,
                          pin_memory=torch.cuda.is_available())

    train_loader = create_iterator(True)
    test_loader = create_iterator(False)

    f, params, stats = resnet(opt.depth, opt.width, num_classes)

    def create_optimizer(opt, lr):
        print('creating optimizer with lr = ', lr)
        return SGD(params.values(), lr, 0.9, weight_decay=opt.weight_decay)

    optimizer = create_optimizer(opt, opt.lr)

    epoch = 0
    if opt.resume != '':
        state_dict = torch.load(opt.resume)
        epoch = state_dict['epoch']
        params_tensors, stats = state_dict['params'], state_dict['stats']
        for k, v in params.items():
            v.data.copy_(params_tensors[k])
        optimizer.load_state_dict(state_dict['optimizer'])

    print('\nParameters:')
    print_tensor_dict(params)
    print('\nAdditional buffers:')
    print_tensor_dict(stats)

    n_parameters = sum(p.numel() for p in params.values())
    print('\nTotal number of parameters:', n_parameters)

    meter_loss = tnt.meter.AverageValueMeter()
    classacc = tnt.meter.ClassErrorMeter(accuracy=True)
    timer_train = tnt.meter.TimeMeter('s')
    timer_test = tnt.meter.TimeMeter('s')

    if not os.path.exists(opt.save):
        os.mkdir(opt.save)

    def h(sample):
        inputs = Variable(cast(sample[0], opt.dtype))
        targets = Variable(cast(sample[1], 'long'))
        y = data_parallel(f, inputs, params, stats, sample[2],
                          list(range(opt.ngpu)))
        return F.cross_entropy(y, targets), y

    def log(t, state):
        torch.save(dict(params={k: v.data for k, v in params.items()},
                        stats=stats,
                        optimizer=state['optimizer'].state_dict(),
                        epoch=t['epoch']),
                   open(os.path.join(opt.save, 'model.pt7'), 'wb'))
        z = vars(opt).copy()
        z.update(t)
        logname = os.path.join(opt.save, 'log.txt')
        with open(logname, 'a') as f:
            f.write('json_stats: ' + json.dumps(z) + '\n')
        print(z)

    def on_sample(state):
        state['sample'].append(state['train'])

    def on_forward(state):
        classacc.add(state['output'].data, torch.LongTensor(state['sample'][1]))
        meter_loss.add(state['loss'].data[0])

    def on_start(state):
        state['epoch'] = epoch

    def on_start_epoch(state):
        classacc.reset()
        meter_loss.reset()
        timer_train.reset()
        state['iterator'] = tqdm(train_loader)

        epoch = state['epoch'] + 1
        if epoch in epoch_step:
            lr = state['optimizer'].param_groups[0]['lr']
            state['optimizer'] = create_optimizer(opt, lr * opt.lr_decay_ratio)

    def on_end_epoch(state):
        train_loss = meter_loss.value()
        train_acc = classacc.value()
        train_time = timer_train.value()
        meter_loss.reset()
        classacc.reset()
        timer_test.reset()

        engine.test(h, test_loader)

        test_acc = classacc.value()[0]
        print(log({
            "train_loss": train_loss[0],
            "train_acc": train_acc[0],
            "test_loss": meter_loss.value()[0],
            "test_acc": test_acc,
            "epoch": state['epoch'],
            "num_classes": num_classes,
            "n_parameters": n_parameters,
            "train_time": train_time,
            "test_time": timer_test.value(),
        }, state))
        print('==> id: %s (%d/%d), test_acc: \33[91m%.2f\033[0m' %
              (opt.save, state['epoch'], opt.epochs, test_acc))

    engine = Engine()
    engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch
    engine.hooks['on_start'] = on_start
    engine.train(h, train_loader, opt.epochs, optimizer)
def main():
    opt = parser.parse_args()
    print('parsed options:', vars(opt))
    epoch_step = json.loads(opt.epoch_step)
    num_classes = 10 if opt.dataset == 'CIFAR10' else 100

    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # multiple GPUs
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu_id

    def create_iterator(mode):
        return DataLoader(create_dataset(opt, mode), opt.batch_size,
                          shuffle=mode, num_workers=opt.nthread,
                          pin_memory=torch.cuda.is_available())

    train_loader = create_iterator(True)
    test_loader = create_iterator(False)

    # deal with student first
    f_s, params_s = resnet(opt.depth, opt.width, num_classes)

    # deal with teacher
    if opt.teacher_id:
        with open(os.path.join('logs', opt.teacher_id, 'log.txt'), 'r') as ff:
            line = ff.readline()
            r = line.find('json_stats')
            info = json.loads(line[r + 12:])
        f_t = resnet(info['depth'], info['width'], num_classes)[0]
        model_data = torch.load(os.path.join('logs', opt.teacher_id, 'model.pt7'))
        params_t = model_data['params']

        # merge teacher and student params
        params = {'student.' + k: v for k, v in params_s.items()}
        for k, v in params_t.items():
            if not k.startswith("teacher"):
                k = k.replace("student.", "")
                params['teacher.' + k] = v.detach().requires_grad_(False)

        def f(inputs, params, mode):
            y_s, g_s = f_s(inputs, params, mode, 'student.')
            with torch.no_grad():
                y_t, g_t = f_t(inputs, params, False, 'teacher.')
            return y_s, y_t, [utils.at_loss(x, y) for x, y in zip(g_s, g_t)]
    else:
        f, params = f_s, params_s

    def create_optimizer(opt, lr):
        print('creating optimizer with lr = ', lr)
        return SGD((v for v in params.values() if v.requires_grad), lr,
                   momentum=0.9, weight_decay=opt.weight_decay)

    optimizer = create_optimizer(opt, opt.lr)

    epoch = 0
    if opt.resume != '':
        state_dict = torch.load(opt.resume)
        epoch = state_dict['epoch']
        params_tensors = state_dict['params']
        for k, v in params.items():
            v.data.copy_(params_tensors[k])
        optimizer.load_state_dict(state_dict['optimizer'])

    print('\nParameters:')
    utils.print_tensor_dict(params)

    n_parameters = sum(p.numel() for p in list(params_s.values()))
    print('\nTotal number of parameters:', n_parameters)

    meter_loss = tnt.meter.AverageValueMeter()
    classacc = tnt.meter.ClassErrorMeter(accuracy=True)
    timer_train = tnt.meter.TimeMeter('s')
    timer_test = tnt.meter.TimeMeter('s')
    meters_at = [tnt.meter.AverageValueMeter() for i in range(3)]

    opt.save = opt.save + "_" + opt.dataset + "_epochs_" + str(opt.epochs)
    if not os.path.exists(opt.save):
        os.mkdir(opt.save)
    writer = SummaryWriter(opt.save)

    def h(sample):
        inputs = utils.cast(sample[0], opt.dtype).detach()
        targets = utils.cast(sample[1], 'long')
        if opt.teacher_id != '':
            y_s, y_t, loss_groups = utils.data_parallel(
                f, inputs, params, sample[2], range(opt.ngpu))
            loss_groups = [v.sum() for v in loss_groups]
            [m.add(v.item()) for m, v in zip(meters_at, loss_groups)]
            return utils.distillation(y_s, y_t, targets, opt.temperature,
                                      opt.alpha) \
                + opt.beta * sum(loss_groups), y_s
        else:
            y = utils.data_parallel(f, inputs, params, sample[2],
                                    range(opt.ngpu))[0]
            return F.cross_entropy(y, targets), y

    def log(t, state):
        # save the model to the given path
        torch.save(dict(params={k: v.data for k, v in params.items()},
                        optimizer=state['optimizer'].state_dict(),
                        epoch=t['epoch']),
                   os.path.join(opt.save, 'model.pt7'))
        z = vars(opt).copy()
        z.update(t)
        logname = os.path.join(opt.save, 'log.txt')
        with open(logname, 'a') as f:
            f.write('json_stats: ' + json.dumps(z) + '\n')
        print(z)

    def on_sample(state):
        state['sample'].append(state['train'])  # append the train flag to the sample

    def on_forward(state):
        classacc.add(state['output'].data, state['sample'][1])
        # meter_loss.add(state['loss'].item())

    def on_start(state):
        state['epoch'] = epoch

    def on_start_epoch(state):
        classacc.reset()
        meter_loss.reset()
        timer_train.reset()
        [meter.reset() for meter in meters_at]
        state['iterator'] = tqdm(train_loader)

        epoch = state['epoch'] + 1
        if epoch in epoch_step:
            lr = state['optimizer'].param_groups[0]['lr']
            state['optimizer'] = create_optimizer(opt, lr * opt.lr_decay_ratio)

    def on_end_epoch(state):
        train_loss = meter_loss.mean
        train_acc = classacc.value()
        train_time = timer_train.value()
        meter_loss.reset()
        classacc.reset()
        timer_test.reset()

        engine.test(h, test_loader)  # upward

        test_acc = classacc.value()[0]
        writer.add_scalar('loss/train', train_loss, state['epoch'])
        writer.add_scalar('acc/train', train_acc[0], state['epoch'])
        writer.add_scalar('loss/test', meter_loss.mean, state['epoch'])
        writer.add_scalar('acc/test', test_acc, state['epoch'])
        print(log({
            "train_loss": train_loss,
            "train_acc": train_acc[0],
            "test_loss": meter_loss.mean,
            "test_acc": test_acc,
            "epoch": state['epoch'],
            "num_classes": num_classes,
            "n_parameters": n_parameters,
            "train_time": train_time,
            "test_time": timer_test.value(),
            "at_losses": [m.value() for m in meters_at],
        }, state))
        print('==> id: %s (%d/%d), test_acc: \33[91m%.2f\033[0m' %
              (opt.save, state['epoch'], opt.epochs, test_acc))

    engine = Engine()
    engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch
    engine.hooks['on_start'] = on_start
    engine.train(h, train_loader, opt.epochs, optimizer)
    writer.close()
def main():
    opt = parser.parse_args()
    print('parsed options:', vars(opt))
    epoch_step = json.loads(opt.epoch_step)
    num_classes = 10 if opt.dataset == 'CIFAR10' else 100
    log_step = 1
    assert opt.subset_size in [100, 500, 1000, -1], \
        'subset size should be 100, 500, 1000 or -1'
    assert opt.subset_id in [1, 2, 3, 4, 5, -1], \
        'subset id should be 1-5 or -1'
    if opt.subset_size in [100, 500, 1000]:
        log_step = 10000 // opt.subset_size

    torch.manual_seed(opt.seed)
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu_id

    def create_iterator(mode):
        shuffle = mode and (opt.subset_size == -1 or opt.subset_id == -1)
        sampler = None
        if mode and not shuffle:
            ind = np.loadtxt('subsets/subset_' + str(opt.subset_size) + '_' +
                             str(opt.subset_id) + '.txt', dtype=np.int64)
            sampler = SubsetRandomSampler(ind)
        return DataLoader(create_dataset(opt, mode), opt.batch_size,
                          sampler=sampler, shuffle=shuffle,
                          num_workers=opt.nthread,
                          pin_memory=torch.cuda.is_available())

    train_loader = create_iterator(True)
    test_loader = create_iterator(False)

    kwargs = {}
    if opt.level is not None:
        kwargs.update({'level': opt.level})
    f, params = resnet(opt.depth, opt.width, num_classes, opt.dropout, **kwargs)

    def create_optimizer(opt, lr):
        print('creating optimizer with lr = ', lr)
        return SGD([v for v in params.values() if v.requires_grad], lr,
                   momentum=0.9, weight_decay=opt.weight_decay,
                   nesterov=opt.nesterov)

    optimizer = create_optimizer(opt, opt.lr)

    epoch = 0
    if opt.resume != '':
        state_dict = torch.load(opt.resume)
        epoch = state_dict['epoch']
        params_tensors = state_dict['params']
        for k, v in params.items():
            if k in params_tensors:
                v.data.copy_(params_tensors[k])
        optimizer.load_state_dict(state_dict['optimizer'])

    print('\nParameters:')
    print_tensor_dict(params)

    n_parameters = sum(p.numel() for p in params.values() if p.requires_grad)
    print('\nTotal number of parameters:', n_parameters)

    meter_loss = tnt.meter.AverageValueMeter()
    classacc = tnt.meter.ClassErrorMeter(accuracy=True)
    timer_train = tnt.meter.TimeMeter('s')
    timer_test = tnt.meter.TimeMeter('s')

    if not os.path.exists(opt.save):
        os.mkdir(opt.save)

    def h(sample):
        inputs = cast(sample[0], opt.dtype)
        targets = cast(sample[1], 'long')
        y = data_parallel(f, inputs, params, sample[2],
                          list(range(opt.ngpu))).float()
        return F.cross_entropy(y, targets), y

    def log(t, state):
        torch.save(dict(params={k: v for k, v in params.items()
                                if k.find('dct') == -1},
                        epoch=t['epoch'],
                        optimizer=state['optimizer'].state_dict()),
                   os.path.join(opt.save, 'model.pt7'))
        z = vars(opt).copy()
        z.update(t)
        with open(os.path.join(opt.save, 'log.txt'), 'a') as flog:
            flog.write('json_stats: ' + json.dumps(z) + '\n')
        print(z)

    def on_sample(state):
        state['sample'].append(state['train'])

    def on_forward(state):
        loss = float(state['loss'])
        classacc.add(state['output'].data, state['sample'][1])
        meter_loss.add(loss)
        if state['train']:
            state['iterator'].set_postfix(loss=loss)

    def on_start(state):
        state['epoch'] = epoch

    def on_start_epoch(state):
        classacc.reset()
        meter_loss.reset()
        timer_train.reset()
        state['iterator'] = tqdm(train_loader, dynamic_ncols=True)

        epoch = state['epoch'] + 1
        if epoch in epoch_step:
            lr = state['optimizer'].param_groups[0]['lr']
            state['optimizer'] = create_optimizer(opt, lr * opt.lr_decay_ratio)

    def on_end_epoch(state):
        if state['epoch'] % log_step == 0:
            train_loss = meter_loss.value()
            train_acc = classacc.value()
            train_time = timer_train.value()
            meter_loss.reset()
            classacc.reset()
            timer_test.reset()
            with torch.no_grad():
                engine.test(h, test_loader)

            test_acc = classacc.value()[0]
            print(log({
                "train_loss": train_loss[0],
                "train_acc": train_acc[0],
                "test_loss": meter_loss.value()[0],
                "test_acc": test_acc,
                "epoch": state['epoch'],
                "num_classes": num_classes,
                "n_parameters": n_parameters,
                "train_time": train_time,
                "test_time": timer_test.value(),
            }, state))
            print('==> id: %s (%d/%d), test_acc: \33[91m%.2f\033[0m' %
                  (opt.save, state['epoch'], opt.epochs, test_acc))

    engine = Engine()
    engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch
    engine.hooks['on_start'] = on_start
    engine.train(h, train_loader, opt.epochs, optimizer)
def main(cfg, cuda=torch.cuda.is_available()):
    ### flush cfg to output log file:
    tqdm.write(str(cfg), file=cfg['logfile'])
    tqdm.write('-' * 80, file=cfg['logfile'])

    ### define dataloader factory:
    def get_iterator():
        # set up dataloader config:
        datasets = cfg['data_paths']
        pin_mem = cuda
        nworkers = cfg['num_workers']
        # (possibly) concatenate datasets together:
        ds = SeqTensorDataset(torch.load(datasets[0][0]),
                              torch.load(datasets[0][1]),
                              torch.load(datasets[0][2]),
                              torch.load(datasets[0][3]))
        for dataset in datasets[1:]:
            ds += SeqTensorDataset(torch.load(dataset[0]),
                                   torch.load(dataset[1]),
                                   torch.load(dataset[2]),
                                   torch.load(dataset[3]))
        # return a dataloader iterating over datasets; pagelock memory
        # location if GPU detected:
        return DataLoader(ds, batch_size=cfg['batch_size'], shuffle=True,
                          num_workers=nworkers,
                          collate_fn=sequence_collate_fn, pin_memory=pin_mem)

    ### build RawCTCNet model:
    in_dim = 1
    layers = [(256, 256, d, 3) for d in [1, 2, 4, 8, 16, 32, 64]] * cfg['num_stacks']
    num_labels = 5
    out_dim = 512
    network = RawCTCNet(in_dim, num_labels, layers, out_dim, input_kw=1,
                        input_dil=1, positions=True, softmax=False,
                        causal=False, batch_norm=True)
    print("Constructed network.")
    if cuda:
        print("CUDA detected; placed network on GPU.")
        network.cuda()
    if cfg['model'] is not None:
        print("Loading model file...")
        try:
            network.load_state_dict(torch.load(cfg['model']))
        except:
            print("ERR: could not restore model. Check model datatype/dimensions.")

    ### build CTC loss function and model evaluation function:
    ctc_loss_fn = CTCLoss()
    print("Constructed CTC loss function.")
    maybe_gpu = lambda tsr, has_cuda: tsr if not has_cuda else tsr.cuda()

    def model_loss(sample):
        # unpack inputs and wrap in Variables:
        signals_, signal_lengths_, sequences_, sequence_lengths_ = sample
        signals = Variable(maybe_gpu(signals_.permute(0, 2, 1), cuda),
                           volatile=True)  # BxTxD => BxDxT
        signal_lengths = Variable(signal_lengths_, volatile=True)
        sequences = Variable(concat_labels(sequences_, sequence_lengths_),
                             volatile=True)
        sequence_lengths = Variable(sequence_lengths_, volatile=True)
        # compute predicted labels:
        transcriptions = network(signals).permute(2, 0, 1)  # BxDxT => TxBxD
        # compute CTC loss and return:
        loss = ctc_loss_fn(transcriptions, sequences.int(),
                           signal_lengths.int(), sequence_lengths.int())
        return loss, transcriptions

    ### build beam search decoder:
    beam_labels = [' ', 'A', 'G', 'C', 'T']
    beam_blank_id = 0
    beam_decoder = CTCBeamDecoder(beam_labels, beam_width=100,
                                  blank_id=beam_blank_id,
                                  num_processes=cfg['num_workers'])
    print("Constructed CTC beam search decoder.")

    ### build engine, meters, and hooks:
    engine = Engine()

    # Wrap a tqdm meter around the losses:
    def on_start(state):
        network.eval()
        state['iterator'] = tqdm(state['iterator'])

    # (Currently don't do anything w/r/t the sample.)
    def on_sample(state):
        pass

    # occasionally log the loss value and perform beam search decoding:
    def on_forward(state):
        if state['t'] % cfg['print_every'] == 0:
            # log the ctc loss:
            tqdm.write("Step {0} | Loss: {1}".format(state['t'],
                                                     state['loss'].data[0]),
                       file=cfg['logfile'])
            # beam search decoding:
            _, logit_lengths_t, seq_t, seq_lengths_t = state['sample']
            scores = mask_padding(state['output'].permute(1, 0, 2),
                                  logit_lengths_t, fill_logit_idx=0)
            logits = F.softmax(scores, dim=2)
            _nt_dict_ = {0: ' ', 1: 'A', 2: 'G', 3: 'C', 4: 'T'}

            def convert_to_string(toks, voc, num):
                try:
                    nt = ''.join([voc[t] for t in toks[0:num]])
                except:
                    nt = ''
                return nt

            try:
                true_nts = labels2strings(seq_t, lookup=_nt_dict_)
                amax_nts = labels2strings(argmax_decode(logits), lookup=_nt_dict_)
                beam_result, beam_scores, beam_times, beam_lengths = \
                    beam_decoder.decode(logits.data)
                pred_nts = [convert_to_string(beam_result[k][0], _nt_dict_,
                                              beam_lengths[k][0])
                            for k in range(len(beam_result))]
                for i in range(min(len(true_nts), len(pred_nts))):
                    tqdm.write("True Seq: {0}".format(true_nts[i]),
                               file=cfg['logfile'])
                    tqdm.write("Beam Seq: {0}".format(pred_nts[i]),
                               file=cfg['logfile'])
                    tqdm.write("Amax Seq: {0}".format(amax_nts[i]),
                               file=cfg['logfile'])
                    tqdm.write(("- " * 10 + "Local Beam Alignment" + " -" * 10),
                               file=cfg['logfile'])
                    tqdm.write(ssw(true_nts[i], pred_nts[i]),
                               file=cfg['logfile'])
                    tqdm.write("= " * 40, file=cfg['logfile'])
            except:
                tqdm.write("(WARN: Could not parse batch; skipping...)",
                           file=cfg['logfile'])

    # (Currently don't do anything at end of epoch.)
    def on_end(state):
        pass

    print("Constructed engine. Running validation loop...")

    ### run validation loop:
    engine.hooks['on_start'] = on_start
    engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_end'] = on_end
    engine.test(model_loss, get_iterator())
from torchnet.engine import Engine
from torchnet.logger import VisdomPlotLogger, VisdomLogger
from torchvision.utils import make_grid
from torchvision.datasets.mnist import MNIST
from tqdm import tqdm
import torchnet as tnt

model = CapsuleNet()
# model.load_state_dict(torch.load('epochs/epoch_327.pt'))
model.cuda()
print("# parameters:", sum(param.numel() for param in model.parameters()))

optimizer = Adam(model.parameters())

engine = Engine()
meter_loss = tnt.meter.AverageValueMeter()
meter_accuracy = tnt.meter.ClassErrorMeter(accuracy=True)
confusion_meter = tnt.meter.ConfusionMeter(NUM_CLASSES, normalized=True)

train_loss_logger = VisdomPlotLogger('line', opts={'title': 'Train Loss'})
train_error_logger = VisdomPlotLogger('line', opts={'title': 'Train Accuracy'})
test_loss_logger = VisdomPlotLogger('line', opts={'title': 'Test Loss'})
test_accuracy_logger = VisdomPlotLogger('line', opts={'title': 'Test Accuracy'})
confusion_logger = VisdomLogger('heatmap',
                                opts={'title': 'Confusion matrix',
                                      'columnnames': list(range(NUM_CLASSES)),
                                      'rownames': list(range(NUM_CLASSES))})
ground_truth_logger = VisdomLogger('image', opts={'title': 'Ground Truth'})
reconstruction_logger = VisdomLogger('image', opts={'title': 'Reconstruction'})

capsule_loss = CapsuleLoss()
def main(timestamp, batch_size, mini_batch_size=128):
    logging.debug('\tLoading %s data iterators' % CONFIG['dataset'].name)

    # Data loading
    dataset_name = CONFIG['dataset'].name
    dataroot = CONFIG['dataset'].datadir
    use_gpu = CONFIG['general'].use_gpu
    num_gpus = len(CONFIG['general'].gpus.split(',')) if use_gpu else 0
    num_classes = 10 if dataset_name == 'CIFAR10' else 100  # TODO make dataset-dependent lol
    train_loader, test_loader = create_data_iterators(
        dataroot,  # TODO add dataset_name
        batch_size, num_classes, num_gpus)

    # Model construction
    model_name = CONFIG['model'].name
    model, model_params = create_model(model_name, num_classes)

    # Create optimizers
    logging.debug('\tCreating optimizers...')
    lr = CONFIG['training'].learning_rate
    momentum = CONFIG['training'].momentum
    weight_decay = CONFIG['training'].weight_decay
    optimizer_type = CONFIG['training'].optimizer
    # TODO clean this up so that other_params are extracted automatically
    if optimizer_type == 'SGD':
        other_params = None
    elif optimizer_type == 'NoisySGD':
        other_params = {"noise_factor": CONFIG['noisysgd'].noise_factor}
    elif optimizer_type == 'ReservoirSGD':
        if num_gpus == 0:
            raise ValueError('Need at least one GPU for now for ReservoirSGD!')
        other_params = {
            'scale': CONFIG['reservoir'].scale,
            'max_reservoir_size': CONFIG['reservoir'].max_reservoir_size,
            'num_gradients_to_sample':
                CONFIG['reservoir'].num_gradients_to_sample,
            'distributed': CONFIG['reservoir'].distributed
        }
    elif optimizer_type == 'HessianVecSGD':
        other_params = {"noise_factor": CONFIG['hessian_vec'].noise_factor}
    else:
        raise ValueError('Unsupported optimizer: %s' % optimizer_type)
    create_optimizer = create_optimizer_fn(model_params, momentum,
                                           weight_decay, optimizer_type,
                                           mini_batch_size, other_params)
    optimizer = create_optimizer(lr)

    epoch = 0
    iteration = 0
    # Load previous model checkpoint if it exists
    checkpoint = CONFIG['model'].checkpoint
    if checkpoint is not None:
        logging.info('\tLoading train state from checkpoint: %s' % checkpoint)
        model_params, optimizer = load_checkpoint(checkpoint, model_params,
                                                  optimizer)

    # Calculate number of parameters in model
    num_parameters = sum(p.numel() for p in model_params.values())
    logging.debug('\tNumber of parameters in model: %d' % int(num_parameters))

    # Set up telemetry things
    meter_loss = tnt.meter.AverageValueMeter()
    classacc = tnt.meter.ClassErrorMeter(accuracy=True)
    timer_train = tnt.meter.TimeMeter('s')
    timer_test = tnt.meter.TimeMeter('s')

    # Create log directory for checkpoints
    save_dir = create_log_dirs(CONFIG['logging'].save_dir, batch_size,
                               timestamp, other_params)
    logging.info('\tLogging to: {}'.format(save_dir))

    # Create log/test functions that we use in torchnet engine hooks
    cross_entropy = create_cross_entropy_fn(model, model_name, model_params,
                                            num_gpus)
    log = create_log_fn(model_params, save_dir)

    # Create torchnet engine hooks
    engine = Engine(create_graph=CONFIG['training'].optimizer == 'HessianVecSGD',
                    mini_batch_size=mini_batch_size)

    epoch_step_orig = CONFIG['training'].epoch_step
    if isinstance(epoch_step_orig, int):
        epoch_step = [epoch_step_orig]
    else:
        epoch_step = list(map(int, CONFIG['training'].epoch_step.split(',')))

    # CALCULATE NUMBER OF EPOCHS BASED ON ITERATIONS
    if hasattr(CONFIG['training'], 'iterations'):
        logging.debug('\tUsing iterations: {}'.format(
            CONFIG['training'].iterations))
        iters_per_epoch = TRAINING_SIZE // batch_size
        batch_period = 1  # batch_size // mini_batch_size
        epochs = batch_period * CONFIG['training'].iterations // iters_per_epoch
        # run at least this many epochs
        epochs = max(epochs, CONFIG['training'].epochs)
    else:
        epochs = CONFIG['training'].epochs
    lr_decay_ratio = CONFIG['training'].lr_decay_ratio
    logging.info('\tRUNNING FOR {} EPOCHS'.format(epochs))

    # on_sample = create_on_sample_fn()
    on_forward = create_on_forward_fn(classacc, meter_loss)
    on_start = create_on_start_fn(epoch, iteration)
    on_start_epoch = create_on_start_epoch_fn(classacc, meter_loss, timer_train,
                                              train_loader, epoch_step,
                                              lr_decay_ratio, create_optimizer)
    on_update = create_on_update_fn(engine, cross_entropy, train_loader,
                                    test_loader, batch_size, log, classacc,
                                    meter_loss, timer_train, timer_test,
                                    save_dir,
                                    period=CONFIG['logging'].evaluation_iters,
                                    iterations=CONFIG['training'].iterations)

    # Hook the torchnet engine up
    # engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_start'] = on_start
    engine.hooks['on_update'] = on_update

    # Start the training process!
    engine.train(cross_entropy, train_loader, epochs, optimizer)
def main():
    opt = parser.parse_args()
    print('parsed options:', vars(opt))

    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu_id
    torch.randn(8).cuda()
    os.environ['CUDA_VISIBLE_DEVICES'] = ''

    epoch_step = json.loads(opt.epoch_step)

    if not os.path.exists(opt.save):
        os.mkdir(opt.save)

    f_s, params_s, stats_s = define_student(opt.depth, opt.width)
    f_t, params_t = define_teacher(opt.teacher_params)

    params = {'student.' + k: v for k, v in params_s.items()}
    stats = {'student.' + k: v for k, v in stats_s.items()}
    params.update({'teacher.' + k: v for k, v in params_t.items()})

    optimizable = [v for v in params.values() if v.requires_grad]

    def create_optimizer(opt, lr):
        print('creating optimizer with lr = ', lr)
        return torch.optim.SGD(optimizable, lr, 0.9,
                               weight_decay=opt.weightDecay)

    optimizer = create_optimizer(opt, opt.lr)

    iter_train = get_iterator(opt, True)
    iter_test = get_iterator(opt, False)

    epoch = 0
    if opt.resume != '':
        state_dict = torch.load(opt.resume)
        epoch = state_dict['epoch']
        params_tensors, stats = state_dict['params'], state_dict['stats']
        for k, v in params.items():
            v.data.copy_(params_tensors[k])
        optimizer.load_state_dict(state_dict['optimizer'])

    print('\nParameters:')
    print(pd.DataFrame([(key, v.size(), torch.typename(v.data))
                        for key, v in list(params.items())]))
    print('\nAdditional buffers:')
    print(pd.DataFrame([(key, v.size(), torch.typename(v))
                        for key, v in list(stats.items())]))

    n_parameters = sum([p.numel() for p in optimizable + list(stats.values())])
    print('\nTotal number of parameters:', n_parameters)

    meter_loss = tnt.meter.AverageValueMeter()
    classacc = tnt.meter.ClassErrorMeter(topk=[1, 5], accuracy=True)
    timer_train = tnt.meter.TimeMeter('s')
    timer_test = tnt.meter.TimeMeter('s')
    meters_at = [tnt.meter.AverageValueMeter() for i in range(4)]

    def f(inputs, params, stats, mode):
        y_s, g_s = f_s(inputs, params, stats, mode, 'student.')
        y_t, g_t = f_t(inputs, params, 'teacher.')
        return y_s, y_t, [at_loss(x, y) for x, y in zip(g_s, g_t)]

    def h(sample):
        inputs = Variable(sample[0].cuda())
        targets = Variable(sample[1].cuda().long())
        y_s, y_t, loss_groups = data_parallel(f, inputs, params, stats,
                                              sample[2], np.arange(opt.ngpu))
        loss_groups = [v.sum() for v in loss_groups]
        [m.add(v.data[0]) for m, v in zip(meters_at, loss_groups)]
        return distillation(y_s, y_t, targets, opt.temperature, opt.alpha) \
            + opt.beta * sum(loss_groups), y_s

    def log(t, state):
        torch.save(dict(params={k: v.data for k, v in params.items()},
                        stats=stats,
                        optimizer=state['optimizer'].state_dict(),
                        epoch=t['epoch']),
                   os.path.join(opt.save, 'model.pt7'))
        z = vars(opt).copy()
        z.update(t)
        logname = os.path.join(opt.save, 'log.txt')
        with open(logname, 'a') as f:
            f.write('json_stats: ' + json.dumps(z) + '\n')
        print(z)

    def on_sample(state):
        state['sample'].append(state['train'])

    def on_forward(state):
        classacc.add(state['output'].data, torch.LongTensor(state['sample'][1]))
        meter_loss.add(state['loss'].data[0])

    def on_start(state):
        state['epoch'] = epoch

    def on_start_epoch(state):
        classacc.reset()
        meter_loss.reset()
        timer_train.reset()
        [meter.reset() for meter in meters_at]
        state['iterator'] = tqdm(iter_train)

        epoch = state['epoch'] + 1
        if epoch in epoch_step:
            lr = state['optimizer'].param_groups[0]['lr']
            state['optimizer'] = create_optimizer(opt, lr * opt.lr_decay_ratio)

    def on_end_epoch(state):
        train_loss = meter_loss.value()
        train_acc = classacc.value()
        train_time = timer_train.value()
        meter_loss.reset()
        classacc.reset()
        timer_test.reset()

        engine.test(h, iter_test)

        print(log({
            "train_loss": train_loss[0],
            "train_acc": train_acc,
            "test_loss": meter_loss.value()[0],
            "test_acc": classacc.value(),
            "epoch": state['epoch'],
            "n_parameters": n_parameters,
            "train_time": train_time,
            "test_time": timer_test.value(),
            "at_losses": [m.value() for m in meters_at],
        }, state))

    engine = Engine()
    engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch
    engine.hooks['on_start'] = on_start
    engine.train(h, iter_train, opt.epochs, optimizer)
def main():
    opt = parser.parse_args()
    print('parsed options:', vars(opt))
    epoch_step = json.loads(opt.epoch_step)
    num_classes = 10 if opt.dataset == 'CIFAR10' else 100

    torch.manual_seed(opt.seed)
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu_id

    def create_iterator(mode):
        return DataLoader(create_dataset(opt, mode), opt.batch_size, shuffle=mode,
                          num_workers=opt.nthread,
                          pin_memory=torch.cuda.is_available())

    train_loader = create_iterator(True)
    test_loader = create_iterator(False)

    if opt.activation_dropout:
        print('[*********] Using activation dropout')
    f, params = resnet(opt.depth, opt.width, num_classes,
                       opt.dropout_prob, opt.activation_dropout)

    def create_optimizer(opt, lr):
        print('creating optimizer with lr = ', lr)
        return SGD([v for v in params.values() if v.requires_grad],
                   lr, momentum=0.9, weight_decay=opt.weight_decay)

    optimizer = create_optimizer(opt, opt.lr)

    epoch = 0
    if opt.resume != '':
        state_dict = torch.load(opt.resume)
        epoch = state_dict['epoch']
        params_tensors = state_dict['params']
        for k, v in params.items():
            v.data.copy_(params_tensors[k])
        optimizer.load_state_dict(state_dict['optimizer'])

    print('\nParameters:')
    print_tensor_dict(params)

    n_parameters = sum(p.numel() for p in params.values() if p.requires_grad)
    print('\nTotal number of parameters:', n_parameters)

    meter_loss = tnt.meter.AverageValueMeter()
    classacc = tnt.meter.ClassErrorMeter(accuracy=True)
    timer_train = tnt.meter.TimeMeter('s')
    timer_test = tnt.meter.TimeMeter('s')

    if not os.path.exists(opt.save):
        os.mkdir(opt.save)

    def h(sample):
        inputs = cast(sample[0], opt.dtype)
        targets = cast(sample[1], 'long')
        y = data_parallel(f, inputs, params, sample[2], list(range(opt.ngpu))).float()
        return F.cross_entropy(y, targets), y

    def log(t, state):
        torch.save(dict(params=params, epoch=t['epoch'],
                        optimizer=state['optimizer'].state_dict()),
                   os.path.join(opt.save, 'model.pt7'))
        z = {**vars(opt), **t}
        with open(os.path.join(opt.save, 'log.txt'), 'a') as flog:
            flog.write('json_stats: ' + json.dumps(z) + '\n')
        print(z)

    def on_sample(state):
        state['sample'].append(state['train'])

    def on_forward(state):
        loss = float(state['loss'])
        classacc.add(state['output'].data, state['sample'][1])
        meter_loss.add(loss)
        if state['train']:
            state['iterator'].set_postfix(loss=loss)

    def on_start(state):
        state['epoch'] = epoch

    def on_start_epoch(state):
        classacc.reset()
        meter_loss.reset()
        timer_train.reset()
        state['iterator'] = tqdm(train_loader, dynamic_ncols=True)

        epoch = state['epoch'] + 1
        if epoch in epoch_step:
            lr = state['optimizer'].param_groups[0]['lr']
            state['optimizer'] = create_optimizer(opt, lr * opt.lr_decay_ratio)

    def on_end_epoch(state):
        train_loss = meter_loss.value()
        train_acc = classacc.value()
        train_time = timer_train.value()
        meter_loss.reset()
        classacc.reset()
        timer_test.reset()
        with torch.no_grad():
            engine.test(h, test_loader)

        test_acc = classacc.value()[0]
        log({
            "train_loss": train_loss[0],
            "train_acc": train_acc[0],
            "test_loss": meter_loss.value()[0],
            "test_acc": test_acc,
            "epoch": state['epoch'],
            "num_classes": num_classes,
            "n_parameters": n_parameters,
            "train_time": train_time,
            "test_time": timer_test.value(),
        }, state)
        print('==> id: %s (%d/%d), test_acc: \33[91m%.2f\033[0m' %
              (opt.save, state['epoch'], opt.epochs, test_acc))

    engine = Engine()
    engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch
    engine.hooks['on_start'] = on_start
    engine.train(h, train_loader, opt.epochs, optimizer)
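# `cast` above is imported from this repo's utils and is not shown here. A
# minimal sketch of the assumed behavior: move a tensor (or container of
# tensors) to GPU when available and convert it by dtype name
# ('float', 'half', 'long', ...):
import torch

def cast(x, dtype):
    if isinstance(x, (list, tuple)):
        return type(x)(cast(v, dtype) for v in x)
    x = x.cuda() if torch.cuda.is_available() else x
    # tensors expose conversion methods named after the dtype, e.g. x.float()
    return getattr(x, dtype)()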
def main():
    ### Initialization
    device = torch.device(args.device)
    My_transform = transforms.Compose([
        transforms.ToTensor(),  # default: range [0, 255] -> [0.0, 1.0]
    ])
    Train_data = FashionMnistread(True, transform=My_transform)
    Test_data = FashionMnistread(False, transform=My_transform)
    Train_dataloader = DataLoader(dataset=Train_data, batch_size=args.n_batches,
                                  shuffle=False)
    Test_dataloader = DataLoader(dataset=Test_data, batch_size=args.n_batches,
                                 shuffle=False)

    def get_iterator(mode):
        return Train_dataloader if mode else Test_dataloader

    from torchsummary import summary
    _model = My_Model(num_of_class=args.n_classes)
    _model.to(device)
    summary(_model, input_size=(1, 28, 28))

    optimizer = torch.optim.SGD(_model.parameters(), lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    criterion = nn.CrossEntropyLoss()

    engine = Engine()
    meter_loss = tnt.meter.AverageValueMeter()
    classerr = tnt.meter.ClassErrorMeter(accuracy=True)
    confusion_meter = tnt.meter.ConfusionMeter(args.n_classes, normalized=True)
    plotLogger = Visualier(num_classes=args.n_classes)
    writelogger = Customized_Logger(file_name=args.log_file)
    ### End Initialization

    def h(sample):
        data, classes, training = sample
        _model.train() if training else _model.eval()
        labels = torch.LongTensor(classes).to(device)
        data = data.to(device).float()
        f_class = _model(data)
        loss = criterion(f_class, labels)
        p_class = F.softmax(f_class, dim=1)
        return loss, p_class

    def reset_meters():
        classerr.reset()
        meter_loss.reset()
        confusion_meter.reset()

    def on_sample(state):
        state['sample'].append(state['train'])

    def on_forward(state):
        classerr.add(state['output'].data, torch.LongTensor(state['sample'][1]))
        confusion_meter.add(state['output'].data, torch.LongTensor(state['sample'][1]))
        meter_loss.add(state['loss'].item())

    def on_start_epoch(state):
        reset_meters()
        state['iterator'] = tqdm(state['iterator'])

    def on_end_epoch(state):
        train_acc = classerr.value()[0]
        train_err = meter_loss.value()[0]

        # do validation at the end of each epoch
        reset_meters()
        engine.test(h, get_iterator(False))
        val_acc = classerr.value()[0]
        val_err = meter_loss.value()[0]

        plotLogger.plot(train_acc=train_acc, train_err=train_err,
                        val_acc=val_acc, val_err=val_err,
                        confusion=confusion_meter.value(), epoch=state['epoch'])
        writelogger.update(train_acc=train_acc, train_err=train_err,
                           val_acc=val_acc, val_err=val_err,
                           epoch=state['epoch'], model=_model)

    engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch
    engine.train(h, get_iterator(True), maxepoch=args.n_epoches, optimizer=optimizer)
def main():
    opt = parser.parse_args()
    print('parsed options:', vars(opt))
    epoch_step = json.loads(opt.epoch_step)
    num_classes = 10 if opt.dataset == 'CIFAR10' else 100

    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu_id

    def create_iterator(train):
        return DataLoader(create_dataset(opt, train), batch_size=opt.batch_size,
                          shuffle=train, num_workers=opt.nthread,
                          pin_memory=torch.cuda.is_available())

    train_loader = create_iterator(True)
    test_loader = create_iterator(False)

    f, params, stats = resnet(opt.depth, opt.width, num_classes)

    def create_optimizer(opt, lr):
        print('creating optimizer with lr = ', lr)
        return SGD(params.values(), lr, 0.9, weight_decay=opt.weight_decay)

    optimizer = create_optimizer(opt, opt.lr)

    epoch = 0
    if opt.resume != '':
        state_dict = torch.load(opt.resume)
        epoch = state_dict['epoch']
        params_tensors, stats = state_dict['params'], state_dict['stats']
        for k, v in params.items():
            v.data.copy_(params_tensors[k])
        optimizer.load_state_dict(state_dict['optimizer'])

    print('\nParameters:')
    print_tensor_dict(params)
    print('\nAdditional buffers:')
    print_tensor_dict(stats)

    n_parameters = sum(p.numel() for p in params.values())
    print('\nTotal number of parameters:', n_parameters)

    meter_loss = tnt.meter.AverageValueMeter()
    classacc = tnt.meter.ClassErrorMeter(accuracy=True)
    timer_train = tnt.meter.TimeMeter('s')
    timer_test = tnt.meter.TimeMeter('s')

    if not os.path.exists(opt.save):
        os.mkdir(opt.save)

    def h(sample):
        inputs = Variable(cast(sample[0], opt.dtype))
        targets = Variable(cast(sample[1], 'long'))
        y = data_parallel(f, inputs, params, stats, sample[2], list(range(opt.ngpu)))
        return F.cross_entropy(y, targets), y

    def log(t, state):
        torch.save(dict(params={k: v.data for k, v in params.items()},
                        stats=stats,
                        optimizer=state['optimizer'].state_dict(),
                        epoch=t['epoch']),
                   open(os.path.join(opt.save, 'model.pt7'), 'wb'))
        z = vars(opt).copy()
        z.update(t)
        logname = os.path.join(opt.save, 'log.txt')
        with open(logname, 'a') as f:
            f.write('json_stats: ' + json.dumps(z) + '\n')
        print(z)

    def on_sample(state):
        state['sample'].append(state['train'])

    def on_forward(state):
        classacc.add(state['output'].data, torch.LongTensor(state['sample'][1]))
        meter_loss.add(state['loss'].data[0])

    def on_start(state):
        state['epoch'] = epoch

    def on_start_epoch(state):
        classacc.reset()
        meter_loss.reset()
        timer_train.reset()
        state['iterator'] = tqdm(train_loader)

        epoch = state['epoch'] + 1
        if epoch in epoch_step:
            lr = state['optimizer'].param_groups[0]['lr']
            state['optimizer'] = create_optimizer(opt, lr * opt.lr_decay_ratio)

    def on_end_epoch(state):
        train_loss = meter_loss.value()
        train_acc = classacc.value()
        train_time = timer_train.value()
        meter_loss.reset()
        classacc.reset()
        timer_test.reset()

        engine.test(h, test_loader)

        test_acc = classacc.value()[0]
        log({
            "train_loss": train_loss[0],
            "train_acc": train_acc[0],
            "test_loss": meter_loss.value()[0],
            "test_acc": test_acc,
            "epoch": state['epoch'],
            "num_classes": num_classes,
            "n_parameters": n_parameters,
            "train_time": train_time,
            "test_time": timer_test.value(),
        }, state)
        print('==> id: %s (%d/%d), test_acc: \33[91m%.2f\033[0m' %
              (opt.save, state['epoch'], opt.epochs, test_acc))

    engine = Engine()
    engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch
    engine.hooks['on_start'] = on_start
    engine.train(h, train_loader, opt.epochs, optimizer)
def main():
    args = get_args()

    device = torch.device("cuda:1")
    # device = torch.device("cpu")

    model = SequenceEncoder(3, 2, device)

    n_data = 10
    data = get_toydata(n_data, device)
    teacher = [reverse_tensor(seq, device) for seq in data]
    training_data = (data, teacher)

    optim_params = {
        "params": model.parameters(),
        "weight_decay": args.weight_decay,
        "lr": args.lr,
    }
    optimizer = torch.optim.Adam(**optim_params)

    meter_loss = torchnet.meter.AverageValueMeter()
    port = 8097
    train_loss_logger = VisdomPlotLogger(
        'line', port=port, opts={'title': 'encoder_toy - train loss'})

    def network(sample):
        x = sample[0]  # sequence
        t = sample[1]  # target sequence
        y, mu, logvar = model(x)
        loss = get_loss(y, t, mu, logvar)
        o = y, mu, logvar
        return loss, o

    def reset_meters():
        meter_loss.reset()

    def on_sample(state):
        state['sample'] = list(state['sample'])
        state['sample'].append(state['train'])
        model.zero_grad()
        model.init_hidden()

    def on_forward(state):
        meter_loss.add(state['loss'].item())

    def on_start_epoch(state):
        reset_meters()
        # zip() is exhausted after one pass, so keep the raw dataset around
        # and rebuild the iterator at every epoch
        if 'dataset' not in state:
            state['dataset'] = state['iterator']
        dataset = state['dataset']
        state['iterator'] = tqdm(zip(*dataset))

    def on_end_epoch(state):
        loss_value = meter_loss.value()[0]
        epoch = state['epoch']
        print(f'loss[{epoch}]: {loss_value:.4f}')
        train_loss_logger.log(epoch, loss_value)
        dataset = state['dataset']
        state['iterator'] = tqdm(zip(*dataset))

    engine = Engine()
    engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch
    engine.train(network, training_data, maxepoch=args.epochs, optimizer=optimizer)
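# get_toydata, get_loss and reverse_tensor above are project helpers that are
# not shown. reverse_tensor is assumed to flip a sequence along its time axis
# (the usual "reproduce the input backwards" autoencoder target); a sketch:
import torch

def reverse_tensor(seq, device):
    # assumes dim 0 is the time axis of the sequence tensor
    return torch.flip(seq, dims=[0]).to(device)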
        else:
            state_dict = checkpoint
        model.load_state_dict(state_dict=state_dict, strict=False)
        print("=> loaded checkpoint '{}' (epoch {})".format(
            args.resume, args.start_epoch - 1))
    else:
        print("=> no checkpoint found at '{}'".format(args.resume))

    print(args)

    if len(args.gpus) > 0:
        model.cuda()
        cudnn.benchmark = True
    if len(args.gpus) > 1:
        model = nn.DataParallel(model, device_ids=args.gpus).cuda()

    engine = Engine()
    meter_loss = tnt.meter.AverageValueMeter()
    topk = [1, 5]
    classerr = tnt.meter.ClassErrorMeter(topk=topk, accuracy=False)  # default is also False
    confusion_meter = tnt.meter.ConfusionMeter(num_classes[args.dataset], normalized=True)

    if args.visdom:
        if args.log_name == '':
            args.log_name = args.build_type
        train_loss_logger = VisdomPlotLogger(
            'line', opts={'title': '[{}] Train Loss'.format(args.log_name)})
        train_err_logger = VisdomPlotLogger(
            'line', opts={'title': '[{}] Train Class Error'.format(args.log_name)})
from torchnet.engine import Engine
from torchnet.logger import VisdomPlotLogger, VisdomLogger
from torchvision.utils import make_grid
from torchvision.datasets.mnist import MNIST
from tqdm import tqdm
import torchnet as tnt

model = CapsuleNet()
# model.load_state_dict(torch.load('epochs/epoch_327.pt'))
model.cuda()

print("# parameters:", sum(param.numel() for param in model.parameters()))

optimizer = Adam(model.parameters())

engine = Engine()
meter_loss = tnt.meter.AverageValueMeter()
meter_accuracy = tnt.meter.ClassErrorMeter(accuracy=True)
confusion_meter = tnt.meter.ConfusionMeter(NUM_CLASSES, normalized=True)

train_loss_logger = VisdomPlotLogger('line', opts={'title': 'Train Loss'})
train_error_logger = VisdomPlotLogger('line', opts={'title': 'Train Accuracy'})
test_loss_logger = VisdomPlotLogger('line', opts={'title': 'Test Loss'})
test_accuracy_logger = VisdomPlotLogger('line', opts={'title': 'Test Accuracy'})
confusion_logger = VisdomLogger('heatmap', opts={
    'title': 'Confusion matrix',
    'columnnames': list(range(NUM_CLASSES)),
def main():
    """Train a simple Hybrid Scattering + CNN model on MNIST.

    Scattering features are normalized by batch normalization.
    The model achieves 99.6% testing accuracy after 10 epochs.
    """
    meter_loss = tnt.meter.AverageValueMeter()
    classerr = tnt.meter.ClassErrorMeter(accuracy=True)

    scat = Scattering(M=28, N=28, J=2).cuda()
    K = 81

    params = {
        'conv1.weight': conv_init(K, 64, 1),
        'conv1.bias': torch.zeros(64),
        'bn.weight': torch.Tensor(K).uniform_(),
        'bn.bias': torch.zeros(K),
        'linear2.weight': linear_init(64 * 7 * 7, 512),
        'linear2.bias': torch.zeros(512),
        'linear3.weight': linear_init(512, 10),
        'linear3.bias': torch.zeros(10),
    }
    stats = {'bn.running_mean': torch.zeros(K).cuda(),
             'bn.running_var': torch.ones(K).cuda()}

    for k, v in params.items():
        params[k] = Variable(v.cuda(), requires_grad=True)

    def h(sample):
        x = scat(sample[0].float().cuda().unsqueeze(1) / 255.0).squeeze(1)
        inputs = Variable(x)
        targets = Variable(torch.LongTensor(sample[1]).cuda())
        o = f(inputs, params, stats, sample[2])
        return F.cross_entropy(o, targets), o

    def on_sample(state):
        state['sample'].append(state['train'])

    def on_forward(state):
        classerr.add(state['output'].data, torch.LongTensor(state['sample'][1]))
        meter_loss.add(state['loss'].data[0])

    def on_start_epoch(state):
        classerr.reset()
        state['iterator'] = tqdm(state['iterator'])

    def on_end_epoch(state):
        print('Training accuracy:', classerr.value())

    def on_end(state):
        print('Training' if state['train'] else 'Testing', 'accuracy')
        print(classerr.value())

    optimizer = torch.optim.SGD(params.values(), lr=0.01, momentum=0.9,
                                weight_decay=0.0005)

    engine = Engine()
    engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch
    engine.hooks['on_end'] = on_end
    print('Training:')
    engine.train(h, get_iterator(True), 10, optimizer)
    print('Testing:')
    engine.test(h, get_iterator(False))
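# conv_init and linear_init are shared helpers across these MNIST snippets and
# are not shown; a sketch assuming He-style (fan-in) Gaussian initialization:
import math
import torch

def conv_init(ni, no, k):
    # ni input channels, no output channels, k x k kernel
    return torch.randn(no, ni, k, k) * math.sqrt(2.0 / (ni * k * k))

def linear_init(ni, no):
    return torch.randn(no, ni) * math.sqrt(2.0 / ni)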
                              num_workers=12, batch_size=64, shuffle=False)

    model = Net(upscale_factor=UPSCALE_FACTOR)
    criterion = AdjacentFrameLoss()
    if torch.cuda.is_available():
        model = model.cuda()
        criterion = criterion.cuda()
    print('# parameters:', sum(param.numel() for param in model.parameters()))

    optimizer = optim.Adam(model.parameters(), lr=1e-2)
    scheduler = MultiStepLR(optimizer, milestones=[30, 80], gamma=0.1)

    engine = Engine()
    meter_loss = tnt.meter.AverageValueMeter()
    meter_psnr = PSNRMeter()

    train_loss_logger = VisdomPlotLogger('line', opts={'title': 'Train Loss'})
    train_psnr_logger = VisdomPlotLogger('line', opts={'title': 'Train PSNR'})
    val_loss_logger = VisdomPlotLogger('line', opts={'title': 'Val Loss'})
    val_psnr_logger = VisdomPlotLogger('line', opts={'title': 'Val PSNR'})

    engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch
    engine.train(processor, train_loader,
def main(n):
    viz = Visdom()
    params = {
        'conv0.weight': conv_init(1, 50, 5),
        'conv0.bias': torch.zeros(50),
        'conv1.weight': conv_init(50, 50, 5),
        'conv1.bias': torch.zeros(50),
        'linear2.weight': linear_init(800, 512),
        'linear2.bias': torch.zeros(512),
        'linear3.weight': linear_init(512, 10),
        'linear3.bias': torch.zeros(10),
    }  # build the parameter dict; conv_init and linear_init use He initialization
    params = {k: Variable(v, requires_grad=True)
              for k, v in params.items()}  # wrap each Tensor as a torch.autograd.Variable

    # pick the optimization method: SGD, Adam or RMSprop
    if n == 1:
        optimizer = torch.optim.SGD(params.values(), lr=0.01, momentum=0.9,
                                    weight_decay=0.0005)
    if n == 2:
        optimizer = torch.optim.Adam(params.values(), lr=0.001, betas=(0.9, 0.99))
    if n == 3:
        optimizer = torch.optim.RMSprop(params.values(), lr=0.01, alpha=0.9)

    engine = Engine()  # Engine is a template for the training loop that wires up the model, dataset iterator, criterion and meters
    meter_loss = tnt.meter.AverageValueMeter()  # tracks the mean/std of any added value, e.g. the average loss
    classerr = tnt.meter.ClassErrorMeter(accuracy=True)  # tracks classification error
    confusion_meter = tnt.meter.ConfusionMeter(10, normalized=True)  # multi-class confusion matrix

    port = 8097  # visdom port
    train_loss_logger = VisdomPlotLogger('line', port=port, opts={}, win='102')
    # specify the window; trace names cannot be set here -- configure the legend via opts:
    viz.update_window_opts(
        win='101',
        opts=dict(
            legend=['Apples', 'Pears'],
            xtickmin=0, xtickmax=1, xtickstep=0.5,
            ytickmin=0, ytickmax=1, ytickstep=0.5,
            markersymbol='cross-thin-open',
        ),
    )  # train_loss line plot
    train_err_logger = VisdomPlotLogger('line', port=port,
                                        opts={'title': 'Train Class Error'})  # train_err line plot
    test_loss_logger = VisdomPlotLogger('line', port=port,
                                        opts={'title': 'Test Loss'})  # test_loss line plot
    test_err_logger = VisdomPlotLogger('line', port=port,
                                       opts={'title': 'Test Class Error'})  # test_err line plot
    confusion_logger = VisdomLogger('heatmap', port=port,
                                    opts={'title': 'Confusion matrix',
                                          'columnnames': list(range(10)),
                                          'rownames': list(range(10))})  # misclassification heatmap

    def h(sample):  # fetch data; f(params, inputs, mode); o is the network output
        inputs = Variable(sample[0].float() / 255.0)
        targets = Variable(torch.LongTensor(sample[1]))
        o = f(params, inputs, sample[2])
        return F.cross_entropy(o, targets), o  # return the loss and the output

    def reset_meters():  # reset all meters
        classerr.reset()
        meter_loss.reset()
        confusion_meter.reset()

    # hooks = {
    #     ['on_start'] = function() end,        -- setup and initialization before training starts
    #     ['on_start_epoch'] = function() end,  -- runs before each epoch
    #     ['on_sample'] = function() end,       -- runs after each sample is drawn
    #     ['on_forward'] = function() end,      -- runs after model:forward()
    #     ['onForwardCriterion'] = function() end,   -- runs after the criterion's forward pass
    #     ['onBackwardCriterion'] = function() end,  -- runs after the criterion's backward pass
    #     ['on_backward'] = function() end,     -- runs after backpropagation
    #     ['on_update'] = function() end,       -- runs after the weight update
    #     ['on_end_epoch'] = function() end,    -- runs at the end of each epoch
    #     ['on_end'] = function() end,          -- cleanup after the whole training run
    # }
    # state = {
    #     ['network'] = network,       -- the model
    #     ['criterion'] = criterion,   -- the loss function
    #     ['iterator'] = iterator,     -- the data iterator
    #     ['lr'] = lr,                 -- the learning rate
    #     ['lrcriterion'] = lrcriterion,
    #     ['maxepoch'] = maxepoch,     -- maximum number of epochs
    #     ['sample'] = {},             -- the current sample; inspect it in on_sample
    #     ['epoch'] = 0,               -- the current epoch
    #     ['t'] = 0,                   -- number of samples processed so far
    #     ['training'] = true          -- whether we are in the training phase
    # }
    # def train(self, network, iterator, maxepoch, optimizer):
    #     state = {
    #         'network': network,
    #         'iterator': iterator,
    #         'maxepoch': maxepoch,
    #         'optimizer': optimizer,
    #         'epoch': 0,   # epoch
    #         't': 0,       # sample
    #         'train': True,
    #     }

    def on_sample(state):  # runs after each sample is drawn
        state['sample'].append(state['train'])  # tag the sample with the train flag
        if state.get('epoch') is not None and state['t'] > 10:
            if n == 1:
                train_loss_logger.log(state['t'], meter_loss.value()[0], name="SGD")
            if n == 2:
                train_loss_logger.log(state['t'], meter_loss.value()[0], name="Adam")
            if n == 3:
                train_loss_logger.log(state['t'], meter_loss.value()[0], name="RMSprop")
            reset_meters()

    def on_forward(state):  # runs after the forward pass
        classerr.add(state['output'].data, torch.LongTensor(state['sample'][1]))
        confusion_meter.add(state['output'].data, torch.LongTensor(state['sample'][1]))
        meter_loss.add(state['loss'].data[0])

    def on_start_epoch(state):  # runs before each epoch
        reset_meters()
        state['iterator'] = tqdm(state['iterator'])

    def on_end_epoch(state):  # runs at the end of each epoch
        print('Training loss: %.4f, accuracy: %.2f%%' %
              (meter_loss.value()[0], classerr.value()[0]))
        # train_loss_logger.log(state['epoch'], meter_loss.value()[0])
        # train_err_logger.log(state['epoch'], classerr.value()[0])

        # do validation at the end of each epoch
        reset_meters()
        engine.test(h, get_iterator(False))
        # test_loss_logger.log(state['epoch'], meter_loss.value()[0])
        # test_err_logger.log(state['epoch'], classerr.value()[0])
        # confusion_logger.log(confusion_meter.value())
        print('Testing loss: %.4f, accuracy: %.2f%%' %
              (meter_loss.value()[0], classerr.value()[0]))

    engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch
    engine.train(h, get_iterator(True), maxepoch=1, optimizer=optimizer)
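# get_iterator is not shown in this snippet; a sketch following the common
# torchnet MNIST pattern (note: recent torchvision versions rename
# train_data/train_labels to data/targets, so adjust the attribute names
# accordingly):
import torchnet as tnt
from torchvision.datasets.mnist import MNIST

def get_iterator(mode):
    ds = MNIST(root='./', download=True, train=mode)
    data = getattr(ds, 'train_data' if mode else 'test_data')
    labels = getattr(ds, 'train_labels' if mode else 'test_labels')
    tds = tnt.dataset.TensorDataset([data, labels])
    # parallel() wraps the dataset in a multi-worker batch iterator
    return tds.parallel(batch_size=128, num_workers=4, shuffle=mode)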
def main():
    st = time.time()
    opt = parser.parse_args()
    print('parsed options:', vars(opt))
    epoch_step = json.loads(opt.epoch_step)
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu_id

    if not os.path.exists(opt.save):
        os.mkdir(opt.save)

    f_s, params_s = define_student(opt.depth, opt.width)
    f_t, params_t = define_teacher(opt.teacher_params)

    params = {'student.' + k: v for k, v in params_s.items()}
    params.update({'teacher.' + k: v for k, v in params_t.items()})

    params = OrderedDict((k, p.cuda().detach().requires_grad_(p.requires_grad))
                         for k, p in params.items())

    optimizable = [v for v in params.values() if v.requires_grad]

    def create_optimizer(opt, lr):
        print('creating optimizer with lr = ', lr)
        return SGD(optimizable, lr, momentum=0.9, weight_decay=opt.weight_decay)

    optimizer = create_optimizer(opt, opt.lr)

    iter_train = get_iterator(opt.imagenetpath, opt.batch_size, opt.nthread, True)
    iter_test = get_iterator(opt.imagenetpath, opt.batch_size, opt.nthread, False)

    epoch = 0
    if opt.resume != '':
        state_dict = torch.load(opt.resume)
        epoch = state_dict['epoch']
        params_tensors = state_dict['params']
        for k, v in params.items():
            v.data.copy_(params_tensors[k])
        optimizer.load_state_dict(state_dict['optimizer'])

    print('\nParameters:')
    utils.print_tensor_dict(params)

    n_parameters = sum(p.numel() for p in optimizable)
    print('\nTotal number of parameters:', n_parameters)

    meter_loss = tnt.meter.AverageValueMeter()
    classacc = tnt.meter.ClassErrorMeter(topk=[1, 5], accuracy=True)
    timer_train = tnt.meter.TimeMeter('s')
    timer_test = tnt.meter.TimeMeter('s')
    meters_at = [tnt.meter.AverageValueMeter() for i in range(4)]

    def f(inputs, params, mode):
        y_s, g_s = f_s(inputs, params, mode, 'student.')
        with torch.no_grad():
            y_t, g_t = f_t(inputs, params, 'teacher.')
        return y_s, y_t, [utils.at_loss(x, y) for x, y in zip(g_s, g_t)]

    def h(sample):
        inputs, targets, mode = sample
        inputs = inputs.cuda().detach()
        targets = targets.cuda().long().detach()
        y_s, y_t, loss_groups = utils.data_parallel(f, inputs, params, mode,
                                                    range(opt.ngpu))
        loss_groups = [v.sum() for v in loss_groups]
        [m.add(v.item()) for m, v in zip(meters_at, loss_groups)]
        return utils.distillation(y_s, y_t, targets, opt.temperature, opt.alpha) \
            + opt.beta * sum(loss_groups), y_s

    def log(t, state):
        torch.save(dict(params={k: v.data for k, v in params.items()},
                        optimizer=state['optimizer'].state_dict(),
                        epoch=t['epoch']),
                   os.path.join(opt.save, 'model.pt7'))
        z = vars(opt).copy()
        z.update(t)
        logname = os.path.join(opt.save, 'log.txt')
        with open(logname, 'a') as f:
            f.write('json_stats: ' + json.dumps(z) + '\n')
        print(z)

    def on_sample(state):
        state['sample'].append(state['train'])

    def on_forward(state):
        classacc.add(state['output'].data, state['sample'][1])
        loss = state['loss'].item()
        meter_loss.add(loss)
        if state['train']:
            state['iterator'].set_postfix(loss=loss)

    def on_start(state):
        state['epoch'] = epoch

    def on_start_epoch(state):
        classacc.reset()
        meter_loss.reset()
        timer_train.reset()
        [meter.reset() for meter in meters_at]
        state['iterator'] = tqdm(iter_train, dynamic_ncols=True)

        epoch = state['epoch'] + 1
        if epoch in epoch_step:
            lr = state['optimizer'].param_groups[0]['lr']
            state['optimizer'] = create_optimizer(opt, lr * opt.lr_decay_ratio)

    def on_end_epoch(state):
        train_loss = meter_loss.value()
        train_acc = classacc.value()
        train_time = timer_train.value()
        meter_loss.reset()
        classacc.reset()
        timer_test.reset()

        engine.test(h, iter_test)

        log({
            "train_loss": train_loss[0],
            "train_acc": train_acc,
            "test_loss": meter_loss.value()[0],
            "test_acc": classacc.value(),
            "epoch": state['epoch'],
            "n_parameters": n_parameters,
            "train_time": train_time,
            "test_time": timer_test.value(),
            "at_losses": [m.value() for m in meters_at],
        }, state)

    engine = Engine()
    engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch
    engine.hooks['on_start'] = on_start
    engine.train(h, iter_train, opt.epochs, optimizer)

    print("total time: {}".format(time.time() - st))
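# utils.at_loss above implements the attention-transfer term and is not shown;
# a sketch of the usual definition (L2 distance between normalized spatial
# attention maps of student and teacher feature maps):
import torch.nn.functional as F

def at(x):
    # collapse the channel dimension of a feature map into a flattened,
    # L2-normalized spatial attention vector per example
    return F.normalize(x.pow(2).mean(1).view(x.size(0), -1))

def at_loss(x, y):
    return (at(x) - at(y)).pow(2).mean()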
def main():
    opt = parser.parse_args()
    print('parsed options:', vars(opt))
    epoch_step = json.loads(opt.epoch_step)
    num_classes = 10 if opt.dataset == 'CIFAR10' else 100

    torch.manual_seed(opt.seed)
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu_id

    def create_iterator(mode):
        return DataLoader(create_dataset(opt, mode), opt.batch_size, shuffle=mode,
                          num_workers=opt.nthread,
                          pin_memory=torch.cuda.is_available())

    train_loader = create_iterator(True)
    test_loader = create_iterator(False)

    f_1, params_1 = resnet(opt.depth, opt.width, num_classes)
    f_2, params_2 = resnet(opt.depth, opt.width, num_classes)

    def create_optimizer(opt, lr):
        print('creating optimizer with lr = ', lr)
        return SGD([v for v in params_1.values() if v.requires_grad] +
                   [v for v in params_2.values() if v.requires_grad],
                   lr, momentum=0.9, weight_decay=opt.weight_decay)

    optimizer = create_optimizer(opt, opt.lr)

    epoch = 0
    if opt.resume != '':
        raise NotImplementedError

    print('\nParameters:')
    print_tensor_dict(params_1)
    print_tensor_dict(params_2)

    n_parameters = sum([p.numel() for p in params_1.values() if p.requires_grad] +
                       [p.numel() for p in params_2.values() if p.requires_grad])
    print('\nTotal number of parameters:', n_parameters)

    meter_loss = tnt.meter.AverageValueMeter()
    classacc = tnt.meter.ClassErrorMeter(accuracy=True)
    timer_train = tnt.meter.TimeMeter('s')
    timer_test = tnt.meter.TimeMeter('s')
    classacc_ep1 = tnt.meter.ClassErrorMeter(accuracy=True)
    classacc_ep2 = tnt.meter.ClassErrorMeter(accuracy=True)

    if not os.path.exists(opt.save):
        os.mkdir(opt.save)

    def h(sample):
        global _outputs, _loss
        # which pairs of heads supervise each other (net1 heads <-> net2 heads)
        connection_map = np.array([
            [0, 0, 0, 1, 1, 1],
            [0, 0, 0, 1, 1, 1],
            [0, 0, 0, 1, 1, 1],
            [1, 1, 1, 0, 0, 0],
            [1, 1, 1, 0, 0, 0],
            [1, 1, 1, 0, 0, 0]])
        inputs = cast(sample[0], opt.dtype)
        targets = cast(sample[1], 'long')
        net1_outputs = data_parallel(f_1, inputs, params_1, sample[2],
                                     list(range(opt.ngpu)))
        net2_outputs = data_parallel(f_2, inputs, params_2, sample[2],
                                     list(range(opt.ngpu)))
        net1_outputs = [o.float() for o in net1_outputs]
        net2_outputs = [o.float() for o in net2_outputs]

        _loss = []
        # hard supervision
        for i, o in enumerate(net1_outputs):
            _loss.append(F.cross_entropy(o, targets))
        for i, o in enumerate(net2_outputs):
            _loss.append(F.cross_entropy(o, targets))

        outputs = net1_outputs + net2_outputs
        # soft supervision
        for i, o in enumerate(outputs):
            for j, o2 in enumerate(outputs):
                if connection_map[i, j] > 0:
                    _loss.append(KL_divergence(o2.detach(), o))

        loss = sum(_loss)
        _outputs = net1_outputs
        return loss, net1_outputs[-1]

    def log(t, state):
        torch.save(dict(params=params_1, epoch=t['epoch'],
                        optimizer=state['optimizer'].state_dict()),
                   os.path.join(opt.save, 'model.pt7'))
        z = {**vars(opt), **t}
        with open(os.path.join(opt.save, 'log.txt'), 'a') as flog:
            flog.write('json_stats: ' + json.dumps(z) + '\n')
        print(z)

    def on_sample(state):
        state['sample'].append(state['train'])

    def on_forward(state):
        loss = float(state['loss'])
        classacc.add(state['output'].data, state['sample'][1])
        classacc_ep1.add(_outputs[0].data, state['sample'][1])
        classacc_ep2.add(_outputs[1].data, state['sample'][1])
        meter_loss.add(loss)
        if state['train']:
            state['iterator'].set_postfix(loss=loss)

    def on_start(state):
        state['epoch'] = epoch

    def on_start_epoch(state):
        classacc.reset()
        classacc_ep1.reset()
        classacc_ep2.reset()
        meter_loss.reset()
        timer_train.reset()
        state['iterator'] = tqdm(train_loader, dynamic_ncols=True)

        epoch = state['epoch'] + 1
        if epoch in epoch_step:
            lr = state['optimizer'].param_groups[0]['lr']
            state['optimizer'] = create_optimizer(opt, lr * opt.lr_decay_ratio)

    def on_end_epoch(state):
        train_loss = meter_loss.value()
        train_acc = classacc.value()
        train_time = timer_train.value()
        train_acc_ep1 = classacc_ep1.value()
        train_acc_ep2 = classacc_ep2.value()
        meter_loss.reset()
        classacc.reset()
        timer_test.reset()
        classacc_ep1.reset()
        classacc_ep2.reset()
        with torch.no_grad():
            engine.test(h, test_loader)

        test_acc = classacc.value()[0]
        test_acc_ep1 = classacc_ep1.value()[0]
        test_acc_ep2 = classacc_ep2.value()[0]
        log({
            "train_loss": train_loss[0],
            "train_acc": train_acc[0],
            "test_loss": meter_loss.value()[0],
            "test_acc": test_acc,
            "train_acc_ep1": train_acc_ep1[0],
            "train_acc_ep2": train_acc_ep2[0],
            "test_acc_ep1": test_acc_ep1,
            "test_acc_ep2": test_acc_ep2,
            "epoch": state['epoch'],
            "num_classes": num_classes,
            "n_parameters": n_parameters,
            "train_time": train_time,
            "test_time": timer_test.value(),
        }, state)
        print('==> id: %s (%d/%d), test_acc: \33[91m%.2f\033[0m' %
              (opt.save, state['epoch'], opt.epochs, test_acc))

    engine = Engine()
    engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch
    engine.hooks['on_start'] = on_start
    engine.train(h, train_loader, opt.epochs, optimizer)
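# KL_divergence above is not shown; a sketch consistent with the call
# KL_divergence(o2.detach(), o), where the first (detached) argument serves as
# the target distribution for the second:
import torch.nn.functional as F

def KL_divergence(target_logits, logits):
    p = F.softmax(target_logits, dim=1)
    log_q = F.log_softmax(logits, dim=1)
    # F.kl_div expects log-probabilities as input and probabilities as target
    return F.kl_div(log_q, p, reduction='batchmean')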