class Visualier():
    """Visualization: plot the logs during the training process."""

    def __init__(self, num_classes=10):
        port = 8097
        self.loss_logger = VisdomPlotLogger('line', port=port, win="Loss",
                                            opts={'title': 'Loss Logger'})
        self.acc_logger = VisdomPlotLogger('line', port=port, win="acc",
                                           opts={'title': 'Accuracy Logger'})
        self.confusion_logger = VisdomLogger('heatmap', port=port, win="confusion",
                                             opts={
                                                 'title': 'Confusion matrix',
                                                 'columnnames': list(range(num_classes)),
                                                 'rownames': list(range(num_classes))
                                             })

    def plot(self, train_acc, train_err, val_acc, val_err, confusion, epoch):
        self.loss_logger.log(epoch, train_err, name="train")
        self.acc_logger.log(epoch, train_acc, name="train")
        self.loss_logger.log(epoch, val_err, name="val")
        self.acc_logger.log(epoch, val_acc, name="val")
        self.confusion_logger.log(confusion)
        print("epoch: [%d/%d]" % (epoch, args.n_epoches))
        print('Training loss: %.4f, accuracy: %.2f%%' % (train_err, train_acc))
        print('Validation loss: %.4f, accuracy: %.2f%%' % (val_err, val_acc))
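# Usage sketch (illustrative, not from the original source): assumes a visdom
# server is running on localhost:8097 (`python -m visdom.server`) and that a
# global `args` with `n_epoches` exists, as the class expects.
vis = Visualier(num_classes=10)
# After each epoch, given scalar metrics and a torchnet ConfusionMeter:
# vis.plot(train_acc, train_err, val_acc, val_err, confusion_meter.value(), epoch)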
class ConfusionVisdom(object):
    '''Plot the test confusion matrix in a VisdomLogger.'''

    def __init__(self, num_classes, title='TBD'):
        self._confusion = VisdomLogger(
            'heatmap',
            opts={
                'title': '{:s} Confusion Matrix'.format(title),
                'columnnames': list(range(num_classes)),
                'rownames': list(range(num_classes))
            })
        check_visdom_server(self._confusion.viz)

    def log(self, confusion, train=None):
        assert train is not None, \
            'train should be True or False, not {}'.format(train)
        if train:
            pass
        else:
            try:
                self._confusion.log(confusion)
            except BaseException as e:
                check_visdom_server(self._confusion.viz)
                print(e)
                print("***Retry ConfusionVisdom")
                self.log(confusion, train)
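# `check_visdom_server` is called above but not defined in this snippet. A
# minimal sketch, assuming its job is to block until the server answers, can
# be built on visdom's check_connection() API:
import time

def check_visdom_server(viz, retries=10, delay=1.0):
    # poll the server behind this Visdom client until it responds
    for _ in range(retries):
        if viz.check_connection():
            return True
        time.sleep(delay)
    raise ConnectionError("visdom server unreachable")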
def log_image(im_logger: VisdomLogger, img: torch.Tensor, batch_size: int = None):
    if batch_size is None:
        batch_size = img.shape[0]
    grid_img = make_grid(img.detach().cpu(),
                         nrow=int(batch_size ** 0.5),
                         normalize=True,
                         range=(0, 1)).numpy()
    im_logger.log(grid_img)
    return grid_img
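# Usage sketch (illustrative): log a random batch as a single image grid.
# Assumes a running visdom server; make_grid is torchvision.utils.make_grid.
import torch
from torchvision.utils import make_grid
from torchnet.logger import VisdomLogger

im_logger = VisdomLogger('image', opts={'title': 'Batch preview'})
batch = torch.rand(16, 3, 32, 32)  # placeholder batch of 16 RGB images
log_image(im_logger, batch)        # logs a 4x4 grid normalized to [0, 1]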
def __addlogger(self, meter, ptype):
    if ptype == 'line':
        opts = {'title': self.title + ' Train ' + meter}
        self.logger['Train'][meter] = VisdomPlotLogger(ptype, server=self.server,
                                                       port=self.port, opts=opts)
        opts = {'title': self.title + ' Test ' + meter}
        self.logger['Test'][meter] = VisdomPlotLogger(ptype, server=self.server,
                                                      port=self.port, opts=opts)
    elif ptype == 'heatmap':
        names = list(range(self.nclass))
        opts = {'title': self.title + ' Train ' + meter,
                'columnnames': names, 'rownames': names}
        self.logger['Train'][meter] = VisdomLogger('heatmap', server=self.server,
                                                   port=self.port, opts=opts)
        opts = {'title': self.title + ' Test ' + meter,
                'columnnames': names, 'rownames': names}
        self.logger['Test'][meter] = VisdomLogger('heatmap', server=self.server,
                                                  port=self.port, opts=opts)
def __init__(self, args):
    self.args = args
    self.lossAvg = tnt.meter.AverageValueMeter()
    # self.lossSparseMu = tnt.meter.AverageValueMeter()
    # self.lossSparseVar = tnt.meter.AverageValueMeter()
    self.train_loss_logger = VisdomPlotLogger('line', opts={'title': 'Train Loss'},
                                              env='PoseCapsules')
    self.test_loss_logger = VisdomPlotLogger('line', opts={'title': 'Test Loss'},
                                             env='PoseCapsules')
    self.recon_sum = 0
    self.rout_id = 1
    if not self.args.disable_recon:
        self.reconLossAvg = tnt.meter.AverageValueMeter()
        self.ground_truth_logger_left = VisdomLogger(
            'image', opts={'title': 'Ground Truth, left'}, env='PoseCapsules')
        self.reconstruction_logger_left = VisdomLogger(
            'image', opts={'title': 'Reconstruction, left'}, env='PoseCapsules')
    if self.args.regularize:
        self.regularizeLossAvg = tnt.meter.AverageValueMeter()
        self.logsigAvg = tnt.meter.AverageValueMeter()
        self.costmeanAvg = tnt.meter.AverageValueMeter()
        self.costAvg = tnt.meter.AverageValueMeter()
        self.aAvg = tnt.meter.AverageValueMeter()
def __addlogger(self, meter, ptype):
    if ptype == 'line':
        if self.plotstylecombined:
            opts = {'title': self.title + ' ' + meter}
            self.logger['Train'][meter] = VisdomPlotLogger(
                ptype, win=meter, env=self.env, server=self.server,
                port=self.port, opts=opts)
            opts = {}
            self.logger['Test'][meter] = self.logger['Train'][meter]
        else:
            opts = {'title': self.title + ' Train ' + meter}
            self.logger['Train'][meter] = VisdomPlotLogger(
                ptype, win=meter, env=self.env, server=self.server,
                port=self.port, opts=opts)
            opts = {'title': self.title + ' Test ' + meter}
            self.logger['Test'][meter] = VisdomPlotLogger(
                ptype, win=meter, env=self.env, server=self.server,
                port=self.port, opts=opts)
    elif ptype == 'heatmap':
        names = list(range(self.nclass))
        opts = {'title': self.title + ' Train ' + meter,
                'columnnames': names, 'rownames': names}
        self.logger['Train'][meter] = VisdomLogger(
            'heatmap', win=('train_' + meter), env=self.env,
            server=self.server, port=self.port, opts=opts)
        opts = {'title': self.title + ' Test ' + meter,
                'columnnames': names, 'rownames': names}
        self.logger['Test'][meter] = VisdomLogger(
            'heatmap', win=('test_' + meter), env=self.env,
            server=self.server, port=self.port, opts=opts)
    elif ptype == 'bar':
        names = list(range(self.nclass))
        opts = {'title': self.title + ' Train ' + meter, 'rownames': names}
        self.logger['Train'][meter] = VisdomLogger(
            'bar', win=meter, env=self.env, server=self.server,
            port=self.port, opts=opts)
        opts = {'title': self.title + ' Test ' + meter, 'rownames': names}
        self.logger['Test'][meter] = VisdomLogger(
            'bar', win=meter, env=self.env, server=self.server,
            port=self.port, opts=opts)
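# Usage sketch (illustrative): with plotstylecombined=True, Train and Test
# share one window and are told apart by the `name` kwarg of log(), the same
# pattern used elsewhere in this file.
# self.logger['Train']['loss'].log(epoch, train_loss, name='train')
# self.logger['Test']['loss'].log(epoch, test_loss, name='test')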
# Temporary PyTorch bugfix: https://github.com/pytorch/pytorch/issues/2830
for state in optimizer.state.values():
    for k, v in state.items():
        if torch.is_tensor(v):
            state[k] = v.cuda()

if args.use_cuda:
    lambda_ = torch.tensor([args.max_lambda]).cuda()

"""
Logging of loss, reconstruction and ground truth
"""
meter_loss = tnt.meter.AverageValueMeter()
meter_loss_dae = tnt.meter.AverageValueMeter()

setting_logger = VisdomLogger('text', opts={'title': 'Settings'},
                              env=args.env_name)
train_loss_logger = VisdomPlotLogger('line', opts={'title': 'Train Loss'},
                                     env=args.env_name)

epoch_offset = 0
if args.load_loss:
    if os.path.isfile('loss.log'):
        with open("loss.log", "r") as lossfile:
            loss_list = []
            for loss in lossfile:
                loss_list.append(loss)
            while len(loss_list) > args.load_loss:
                loss_list.pop(0)
            for loss in loss_list:
                train_loss_logger.log(epoch_offset, float(loss))
                epoch_offset += 1

ground_truth_logger_left = VisdomLogger('image',
                                        opts={'title': 'Ground Truth, left'},
                                        env=args.env_name)
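# Companion sketch (illustrative): the replay above expects `loss.log` to hold
# one loss value per line, so training would append to it each epoch, e.g.:
def append_loss(value, path="loss.log"):
    # one value per line, matching the format parsed by the replay loop above
    with open(path, "a") as lossfile:
        lossfile.write("{}\n".format(value))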
                      class_criterion_B, train_loader, test_loader, opt)

if opt.pretrained:
    trained_model = load_weight(fullModel, opt.pretrained, verbose=True)

# -- Evaluation
nTestImages = reid_set.test_inds  # [2 ** (n+1) for n in range(5)]
cmc, simMat, _, avgSame, avgDiff = compute_cmc(reid_set, nTestImages,
                                               trained_model, 128)
print(cmc)
print(simMat)
print(avgSame, avgDiff)

sim_logger = VisdomLogger('heatmap', port=8097, opts={
    'title': 'simMat',
    'columnnames': list(range(len(simMat[0]))),
    'rownames': list(range(len(simMat)))
})
cmc_logger = VisdomPlotLogger("line", win="cmc_curve")
for i, v in enumerate(cmc):
    cmc_logger.log(i, v, name="cmc_curve")
sim_logger.log(simMat)

log.info("Saving results...")
# pickled output must be written in binary mode under Python 3
with open("cmc.pkl", 'wb') as f:
    pickle.dump(cmc, f)
with open("simMat.pkl", 'wb') as f:
    pickle.dump(simMat, f)
def test_only(model, train_dataloader, val_dataloader, optimizer, loss_fn,
              metrics, params, model_dir, logger, restore_file=None):
    # reload weights from restore_file if specified
    if restore_file is not None:
        logging.info("Restoring parameters from {}".format(restore_file))
        checkpoint = utils.load_checkpoint(restore_file, model, optimizer)
        best_val_acc = checkpoint['best_val_acc']
        params.current_epoch = checkpoint['epoch']
        print('best_val_acc=', best_val_acc)
        print(optimizer.state_dict()['param_groups'][0]['lr'],
              checkpoint['epoch'])

    train_confusion_logger = VisdomLogger('heatmap', port=port, opts={
        'title': params.experiment_path + 'train_Confusion matrix',
        'columnnames': columnnames,
        'rownames': rownames
    }, env='Test')
    test_confusion_logger = VisdomLogger('heatmap', port=port, opts={
        'title': params.experiment_path + 'test_Confusion matrix',
        'columnnames': columnnames,
        'rownames': rownames
    }, env='Test')
    diff_confusion_logger = VisdomLogger('heatmap', port=port, opts={
        'title': params.experiment_path + 'diff_Confusion matrix',
        'columnnames': columnnames,
        'rownames': rownames
    }, env='Test')

    # Evaluate for one epoch on the train and validation sets
    # model.train()
    model.eval()
    train_metrics, train_confusion_meter = evaluate(model, loss_fn,
                                                    train_dataloader, metrics,
                                                    params, logger)
    train_confusion_logger.log(train_confusion_meter.value())

    model.eval()
    val_metrics, test_confusion_meter = evaluate(model, loss_fn,
                                                 val_dataloader, metrics,
                                                 params, logger)
    test_confusion_logger.log(test_confusion_meter.value())

    diff_confusion_meter = (train_confusion_meter.value() -
                            test_confusion_meter.value())
    diff_confusion_logger.log(diff_confusion_meter)
def get_iterator(mode):
    # NOTE: the original assigned `dataset_train` in both branches; using the
    # held-out split for mode=False is an assumed fix.
    if mode is True:
        dataset = dataset_train
    elif mode is False:
        dataset = dataset_test
    loader = DataLoader(dataset, batch_size=BATCH_SIZE, num_workers=8,
                        shuffle=mode)
    return loader


## ------------------ log visualization ------------------------ ##
train_loss_logger = VisdomPlotLogger('line', opts={'title': 'Train Loss'})
ground_truth_logger = VisdomLogger('image', opts={'title': 'Images'})
reconstruction_logger = VisdomLogger('image', opts={'title': 'Segmentations'})


def reset_meters():
    meter_loss.reset()


def on_sample(state):
    state['sample'].append(state['train'])


def on_forward(state):
    meter_loss.add(state['loss'].item())


def on_start_epoch(state):
    reset_meters()
    state['iterator'] = tqdm(state['iterator'])
def train_sequence(
        model: AMOCNet,
        contrast_criterion: torch.nn.Module,
        class_criterion_A: torch.nn.Module,
        class_criterion_B: torch.nn.Module,
        train_loader: torch.utils.data.DataLoader,
        test_loader: torch.utils.data.DataLoader,
        opt,
        printInds: Iterable = None) -> (torch.nn.Module, dict, dict):
    optimizer = optim.Adam(model.parameters(), lr=opt.learningRate)
    timer = Timer()
    confusion_logger = VisdomLogger('heatmap', port=8097, opts={
        'title': 'simMat',
        'columnnames': list(range(len(train_loader.dataset))),
        'rownames': list(range(len(train_loader.dataset)))
    })

    epoch = 0
    if opt.pretrained or opt.motionnet_pretrained:
        model, optimizer, epoch = load_dicts(
            model, optimizer, opt.pretrained or opt.motionnet_pretrained)
    if printInds is None:
        printInds = list(range(10))

    def iterate_func(engine, batch):
        optimizer.zero_grad()
        inputA, inputB, target, personA, personB, ind, _, _ = batch
        if len(inputA.shape) == len(inputB.shape) == 4:
            inputA = torch.unsqueeze(inputA, 0)
            inputB = torch.unsqueeze(inputB, 0)
        assert inputA.shape[1] == inputB.shape[1] == opt.sampleSeqLength, \
            ValueError(f"ind: {ind}, inputA {inputA.shape}, inputB {inputB.shape}, "
                       f"required seq length {opt.sampleSeqLength}")
        if torch.cuda.is_available():
            inputA = inputA.float().cuda()
            inputB = inputB.float().cuda()
            target = target.float().cuda()
            personA = personA.long().cuda()
            personB = personB.long().cuda()

        distance, outputA, outputB = model(inputA, inputB)
        contrast_loss = contrast_criterion(distance, target)
        class_loss_A = class_criterion_A(outputA, personA)
        class_loss_B = class_criterion_B(outputB, personB)
        loss = contrast_loss + class_loss_A + class_loss_B
        loss.backward()
        clip_grad_value_(model.parameters(),
                         clip_value=opt.gradClip or sys.maxsize)
        optimizer.step()
        return (loss.item(), contrast_loss.item(),
                class_loss_A.item(), class_loss_B.item())

    trainer = Engine(iterate_func)
    train_history = {'cnst': [], 'ceA': [], 'ceB': [], 'ttl': []}
    val_history = {'avgSame': [], 'avgDiff': [], 'cmc': [], 'simMat': []}
    RunningAverage(alpha=1, output_transform=lambda x: x[0]).attach(trainer, 'ttl')
    RunningAverage(alpha=1, output_transform=lambda x: x[1]).attach(trainer, 'cnst')
    RunningAverage(alpha=1, output_transform=lambda x: x[2]).attach(trainer, 'ceA')
    RunningAverage(alpha=1, output_transform=lambda x: x[3]).attach(trainer, 'ceB')

    train_loss_logger = VisdomPlotLogger("line", name="train")
    val_loss_logger = VisdomPlotLogger("line", name="val")

    score_func = lambda engine: -engine.state.metrics['ttl']
    checkpoint_handler = ModelCheckpointSaveBest(
        opt.checkpoint_path,
        filename_prefix=opt.saveFileName,
        score_function=score_func,
        require_empty=False,
        save_as_state_dict=True)
    # stop_handler = EarlyStopping(patience=30, trainer=trainer,
    #                              score_function=score_func)

    @trainer.on(Events.STARTED)
    def resume_training(engine):
        engine.state.iteration = epoch * len(engine.state.dataloader)
        engine.state.epoch = epoch
        checkpoint_handler._iteration = epoch

    @trainer.on(Events.EPOCH_COMPLETED)
    def trainer_log(engine: Engine):
        avg_ttl = engine.state.metrics['ttl']
        avg_cnst = engine.state.metrics['cnst']
        avg_ceA = engine.state.metrics['ceA']
        avg_ceB = engine.state.metrics['ceB']
        lr = optimizer.param_groups[0]['lr']
        print(f"Epoch[{engine.state.epoch}]\tlr={lr:.2e}\telapsed:{timer.value():.2f}s:\t"
              f"TTL={avg_ttl:.3f}\tContrast={avg_cnst:04.3f}\t"
              f"CrossEntA={avg_ceA:04.3f}\tCrossEntB={avg_ceB:04.3f}")
        train_loss_logger.log(engine.state.epoch, avg_ttl, name="avg_total_loss")
        train_loss_logger.log(engine.state.epoch, avg_cnst, name="avg_contrast")
        train_loss_logger.log(engine.state.epoch, avg_ceA, name="avg_CrossEnt_A")
        train_loss_logger.log(engine.state.epoch, avg_ceB, name="avg_CrossEnt_B")

    @trainer.on(Events.ITERATION_COMPLETED)
    def adjust_lr(engine):
        # learning rate decay
        if engine.state.iteration >= 20000:
            lr = opt.learningRate * (0.1 ** min(
                (engine.state.iteration - 10000) // opt.lr_decay, 5))
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

    def on_complete(engine, dataloader, mode, history_dict):
        if not engine.state.epoch % opt.samplingEpochs:
            cmc, simMat, _, avgSame, avgDiff = compute_cmc(
                dataloader.dataset, printInds, model, opt.sampleSeqLength)
            metrics = {
                "cmc": cmc,
                "simMat": simMat,
                "avgSame": avgSame,
                "avgDiff": avgDiff
            }
            outString = ' '.join(str(np.floor(cmc[c])) for c in printInds)
            print(f"{mode} Result: Epoch[{engine.state.epoch}]- "
                  f"Avg Same={avgSame:.3f}\tAvg Diff={avgDiff:.3f}")
            print(outString)
            confusion_logger.log(simMat)
            val_loss_logger.log(trainer.state.epoch, avgSame, name="avg_same")
            val_loss_logger.log(trainer.state.epoch, avgDiff, name="avg_diff")
            if mode == "Validation":
                for key in val_history.keys():
                    history_dict[key].append(metrics[key])

    trainer.add_event_handler(Events.EPOCH_COMPLETED, on_complete,
                              train_loader, 'Training', train_history)
    trainer.add_event_handler(Events.EPOCH_COMPLETED, on_complete,
                              test_loader, 'Validation', val_history)
    timer.attach(trainer,
                 start=Events.EPOCH_STARTED,
                 resume=Events.ITERATION_STARTED,
                 pause=Events.ITERATION_COMPLETED,
                 step=Events.ITERATION_COMPLETED)
    # trainer.add_event_handler(Events.EPOCH_COMPLETED, stop_handler)
    checkpoint_handler.attach(trainer, model_dict={
        "model": model,
        "optimizer": optimizer
    })
    trainer.run(train_loader, max_epochs=opt.nEpochs)
    return model, trainer_log, val_history
                         batch_size=opt.batchSize,
                         shuffle=True,
                         num_workers=opt.n_cpu)

loss_meters: Dict[str, AverageValueMeter] = {
    'loss_G_meter': AverageValueMeter(),
    'loss_G_identity_meter': AverageValueMeter(),
    'loss_G_GAN_meter': AverageValueMeter(),
    'loss_G_cycle_meter': AverageValueMeter(),
    'loss_D_meter': AverageValueMeter()
}

# Loss plot
# logger = Logger(opt.n_epochs, len(data_loader))
loss_logger = VisdomPlotLogger('line', opts={'title': 'Loss'})
real_A_im_logger = VisdomLogger('image', opts={'title': 'Real A'})
real_B_im_logger = VisdomLogger('image', opts={'title': 'Real B'})
fake_A_im_logger = VisdomLogger('image', opts={'title': 'Fake A'})
fake_B_im_logger = VisdomLogger('image', opts={'title': 'Fake B'})

###################################
# #####       Training       ######
for epoch in range(opt.epoch, opt.n_epochs):
    print(f"Epoch:{epoch}")
    for i, batch in enumerate(tqdm(data_loader)):
        # Set model input
        real_A = batch['A'].to(device)
        # input_A.copy_(batch['A']).clone().detach().requires_grad_(True)
        real_B = batch['B'].to(
def main():
    train_data, val_data, num_classes = get_imager_folder()
    params = {
        'conv0.weight': conv_init(1, 50, 5),
        'conv0.bias': torch.zeros(50),
        'conv1.weight': conv_init(50, 50, 5),
        'conv1.bias': torch.zeros(50),
        'conv2.weight': conv_init(50, 50, 5),
        'conv2.bias': torch.zeros(50),
        'linear2.weight': linear_init(800, 512),
        'linear2.bias': torch.zeros(512),
        'linear3.weight': linear_init(512, num_classes),
        'linear3.bias': torch.zeros(num_classes),
    }
    params = {k: Variable(v, requires_grad=True) for k, v in params.items()}

    model = models.resnet50(pretrained=True)
    model.conv1 = torch.nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3,
                                  bias=False)
    # for p in model.parameters():
    #     p.requires_grad = False
    model.fc = torch.nn.Linear(model.fc.in_features, num_classes)
    model.cuda()
    params = model.parameters()
    optimizer = torch.optim.Adagrad(params)

    engine = Engine()
    meter_loss = tnt.meter.AverageValueMeter()
    classerr = tnt.meter.ClassErrorMeter(accuracy=True)
    confusion_meter = tnt.meter.ConfusionMeter(num_classes, normalized=True)

    port = 8097
    train_loss_logger = VisdomPlotLogger('line', port=port,
                                         opts={'title': 'Train Loss'})
    train_err_logger = VisdomPlotLogger('line', port=port,
                                        opts={'title': 'Train Acc'})
    test_loss_logger = VisdomPlotLogger('line', port=port,
                                        opts={'title': 'Test Loss'})
    test_err_logger = VisdomPlotLogger('line', port=port,
                                       opts={'title': 'Test Acc'})
    confusion_logger = VisdomLogger('heatmap', port=port, opts={
        'title': 'Confusion matrix',
        'columnnames': list(range(num_classes)),
        'rownames': list(range(num_classes))
    })
    acc_logger = VisdomPlotLogger('line', port=port, opts={
        'xlabel': 'Epochs',
        'ylabel': 'Accuracy',
        'legend': ['train', 'val']
    })
    loss_logger = VisdomPlotLogger('line', port=port, opts={
        'xlabel': 'Epochs',
        'ylabel': 'Loss',
        'legend': ['train', 'val']
    })

    def h(sample):
        inputs = Variable(sample[0].cuda())  # .float() / 255.0)
        targets = Variable(sample[1].cuda())
        outputs = model(inputs)
        # if model.training:  # and model.__all__[1] == 'inception_v3':
        #     outputs = outputs[0]
        # o = f(params, inputs, sample[2])
        return F.cross_entropy(outputs, targets), outputs

    def reset_meters():
        classerr.reset()
        meter_loss.reset()
        confusion_meter.reset()

    def on_sample(state):
        state['sample'].append(state['train'])

    def on_forward(state):
        classerr.add(state['output'].data,
                     torch.LongTensor(state['sample'][1]))
        confusion_meter.add(state['output'].data,
                            torch.LongTensor(state['sample'][1]))
        meter_loss.add(state['loss'].data[0])

    def on_start_epoch(state):
        model.train()
        reset_meters()
        state['iterator'] = tqdm(state['iterator'])

    def on_end_epoch(state):
        print('Training loss: %.4f, accuracy: %.2f%%' %
              (meter_loss.value()[0], classerr.value()[0]))
        train_loss_logger.log(state['epoch'], meter_loss.value()[0])
        train_err_logger.log(state['epoch'], classerr.value()[0])
        # train_acc = classerr.value()[0]
        # train_loss = meter_loss.value()[0]

        # do validation at the end of each epoch
        reset_meters()
        model.eval()
        engine.test(h, get_iterator(train_data, val_data, False))
        # y_acc = np.column_stack((np.array(train_acc), np.array(classerr.value()[0])))
        # y_loss = np.column_stack((np.array(train_acc), np.array(classerr.value()[0])))
        # acc_logger.log(state['epoch'], np.column_stack((np.array(train_acc), np.array(classerr.value()[0]))))
        # loss_logger.log(state['epoch'], np.column_stack((np.array(train_loss), np.array(meter_loss.value()[0]))))
        test_loss_logger.log(state['epoch'], meter_loss.value()[0])
        test_err_logger.log(state['epoch'], classerr.value()[0])
        confusion_logger.log(confusion_meter.value())
        print('Testing loss: %.4f, accuracy: %.2f%%' %
              (meter_loss.value()[0], classerr.value()[0]))

    engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch
    engine.train(h, get_iterator(train_data, val_data, True), maxepoch=50,
                 optimizer=optimizer)
def main(n):
    viz = Visdom()
    params = {
        'conv0.weight': conv_init(1, 50, 5),
        'conv0.bias': torch.zeros(50),
        'conv1.weight': conv_init(50, 50, 5),
        'conv1.bias': torch.zeros(50),
        'linear2.weight': linear_init(800, 512),
        'linear2.bias': torch.zeros(512),
        'linear3.weight': linear_init(512, 10),
        'linear3.bias': torch.zeros(10),
    }  # build the parameter dict; conv_init and linear_init use He initialization
    params = {k: Variable(v, requires_grad=True)
              for k, v in params.items()}  # wrap Tensors as torch.autograd.Variable

    if n == 1:
        optimizer = torch.optim.SGD(params.values(), lr=0.01, momentum=0.9,
                                    weight_decay=0.0005)
    if n == 2:
        optimizer = torch.optim.Adam(params.values(), lr=0.001,
                                     betas=(0.9, 0.99))
    if n == 3:
        optimizer = torch.optim.RMSprop(params.values(), lr=0.01,
                                        alpha=0.9)  # method: SGD

    # Engine provides a template for the training process, wiring together the
    # model, dataset iterator, criterion, and meters
    engine = Engine()
    # tracks the mean and variance of any added values, e.g. the average loss
    meter_loss = tnt.meter.AverageValueMeter()
    # meter for classification error
    classerr = tnt.meter.ClassErrorMeter(accuracy=True)
    # multi-class confusion matrix
    confusion_meter = tnt.meter.ConfusionMeter(10, normalized=True)

    port = 8097  # visdom port
    # fixes the win; the trace name cannot be set here -- the legend should be
    # fully configured through opts:
    train_loss_logger = VisdomPlotLogger('line', port=port, opts={}, win='102')
    viz.update_window_opts(
        win='101',
        opts=dict(
            legend=['Apples', 'Pears'],
            xtickmin=0,
            xtickmax=1,
            xtickstep=0.5,
            ytickmin=0,
            ytickmax=1,
            ytickstep=0.5,
            markersymbol='cross-thin-open',
        ),
    )  # train_loss line plot
    train_err_logger = VisdomPlotLogger(
        'line', port=port,
        opts={'title': 'Train Class Error'})  # train_err line plot
    test_loss_logger = VisdomPlotLogger(
        'line', port=port,
        opts={'title': 'Test Loss'})  # test_loss line plot
    test_err_logger = VisdomPlotLogger(
        'line', port=port,
        opts={'title': 'Test Class Error'})  # test_err line plot
    confusion_logger = VisdomLogger('heatmap', port=port, opts={
        'title': 'Confusion matrix',
        'columnnames': list(range(10)),
        'rownames': list(range(10))
    })  # misclassification heatmap

    def h(sample):  # fetch data; f(params, inputs, mode); o is the output
        inputs = Variable(sample[0].float() / 255.0)
        targets = Variable(torch.LongTensor(sample[1]))
        o = f(params, inputs, sample[2])
        return F.cross_entropy(o, targets), o  # return the loss and o

    def reset_meters():  # reset the meters
        classerr.reset()
        meter_loss.reset()
        confusion_meter.reset()

    # hooks = {
    #     ['on_start'] = function() end,        -- setup/initialization before training starts
    #     ['on_start_epoch'] = function() end,  -- before each epoch
    #     ['on_sample'] = function() end,       -- after drawing each sample
    #     ['on_forward'] = function() end,      -- after model:forward()
    #     ?['onForwardCriterion'] = function() end,  -- after the forward loss computation
    #     ?['onBackwardCriterion'] = function() end, -- after the backward loss computation
    #     ['on_backward'] = function() end,     -- after backpropagating the error
    #     ['on_update'] = function() end,       -- after the weight update
    #     ['on_end_epoch'] = function() end,    -- at the end of each epoch
    #     ['on_end'] = function() end,          -- cleanup once training is over
    # }
    # state = {
    #     ['network'] = network,         -- the model
    #     ['criterion'] = criterion,     -- the loss function
    #     ['iterator'] = iterator,       -- the data iterator
    #     ['lr'] = lr,                   -- learning rate
    #     ['lrcriterion'] = lrcriterion, --
    #     ['maxepoch'] = maxepoch,       -- maximum number of epochs
    #     ['sample'] = {},               -- the current sample; inspectable in onSample
    #     ['epoch'] = 0,                 -- current epoch
    #     ['t'] = 0,                     -- number of samples trained so far
    #     ['training'] = true            -- whether we are in the training phase
    # }
    # def train(self, network, iterator, maxepoch, optimizer):
    #     state = {
    #         'network': network,
    #         'iterator': iterator,
    #         'maxepoch': maxepoch,
    #         'optimizer': optimizer,
    #         'epoch': 0,   # epoch
    #         't': 0,       # sample
    #         'train': True,
    #     }

    def on_sample(state):  # runs after each sample is drawn
        state['sample'].append(state['train'])  # train on the collected sample
        if state.get('epoch') is not None and state['t'] > 10:
            if n == 1:
                train_loss_logger.log(state['t'], meter_loss.value()[0],
                                      name="SGD")
            if n == 2:
                train_loss_logger.log(state['t'], meter_loss.value()[0],
                                      name="Adam")
            if n == 3:
                train_loss_logger.log(state['t'], meter_loss.value()[0],
                                      name="RMSprop")
            reset_meters()

    def on_forward(state):  # runs after model.forward()
        classerr.add(state['output'].data,
                     torch.LongTensor(state['sample'][1]))
        confusion_meter.add(state['output'].data,
                            torch.LongTensor(state['sample'][1]))
        meter_loss.add(state['loss'].data[0])

    def on_start_epoch(state):  # runs before each epoch
        reset_meters()
        state['iterator'] = tqdm(state['iterator'])

    def on_end_epoch(state):  # runs at the end of each epoch
        print('Training loss: %.4f, accuracy: %.2f%%' %
              (meter_loss.value()[0], classerr.value()[0]))
        # train_loss_logger.log(state['epoch'], meter_loss.value()[0])
        # train_err_logger.log(state['epoch'], classerr.value()[0])

        # do validation at the end of each epoch
        reset_meters()
        engine.test(h, get_iterator(False))
        # test_loss_logger.log(state['epoch'], meter_loss.value()[0])
        # test_err_logger.log(state['epoch'], classerr.value()[0])
        # confusion_logger.log(confusion_meter.value())
        print('Testing loss: %.4f, accuracy: %.2f%%' %
              (meter_loss.value()[0], classerr.value()[0]))

    engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch
    engine.train(h, get_iterator(True), maxepoch=1, optimizer=optimizer)
def train_motion_net(model: MotionNet,
                     criterion: torch.nn.SmoothL1Loss,
                     train_loader: torch.utils.data.DataLoader,
                     test_loader: torch.utils.data.DataLoader,
                     opt):
    optimizer = optim.Adam(model.parameters(), lr=opt.learningRate)
    model = init_weights(model)
    of_logger = VisdomLogger('image', win="of", port=8097,
                             opts={"caption": "output"})
    gt_of_logger = VisdomLogger('image', win="gt", port=8097,
                                opts={"caption": "gt"})
    loss_weight = [0.01, 0.02, 0.08]

    epoch = 0
    if opt.pretrained:
        model, optimizer, epoch = load_dicts(model, optimizer, opt.pretrained)

    def iterate_func(engine, batch):
        model.train()
        inputA, inputB, _, _, _, ind, ofA, ofB = batch
        if len(inputA.shape) == len(inputB.shape) == 4:
            inputA = inputA.unsqueeze(0)
            inputB = inputB.unsqueeze(0)
        assert inputA.shape[1] == inputB.shape[1] == opt.sampleSeqLength, \
            ValueError(f"ind: {ind}, inputA {inputA.shape}, inputB {inputB.shape}, "
                       f"required seq length {opt.sampleSeqLength}")
        if torch.cuda.is_available():
            inputA = inputA.float().cuda()
            inputB = inputB.float().cuda()
            ofA = ofA.float().cuda()
            ofB = ofB.float().cuda()

        def _iterate(input_, of):
            """
            Single training pass of MotionNet.
            :param input_: two consecutive frames concatenated along axis 0: [1, 6, W, H]
            :param of: target feature map for the MotionNet output: [1, 2, W, H]
            :return: the scalar loss
            """
            optimizer.zero_grad()
            outs = list(model(input_))
            losses = []
            for i, out in enumerate(outs):
                factor = of.shape[2] // out.shape[2]
                gt = AvgPool2d(factor, factor)(of).detach().data
                losses += [criterion(out, gt) * loss_weight[i]]
            loss = sum(losses)
            loss.backward()
            optimizer.step()
            return loss.item()

        for i in range(inputA.shape[1] - 1):
            consecutive_frame = torch.cat(
                (inputA[:, i, ...], inputA[:, i + 1, ...]), 1)
            _iterate(consecutive_frame, ofA[:, i, ...])
        for i in range(inputB.shape[1] - 1):
            consecutive_frame = torch.cat(
                (inputB[:, i, ...], inputB[:, i + 1, ...]), 1)
            losses = _iterate(consecutive_frame, ofB[:, i, ...])
        return losses

    def eval_func(engine, batch):
        cnt = 1
        model.eval()
        with torch.no_grad():
            inputA, inputB, _, _, _, ind, ofA_, ofB_ = batch
            if len(inputA.shape) == len(inputB.shape) == 4:
                inputA = inputA.unsqueeze(0)
                inputB = inputB.unsqueeze(0)
            assert inputA.shape[1] == inputB.shape[1] == opt.sampleSeqLength, \
                ValueError(f"ind: {ind}, inputA {inputA.shape}, inputB {inputB.shape}, "
                           f"required seq length {opt.sampleSeqLength}")
            if torch.cuda.is_available():
                inputA = inputA.float().cuda()
                inputB = inputB.float().cuda()
                ofA = ofA_.float().cuda()
                ofB = ofB_.float().cuda()

            def _iterate(input_, of):
                outs = list(model(input_))
                loss = []
                for i, out in enumerate(outs):
                    factor = of.shape[2] // out.shape[2]
                    gt = AvgPool2d(factor, factor)(of).detach().data
                    loss += [criterion(out, gt) * loss_weight[i]]
                return sum(loss).item(), outs[-1]

            for i in range(inputA.shape[1] - 1):
                consecutive_frame = torch.cat(
                    (inputA[:, i, ...], inputA[:, i + 1, ...]), 1)
                _, out = _iterate(consecutive_frame, ofA[:, i, ...])
                if cnt:
                    cnt -= 1
                    of_logger.log(vis_of(out.cpu()))
                    gt_of_logger.log(vis_of(ofA_[:, i, ...]))
            for i in range(inputB.shape[1] - 1):
                consecutive_frame = torch.cat(
                    (inputB[:, i, ...], inputB[:, i + 1, ...]), 1)
                losses, _ = _iterate(consecutive_frame, ofB[:, i, ...])
            return losses

    trainer = Engine(iterate_func)
    evaluator = Engine(eval_func)
    train_history = {'loss': []}
    val_history = {'loss': []}
    RunningAverage(alpha=1, output_transform=lambda x: x).attach(trainer, 'loss')
    RunningAverage(alpha=1, output_transform=lambda x: x).attach(evaluator, 'loss')
    score_func = lambda engine: -engine.state.metrics['loss']
    checkpoint_handler = ModelCheckpointSaveBest(
        opt.checkpoint_path,
        filename_prefix=opt.saveFileName,
        score_function=score_func,
        require_empty=False,
        save_as_state_dict=True)
    stop_handler = EarlyStopping(patience=30, trainer=trainer,
                                 score_function=score_func)

    @trainer.on(Events.STARTED)
    def resume_training(engine):
        engine.state.iteration = epoch * len(engine.state.dataloader)
        engine.state.epoch = epoch
        checkpoint_handler._iteration = epoch

    @trainer.on(Events.EPOCH_COMPLETED)
    def trainer_log(engine: Engine):
        loss = engine.state.metrics['loss']
        lr = optimizer.param_groups[0]['lr']
        print("-" * 50)
        print(f"Epoch[{engine.state.epoch}] lr={lr:.2E}:\t\tAvg Loss={loss:.4f}")

    @trainer.on(Events.ITERATION_COMPLETED)
    def adjust_lr(engine):
        # learning rate decay
        lr = opt.learningRate * (0.1 ** (engine.state.iteration // opt.lr_decay))
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr

    def on_complete(engine, dataloader, mode, history_dict):
        evaluator.run(dataloader)
        loss = evaluator.state.metrics["loss"]
        print(f"{mode} Result: Epoch[{engine.state.epoch}]:\tAvg Loss={loss:.4f}")
        if mode == "Validation":
            for key in val_history.keys():
                history_dict[key].append(loss)

    trainer.add_event_handler(Events.EPOCH_COMPLETED, on_complete,
                              train_loader, 'Training', train_history)
    trainer.add_event_handler(Events.EPOCH_COMPLETED, on_complete,
                              test_loader, 'Validation', val_history)
    trainer.add_event_handler(Events.EPOCH_COMPLETED, stop_handler)
    checkpoint_handler.attach(trainer, model_dict={
        "model": model,
        "optimizer": optimizer
    })
    trainer.run(train_loader, max_epochs=opt.nEpochs)
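# Minimal sketch (illustrative) of the multi-scale target construction used in
# `_iterate` above: each coarser MotionNet head is supervised by the ground
# truth optical flow average-pooled down to its resolution, weighted by
# loss_weight. All shapes below are placeholders.
import torch
from torch.nn import AvgPool2d, SmoothL1Loss

criterion = SmoothL1Loss()
of = torch.rand(1, 2, 64, 64)  # full-resolution flow target
outs = [torch.rand(1, 2, 16, 16),  # coarse-to-fine network outputs
        torch.rand(1, 2, 32, 32),
        torch.rand(1, 2, 64, 64)]
loss_weight = [0.01, 0.02, 0.08]
losses = []
for w, out in zip(loss_weight, outs):
    factor = of.shape[2] // out.shape[2]      # downscale factor for this head
    gt = AvgPool2d(factor, factor)(of)        # pooled ground truth at out's scale
    losses.append(criterion(out, gt) * w)
loss = sum(losses)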
meter_loss = tnt.meter.AverageValueMeter()
meter_accuracy = tnt.meter.ClassErrorMeter(accuracy=True)
meter_confusion = tnt.meter.ConfusionMeter(num_class, normalized=True)

# config the visdom figures
if FINE_GRAINED and DATA_TYPE in ['reuters', 'yelp', 'amazon']:
    env_name = DATA_TYPE + '_fine_grained'
else:
    env_name = DATA_TYPE
train_loss_logger = VisdomPlotLogger('line', env=env_name,
                                     opts={'title': 'Train Loss'})
train_accuracy_logger = VisdomPlotLogger('line', env=env_name,
                                         opts={'title': 'Train Accuracy'})
train_confusion_logger = VisdomLogger('heatmap', env=env_name,
                                      opts={'title': 'Train Confusion Matrix'})
test_loss_logger = VisdomPlotLogger('line', env=env_name,
                                    opts={'title': 'Test Loss'})
test_accuracy_logger = VisdomPlotLogger('line', env=env_name,
                                        opts={'title': 'Test Accuracy'})
test_confusion_logger = VisdomLogger('heatmap', env=env_name,
                                     opts={'title': 'Test Confusion Matrix'})

current_step = 0
for epoch in range(1, NUM_EPOCHS + 1):
    for data, target in train_iterator:
        current_step += 1
        focal_label = target
        margin_label = torch.eye(num_class).index_select(dim=0, index=target)
lambda_ = 1e-3  # TODO: find a good schedule to increase lambda and m
m = 0.2

A, B, C, D, E, r = 64, 8, 16, 16, args.num_classes, args.r  # a small CapsNet
# A, B, C, D, E, r = 32, 32, 32, 32, args.num_classes, args.r  # a classic CapsNet
model = CapsNet(A, B, C, D, E, r)
capsule_loss = CapsuleLoss()

meter_loss = tnt.meter.AverageValueMeter()
meter_accuracy = tnt.meter.ClassErrorMeter(accuracy=True)
confusion_meter = tnt.meter.ConfusionMeter(args.num_classes, normalized=True)

setting_logger = VisdomLogger('text', opts={'title': 'Settings'},
                              env=args.env_name)
train_loss_logger = VisdomPlotLogger('line', opts={'title': 'Train Loss'},
                                     env=args.env_name)
train_error_logger = VisdomPlotLogger('line', opts={'title': 'Train Accuracy'},
                                      env=args.env_name)
test_loss_logger = VisdomPlotLogger('line', opts={'title': 'Test Loss'},
                                    env=args.env_name)
test_accuracy_logger = VisdomPlotLogger('line', opts={'title': 'Test Accuracy'},
                                        env=args.env_name)
confusion_logger = VisdomLogger('heatmap', opts={
_model.cuda()
summary(_model, input_size=(3, 96, 96))
print("# parameters:", sum(param.numel() for param in _model.parameters()))

## ------------------ init ------------------------ ##
log = []
optimizer = SGD(_model.parameters(), lr=0.01)
engine = Engine()  # training loop

## ------------------ log visualization ------------------------ ##
train_loss_logger = VisdomPlotLogger('line', opts={'title': 'Train Loss'})
train_accuracy_logger = VisdomPlotLogger('line', opts={'title': 'Train Accuracy'})
test_loss_logger = VisdomPlotLogger('line', opts={'title': 'Test Loss'})
test_accuracy_logger = VisdomPlotLogger('line', opts={'title': 'Test Accuracy'})
ground_truth_logger = VisdomLogger('image', opts={'title': 'Ground Truth'})
transformed_logger = VisdomLogger('image', opts={'title': 'Transformed'})

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def processor(epoch):
    for istraining in [True, False]:
        if istraining:
            _model.train()
        else:
            _model.eval()
        running_loss = 0
        running_corrects = 0
        for data, target in tqdm(get_iterator(istraining)):
            data, target = data.to(device), target.to(device)
    env=env)
# error_logger = VisdomPlotLogger('line', port=port,
#     opts={'title': params.experiment_path + '_Error @top1',
#           'legend': ['train', 'test']}, win=None, env=env)
error_logger15 = VisdomPlotLogger(
    'line',
    port=port,
    opts={
        'title': params.experiment_path + '_Error @top1@top5',
        'legend': ['train@top1', 'train@top5', 'test@top1', 'test@top5']
    },
    win=None,
    env=env)
train_confusion_logger = VisdomLogger('heatmap', port=port, opts={
    'title': params.experiment_path + 'train_Confusion matrix',
    'columnnames': columnnames,
    'rownames': rownames
}, win=None, env=env)
test_confusion_logger = VisdomLogger('heatmap', port=port, opts={
    'title': params.experiment_path + 'test_Confusion matrix',
    'columnnames': columnnames,
    'rownames': rownames
}, win=None, env=env)
# diff_confusion_logger = VisdomLogger('heatmap', port=port,
#     opts={'title': params.experiment_path + 'diff_Confusion matrix',
if args.log_name == '':
    args.log_name = args.build_type
train_loss_logger = VisdomPlotLogger(
    'line', opts={'title': '[{}] Train Loss'.format(args.log_name)})
train_err_logger = VisdomPlotLogger(
    'line', opts={'title': '[{}] Train Class Error'.format(args.log_name)})
test_loss_logger = VisdomPlotLogger(
    'line', opts={'title': '[{}] Test Loss'.format(args.log_name)})
test_err_logger = VisdomPlotLogger(
    'line', opts={'title': '[{}] Test Class Error'.format(args.log_name)})
confusion_logger = VisdomLogger('heatmap', opts={
    'title': '[{}] Confusion matrix'.format(args.log_name),
    'columnnames': list(range(num_classes[args.dataset])),
    'rownames': list(range(num_classes[args.dataset]))
})

criterion = nn.CrossEntropyLoss()


def network(sample):
    if sample[2]:  # train mode
        model.train()
    else:
        model.eval()
    inputs, targets = sample[0], sample[1]
    if len(args.gpus) > 0:
def main():
    params = {
        "conv0.weight": conv_init(1, 50, 5),
        "conv0.bias": torch.zeros(50),
        "conv1.weight": conv_init(50, 50, 5),
        "conv1.bias": torch.zeros(50),
        "linear2.weight": linear_init(800, 512),
        "linear2.bias": torch.zeros(512),
        "linear3.weight": linear_init(512, 10),
        "linear3.bias": torch.zeros(10),
    }
    params = {k: Variable(v, requires_grad=True) for k, v in params.items()}

    optimizer = torch.optim.SGD(params.values(), lr=0.01, momentum=0.9,
                                weight_decay=0.0005)
    engine = Engine()
    meter_loss = tnt.meter.AverageValueMeter()
    classerr = tnt.meter.ClassErrorMeter(accuracy=True)
    confusion_meter = tnt.meter.ConfusionMeter(10, normalized=True)

    port = 8097
    train_loss_logger = VisdomPlotLogger("line", port=port,
                                         opts={"title": "Train Loss"})
    train_err_logger = VisdomPlotLogger("line", port=port,
                                        opts={"title": "Train Class Error"})
    test_loss_logger = VisdomPlotLogger("line", port=port,
                                        opts={"title": "Test Loss"})
    test_err_logger = VisdomPlotLogger("line", port=port,
                                       opts={"title": "Test Class Error"})
    confusion_logger = VisdomLogger(
        "heatmap",
        port=port,
        opts={
            "title": "Confusion matrix",
            "columnnames": list(range(10)),
            "rownames": list(range(10)),
        },
    )

    def h(sample):
        inputs = Variable(sample[0].float() / 255.0)
        targets = Variable(torch.LongTensor(sample[1]))
        o = f(params, inputs, sample[2])
        return F.cross_entropy(o, targets), o

    def reset_meters():
        classerr.reset()
        meter_loss.reset()
        confusion_meter.reset()

    def on_sample(state):
        state["sample"].append(state["train"])

    def on_forward(state):
        classerr.add(state["output"].data,
                     torch.LongTensor(state["sample"][1]))
        confusion_meter.add(state["output"].data,
                            torch.LongTensor(state["sample"][1]))
        meter_loss.add(state["loss"].data[0])

    def on_start_epoch(state):
        reset_meters()
        state["iterator"] = tqdm(state["iterator"])

    def on_end_epoch(state):
        print("Training loss: %.4f, accuracy: %.2f%%" %
              (meter_loss.value()[0], classerr.value()[0]))
        train_loss_logger.log(state["epoch"], meter_loss.value()[0])
        train_err_logger.log(state["epoch"], classerr.value()[0])

        # do validation at the end of each epoch
        reset_meters()
        engine.test(h, get_iterator(False))
        test_loss_logger.log(state["epoch"], meter_loss.value()[0])
        test_err_logger.log(state["epoch"], classerr.value()[0])
        confusion_logger.log(confusion_meter.value())
        print("Testing loss: %.4f, accuracy: %.2f%%" %
              (meter_loss.value()[0], classerr.value()[0]))

    engine.hooks["on_sample"] = on_sample
    engine.hooks["on_forward"] = on_forward
    engine.hooks["on_start_epoch"] = on_start_epoch
    engine.hooks["on_end_epoch"] = on_end_epoch
    engine.train(h, get_iterator(True), maxepoch=10, optimizer=optimizer)
device = torch.device('cuda:{}'.format(opt.local_rank))
spenet = SpeNet().cuda().to(device)
# renet = recon_net(cin=31).cuda().to(device)
opt = parse.parse_args()
optimizer = torch.optim.Adam(itertools.chain(spenet.parameters()),
                             lr=opt.lr, betas=(opt.b1, opt.b2),
                             weight_decay=0)
# MseLoss = torch.nn.MSELoss()
MaeLoss = torch.nn.L1Loss().to(device)
T_max = (datalen // (1 * opt.batch_size)) * 1000
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max,
                                                       eta_min=1e-5,
                                                       last_epoch=-1)

env = opt.envname
server = '10.37.0.18'
Loss_logger = VisdomPlotLogger('line', opts={'title': 'loss2'},
                               port=8100, env=env, server=server)
Losst_logger = VisdomPlotLogger('line', opts={'title': 'loss_t2'},
                                port=8100, env=env, server=server)
Lossm_logger = VisdomPlotLogger('line', opts={'title': 'SSIM_t2'},
                                port=8100, env=env, server=server)
PSNR_logger = VisdomPlotLogger('line', opts={'title': 'PSNR'},
                               port=8100, env=env, server=server)
train_logger = VisdomLogger('image', opts={'title': 'reconstruction image2'},
                            port=8100, env=env, server=server)
test_logger = VisdomLogger('image', opts={'title': 'Residual image2'},
                           port=8100, env=env, server=server)

b_r = 0
ssim_best = 0
psnr_best = 0
if conti:
    state_dict = torch.load('/home/zhu_19/data/saperate/run_folders/'
                            '2020-04-05-15-29-38/save_model2020-04-05-15-29-40/'
                            'state_dicr_500.pkl')
    del state_dict

for epoch in range(opt.n_epochs):
    batch = 0
    loss_ = []
    loss_t = []
    ssim_log = []
    for hsi, hsi_g, hsi_resize, msi in Hyper_train:
        batch = batch + 1
        b_r = max(batch, b_r)
        download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
        ]))
    loader = torch.utils.data.DataLoader(dataset,
                                         batch_size=args.batch_size,
                                         shuffle=True,
                                         num_workers=1,
                                         pin_memory=True)
    return loader


engine = BasicEngine()
meter_loss = tnt.meter.AverageValueMeter()
train_loss_logger = VisdomPlotLogger('line', opts={'title': 'Train Loss'},
                                     env=args.env)
test_loss_logger = VisdomPlotLogger('line', opts={'title': 'Test Loss'},
                                    env=args.env)
image_logger = VisdomLogger('image', env=args.env)

model = VAE(784, 400, args.noise_dim, batch_size=args.batch_size)
model.cuda()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
model.train()
model_wrapper = ModelWrapperVAE(model,
                                dataset_iter=get_iterator,
                                meters={"loss": meter_loss},
                                loggers={"train_loss": train_loss_logger,
                                         "test_loss": test_loss_logger,
                                         "generated_image": image_logger})
engine.train(model_wrapper, get_iterator(True), maxepoch=args.epochs,
             optimizer=optimizer)

with open(f"models/{args.env}", "wb") as f:
    torch.save(model, f)
def save_checkpoint(state, filename=args.checkpoint):
    if not os.path.exists('checkpoints'):
        os.mkdir('checkpoints')
    torch.save(state, os.path.join('checkpoints', filename))


if args.visdom:
    print("visdom init")
    train_loss_logger = VisdomPlotLogger(
        'line', port=args.visdom_port,
        opts={'title': 'Train Loss mh, time: {}'.format(datetime.datetime.now())})
    train_err_logger = VisdomPlotLogger(
        'line', port=args.visdom_port,
        opts={'title': 'Train Class Accuracy mh, time: {}'.format(datetime.datetime.now())})
    train_confusion_logger = VisdomLogger(
        'heatmap', port=args.visdom_port,
        opts={'title': 'Train Confusion matrix mh, time: {}'.format(datetime.datetime.now()),
              'columnnames': list(range(args.num_classes)),
              'rownames': list(range(args.num_classes))})
    test_loss_logger = VisdomPlotLogger(
        'line', port=args.visdom_port,
        opts={'title': 'Test Loss mh, time: {}'.format(datetime.datetime.now())})
    test_err_logger = VisdomPlotLogger(
        'line', port=args.visdom_port,
        opts={'title': 'Test Class Accuracy mh, time: {}'.format(datetime.datetime.now())})
    test_confusion_logger = VisdomLogger(
        'heatmap', port=args.visdom_port,
        opts={'title': 'Test Confusion matrix mh, time: {}'.format(datetime.datetime.now()),
              'columnnames': list(range(args.num_classes)),
              'rownames': list(range(args.num_classes))})


def main():
engine = Engine()
meter_loss = tnt.meter.AverageValueMeter()
meter_accuracy = tnt.meter.ClassErrorMeter(accuracy=True)
confusion_meter = tnt.meter.ConfusionMeter(NUM_CLASSES, normalized=True)

train_loss_logger = VisdomPlotLogger('line', opts={'title': 'Train Loss'})
train_error_logger = VisdomPlotLogger('line', opts={'title': 'Train Accuracy'})
test_loss_logger = VisdomPlotLogger('line', opts={'title': 'Test Loss'})
test_accuracy_logger = VisdomPlotLogger('line', opts={'title': 'Test Accuracy'})
confusion_logger = VisdomLogger('heatmap', opts={
    'title': 'Confusion matrix',
    'columnnames': list(range(NUM_CLASSES)),
    'rownames': list(range(NUM_CLASSES))
})
ground_truth_logger = VisdomLogger('image', opts={'title': 'Ground Truth'})
reconstruction_logger = VisdomLogger('image', opts={'title': 'Reconstruction'})

capsule_loss = CapsuleLoss()


def get_iterator(mode):
    dataset = MNIST(root='./data', download=True, train=mode)
    data = getattr(dataset, 'train_data' if mode else 'test_data')
    labels = getattr(dataset, 'train_labels' if mode else 'test_labels')
    tensor_dataset = tnt.dataset.TensorDataset([data, labels])
print("# parameters:", sum(param.numel() for param in model.parameters())) optimizer = Adam(model.parameters()) engine = Engine() meter_loss = tnt.meter.AverageValueMeter() meter_accuracy = tnt.meter.ClassErrorMeter(accuracy=True) confusion_meter = tnt.meter.ConfusionMeter(NUM_CLASSES, normalized=True) train_loss_logger = VisdomPlotLogger('line', opts={'title': 'Train Loss'}) train_error_logger = VisdomPlotLogger('line', opts={'title': 'Train Accuracy'}) test_loss_logger = VisdomPlotLogger('line', opts={'title': 'Test Loss'}) test_accuracy_logger = VisdomPlotLogger('line', opts={'title': 'Test Accuracy'}) confusion_logger = VisdomLogger('heatmap', opts={'title': 'Confusion matrix', 'columnnames': list(range(NUM_CLASSES)), 'rownames': list(range(NUM_CLASSES))}) ground_truth_logger = VisdomLogger('image', opts={'title': 'Ground Truth'}) reconstruction_logger = VisdomLogger('image', opts={'title': 'Reconstruction'}) capsule_loss = CapsuleLoss() def get_iterator(mode): dataset = MNIST(root='./data', download=True, train=mode) data = getattr(dataset, 'train_data' if mode else 'test_data') labels = getattr(dataset, 'train_labels' if mode else 'test_labels') tensor_dataset = tnt.dataset.TensorDataset([data, labels]) return tensor_dataset.parallel(batch_size=BATCH_SIZE, num_workers=4, shuffle=mode)
def train_valid_loop(train_loader, dev_loader, test_loader, args, model,
                     fold=None):
    # --------------------------------------------------------------------------
    # TRAIN/VALID LOOP
    logger.info('-' * 100)
    stats = {
        'timer': utils.Timer(),
        'epoch': 0,
        'best_valid': 0,
        'best_epoch': 0,
        'fold': fold
    }
    start_epoch = 0

    if args.visdom:
        # add visdom logger code
        port = args.visdom_port
        train_loss_logger = VisdomPlotLogger(
            'line', port=port,
            opts={'title': f'{args.model_name} Train Loss'})
        train_metric_logger = VisdomPlotLogger(
            'line', port=port,
            opts={'title': f'{args.model_name} Train Class Accuracy'})
        idx2label = {i: label for label, i in model.label_dict.items()}
        label_names = [idx2label[i] for i in range(model.args.label_size)]
        train_confusion_logger = VisdomLogger(
            'heatmap', port=port,
            opts={
                'title': f'{args.model_name} Train Confusion Matrix',
                'columnnames': label_names,
                'rownames': label_names
            })
        valid_metric_logger = VisdomPlotLogger(
            'line', port=port,
            opts={'title': f'{args.model_name} Valid Class Accuracy'})
        valid_confusion_logger = VisdomLogger(
            'heatmap', port=port,
            opts={
                'title': f'{args.model_name} Valid Confusion Matrix',
                'columnnames': label_names,
                'rownames': label_names
            })
        train_confusion_meter = tnt.meter.ConfusionMeter(
            model.args.label_size, normalized=True)
        valid_confusion_meter = tnt.meter.ConfusionMeter(
            model.args.label_size, normalized=True)
    else:
        train_confusion_meter = None
        valid_confusion_meter = None

    try:
        for epoch in range(start_epoch, args.num_epochs):
            stats['epoch'] = epoch

            # Train
            loss = train(args, train_loader, model, stats)
            stats['train_loss'] = loss

            # Validate train
            train_res, train_cfm = validate(
                args, train_loader, model, stats, mode='train',
                confusion_meter=train_confusion_meter)
            for m in train_res:
                stats['train_' + m] = train_res[m]

            # Validate dev
            val_res, valid_cfm = validate(
                args, dev_loader, model, stats, mode='dev',
                confusion_meter=valid_confusion_meter)
            for m in val_res:
                stats['dev_' + m] = val_res[m]

            if args.visdom:
                train_loss_logger.log(epoch, loss)
                train_metric_logger.log(epoch, train_res[args.valid_metric])
                train_confusion_logger.log(train_cfm)
                valid_metric_logger.log(epoch, val_res[args.valid_metric])
                valid_confusion_logger.log(valid_cfm)
                train_confusion_meter.reset()
                valid_confusion_meter.reset()

            # Save best valid
            if val_res[args.valid_metric] > stats['best_valid']:
                logger.info(
                    colored(f'Best valid: {args.valid_metric} = '
                            f'{val_res[args.valid_metric]*100:.2f}% ', 'yellow') +
                    colored(f'(epoch {stats["epoch"]}, {model.updates} updates)',
                            'yellow'))
                fold_info = f'.fold_{fold}' if fold is not None else ''
                model.save(args.model_file + fold_info)
                stats['best_valid'] = val_res[args.valid_metric]
                stats['best_epoch'] = epoch
            logger.info('-' * 100)

            if args.stats_file:
                with open(args.stats_file, 'w') as f:
                    out_stats = stats.copy()
                    out_stats['timer'] = out_stats['timer'].time()
                    if fold is None:
                        del out_stats['fold']
                    f.write(json.dumps(out_stats) + '\n')

            if epoch - stats['best_epoch'] >= args.early_stopping:
                logger.info(
                    colored(f'No improvement for {args.early_stopping} epochs, '
                            'stop training.', 'red'))
                break
    except KeyboardInterrupt:
        logger.info(colored('User ended training. Stop.', 'red'))

    logger.info('Load best model...')
    model = EntityClassifier.load(args.model_file + fold_info, args)
    # device = torch.device(f"cuda:{args.gpu}" if args.cuda else "cpu")
    # model.to(device)
    model.cuda()
    stats['epoch'] = stats['best_epoch']
    if fold is not None:
        mode = f'fold {fold} test'
    else:
        mode = 'test'
    test_result, _ = validate(args, test_loader, model, stats, mode=mode)
    return test_result
def main(checkpoint_path, batch_size, normalized, visdom_port):
    checkpoint_path = Path(checkpoint_path)
    snapshot_path = checkpoint_path.parent.parent.parent / 'snapshot.json'
    with snapshot_path.open('r') as f:
        snapshot_dict = json.load(f)

    mat_id_to_label = snapshot_dict['mat_id_to_label']
    label_to_mat_id = {int(v): int(k) for k, v in mat_id_to_label.items()}
    num_classes = len(label_to_mat_id) + 1

    print(f'Loading model checkpoint from {checkpoint_path!r}')
    checkpoint = torch.load(checkpoint_path)
    model = RendNet3(num_classes=num_classes,
                     num_roughness_classes=20,
                     num_substances=len(SUBSTANCES),
                     base_model=resnet.resnet18(pretrained=False))
    model.load_state_dict(checkpoint['state_dict'])
    model.train(False)
    model = model.cuda()

    validation_dataset = rendering_dataset.MaterialRendDataset(
        snapshot_dict,
        snapshot_dict['examples']['validation'],
        shape=(384, 384),
        image_transform=transforms.inference_image_transform(INPUT_SIZE),
        mask_transform=transforms.inference_mask_transform(INPUT_SIZE))

    validation_loader = DataLoader(
        validation_dataset,
        batch_size=batch_size,
        num_workers=8,
        shuffle=False,
        pin_memory=True,
        collate_fn=rendering_dataset.collate_fn)

    pred_counts = collections.defaultdict(collections.Counter)

    # switch to evaluate mode
    model.eval()

    confusion_meter = tnt.meter.ConfusionMeter(k=num_classes,
                                               normalized=normalized)

    pbar = tqdm(validation_loader)
    for batch_idx, batch_dict in enumerate(pbar):
        input_tensor = batch_dict['image'].cuda()
        labels = batch_dict['material_label'].cuda()

        # compute output
        output = model.forward(input_tensor)
        pbar.set_description(f"{output['material'].size()}")
        # _, pred = output['material'].topk(k=1, dim=1, largest=True, sorted=True)
        confusion_meter.add(output['material'].cpu(), labels.cpu())

    with session_scope() as sess:
        materials = sess.query(models.Material).filter_by(enabled=True).all()
        material_id_to_name = {m.id: m.name for m in materials}
        mat_by_id = {m.id: m for m in materials}

    class_names = ['background']
    class_names.extend([
        mat_by_id[label_to_mat_id[i]].name for i in range(1, num_classes)
    ])
    print(len(class_names))

    confusion_matrix = confusion_meter.value()
    # sorted_confusion_matrix = confusion_matrix[:, inds]
    # sorted_confusion_matrix = sorted_confusion_matrix[inds, :]
    # sorted_class_names = [class_names[i] for i in inds]

    confusion_logger = VisdomLogger(
        'heatmap',
        opts={
            'title': 'Confusion matrix',
            'columnnames': class_names,
            'rownames': class_names,
            'xtickfont': {'size': 8},
            'ytickfont': {'size': 8},
        },
        env='brdf-classifier-confusion',
        port=visdom_port)
    confusion_logger.log(confusion_matrix)