xlabel='Time', ylabel='Volume', ytype='log', title='Stacked area plot', marginleft=30, marginright=30, marginbottom=80, margintop=30, ), ) # Assure that the stacked area plot isn't giant viz.update_window_opts( win=win, opts=dict( width=300, height=300, ), ) try: input = raw_input # for Python 2 compatibility except NameError: pass input('Waiting for callbacks, press enter to quit.') except BaseException as e: print( "The visdom experienced an exception while running: {}\n" "The demo displays up-to-date functionality with the GitHub version, " "which may not yet be pushed to pip. Please upgrade using " "`pip install -e .` or `easy_install .`\n"
xtickmax=50, xtickstep=0.5, ytickmin=-50, ytickmax=50, ytickstep=0.5, markersymbol='cross-thin-open', ), ) viz.update_window_opts( win=old_scatter, opts=dict( legend=['Apples', 'Pears'], xtickmin=0, xtickmax=1, xtickstep=0.5, ytickmin=0, ytickmax=1, ytickstep=0.5, markersymbol='cross-thin-open', ), ) # 3d scatterplot with custom labels and ranges viz.scatter(X=np.random.rand(100, 3), Y=(Y + 1.5).astype(int), opts=dict( legend=['Men', 'Women'], markersize=5, xtickmin=0, xtickmax=2,
class TrainPipline(object):
    """Training pipeline: builds datasets, model, and runs train/validation loops.

    Configuration comes from a nested ``opt`` dict (paths, model hyperparameters,
    training schedule, CUDA flag). Progress is logged to CSV-style ``Logger``
    files and to a Visdom server on port 8097.
    """

    def __init__(self, opt):
        """Cache config values and open the Visdom connection.

        Args:
            opt: nested dict with 'path', 'model', 'train' and 'cuda' sections.
        """
        self.root_path = opt['path']['root_path']
        self.result_path = os.path.join(self.root_path,
                                        opt['path']['result_path'])
        # NOTE: 'datasest_path' is a (misspelled) key expected in the config
        # files; renaming it would break existing configs, so it is kept.
        self.datasets_path = os.path.join(self.root_path,
                                          opt['path']['datasest_path'])
        self.n_classes = opt['model']['n_classes']
        self.momentum = opt['model']['momentum']
        self.weight_decay = opt['model']['weight_decay']
        self.nesterov = opt['model']['nesterov']
        self.n_epochs = opt['train']['n_epochs']
        self.batch_size = opt['train']['batch_size']
        self.learning_rate = opt['train']['learning_rate']
        self.n_threads = opt['train']['n_threads']
        self.checkpoint = opt['train']['checkpoint']  # checkpoint every N epochs
        self.no_cuda = opt['cuda']['no_cuda']
        self.model_name = ''   # set by model()
        self.model_ft = ''     # set by model()
        self.visdom_log_file = os.path.join(self.result_path, 'log_files',
                                            'visdom.log')
        self.vis = Visdom(port=8097,
                          log_to_filename=self.visdom_log_file,
                          env='myTest_1')
        self.vis_loss_opts = {
            'xlabel': 'epoch',
            'ylabel': 'loss',
            'title': 'losses',
            'legend': ['train_loss', 'val_loss']
        }
        self.vis_tpr_opts = {
            'xlabel': 'epoch',
            'ylabel': 'tpr',
            'title': 'val_tpr',
            'legend': ['tpr@fpr10-2', 'tpr@fpr10-3', 'tpr@fpr10-4']
        }
        self.vis_epochloss_opts = {
            'xlabel': 'epoch',
            'ylabel': 'loss',
            'title': 'epoch_losses',
            'legend': ['train_loss', 'val_loss']
        }

    def datasets(self, data_name=None):
        """Build (train, val) datasets for ``data_name``.

        Supported names come from the project-level ``DatasetsList``;
        CIFAR10/CIFAR100 use torchvision, anything else falls back to a
        ``MyDataset`` built from train.txt / val.txt file lists.

        Returns:
            (training_data, val_data) dataset pair.
        """
        assert data_name in DatasetsList
        if data_name == 'CIFAR10':
            training_data = datasets.CIFAR10(
                root='./modelzoo/datasets/',
                train=True,
                download=False,
                transform=transforms.Compose([
                    # transforms.RandomResizedCrop(224),
                    transforms.Pad(96),
                    transforms.ToTensor(),
                    transforms.Normalize((0.1307, ), (0.3081, ))
                ]))
            val_data = datasets.CIFAR10(
                root='./modelzoo/datasets/',
                train=False,
                download=False,
                transform=transforms.Compose([
                    # transforms.RandomResizedCrop(224),
                    transforms.Pad(96),
                    transforms.ToTensor(),
                    transforms.Normalize((0.1307, ), (0.3081, ))
                ]))
        elif data_name == 'CIFAR100':
            training_data = datasets.CIFAR100(
                root='./modelzoo/datasets/',
                train=True,
                download=True,
                transform=transforms.Compose([
                    # transforms.RandomResizedCrop(224),
                    transforms.Pad(96),
                    transforms.ToTensor(),
                    transforms.Normalize((0.1307, ), (0.3081, ))
                ]))
            val_data = datasets.CIFAR100(
                root='./modelzoo/datasets/',
                train=False,
                download=True,
                transform=transforms.Compose([
                    # transforms.RandomResizedCrop(224),
                    transforms.Pad(96),
                    transforms.ToTensor(),
                    transforms.Normalize((0.1307, ), (0.3081, ))
                ]))
        else:
            train_txt_path = os.path.join(self.datasets_path, 'train.txt')
            val_txt_path = os.path.join(self.datasets_path, 'val.txt')
            my_transform = transforms.Compose(
                [transforms.Resize(224), transforms.ToTensor()])
            training_data = MyDataset(train_txt_path, transform=my_transform)
            val_data = MyDataset(val_txt_path, transform=my_transform)
        return training_data, val_data

    def model(self, model_name='resnet18', model_path=None):
        """Instantiate a model from ``ModelList``.

        Args:
            model_name: key into the project-level ``ModelList`` registry.
            model_path: optional checkpoint path; when given, weights are
                loaded from it, otherwise the net is freshly initialized.

        Returns:
            The constructed (and possibly pre-loaded) model.
        """
        assert model_name in ModelList
        self.model_name = model_name
        self.model_ft = ModelList[self.model_name](num_classes=self.n_classes)
        if model_path is not None:
            # BUGFIX: load_state_dict expects a state dict, not a file path —
            # deserialize the checkpoint first.
            self.model_ft.load_state_dict(torch.load(model_path))
        else:
            self.model_ft.apply(weights_init)
        return self.model_ft

    def train(self, training_data, val_data, model):
        """Run the full training schedule with per-epoch validation."""
        # data init
        train_loader = DataLoader(
            training_data,
            batch_size=self.batch_size,
            shuffle=True,
            # num_workers=self.n_threads,
            pin_memory=True)
        # result writers
        train_logger = Logger(
            os.path.join(self.result_path, self.model_name + '_train.log'),
            ['epoch', 'loss', 'acc', 'lr'])
        train_batch_logger = Logger(
            os.path.join(self.result_path,
                         self.model_name + '_train_batch.log'),
            ['epoch', 'batch', 'iter', 'loss', 'acc', 'lr'])
        val_logger = Logger(
            os.path.join(self.result_path, self.model_name + '_test.log'),
            ['time', 'loss', 'acc'])
        # optimizer init
        optimizer = optim.SGD(model.parameters(),
                              lr=self.learning_rate,
                              momentum=self.momentum,
                              weight_decay=self.weight_decay,
                              nesterov=self.nesterov)
        # loss init
        criterion = nn.CrossEntropyLoss()
        print(model)
        if not self.no_cuda:
            model = nn.DataParallel(model, device_ids=[0, 1, 2, 3]).cuda()
        # start train
        # NOTE(review): range(0, n_epochs + 1) runs n_epochs + 1 epochs;
        # kept as-is since checkpoints/logs depend on this numbering.
        for i in range(0, self.n_epochs + 1):
            self.train_epoch(i, train_loader, model, criterion, optimizer,
                             train_logger, train_batch_logger)
            self.validation(val_data, model, criterion, val_logger)

    def train_epoch(self, epoch, data_loader, model, criterion, optimizer,
                    epoch_logger, batch_logger):
        """Train for one epoch; logs per-batch and per-epoch stats."""
        print('train at epoch {}'.format(epoch))
        # set model to train mode
        model.train()
        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses = AverageMeter()
        accuracies = AverageMeter()
        # Hoisted out of the batch loop: moving the model to GPU once is
        # sufficient (it was previously re-issued every iteration).
        if not self.no_cuda:
            model = model.cuda()
        end_time = time.time()
        for i, (inputs, targets) in enumerate(data_loader):
            data_time.update(time.time() - end_time)
            if not self.no_cuda:
                inputs = inputs.cuda()
                targets = targets.cuda()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            acc = calculate_accuracy(outputs, targets)
            # .item() detaches the scalar; avoids keeping graph references
            losses.update(loss.item(), inputs.size(0))
            accuracies.update(acc, inputs.size(0))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            batch_time.update(time.time() - end_time)
            end_time = time.time()
            batch_logger.log({
                'epoch': epoch,
                'batch': i + 1,
                'iter': (epoch - 1) * len(data_loader) + (i + 1),
                'loss': losses.val,
                'acc': accuracies.val,
                'lr': optimizer.param_groups[0]['lr']
            })
            self.vislog_batch(i, losses.val)
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Acc {acc.val:.3f} ({acc.avg:.3f})'.format(
                      epoch,
                      i + 1,
                      len(data_loader),
                      batch_time=batch_time,
                      data_time=data_time,
                      loss=losses,
                      acc=accuracies))
        epoch_logger.log({
            'epoch': epoch,
            'loss': losses.avg,
            'acc': accuracies.avg,
            'lr': optimizer.param_groups[0]['lr']
        })
        if epoch % self.checkpoint == 0:
            save_file_path = os.path.join(
                self.result_path,
                self.model_name + 'save_{}.pth'.format(epoch))
            states = {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            }
            torch.save(states, save_file_path)

    def validation(self, val_data, model, criterion, val_logger):
        """Evaluate ``model`` on ``val_data`` and log loss/accuracy."""
        val_loader = torch.utils.data.DataLoader(
            val_data,
            batch_size=self.batch_size,
            shuffle=False,
            # num_workers=self.n_threads,
            pin_memory=True)
        model.eval()
        losses = AverageMeter()
        accuracies = AverageMeter()
        end_time = time.time()
        # no_grad: evaluation does not need autograd bookkeeping
        with torch.no_grad():
            for i, (inputs, targets) in enumerate(val_loader):
                if not self.no_cuda:
                    inputs = inputs.cuda()
                    targets = targets.cuda()
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                acc = calculate_accuracy(outputs, targets)
                losses.update(loss.item(), inputs.size(0))
                accuracies.update(acc, inputs.size(0))
        test_time = time.time() - end_time
        val_logger.log({
            'time': test_time,
            'loss': losses.avg,
            'acc': accuracies.avg
        })
        print('TestTime {test_time:.3f}\t'
              'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
              'Acc {acc.val:.3f} ({acc.avg:.3f})'.format(test_time=test_time,
                                                         loss=losses,
                                                         acc=accuracies))

    def vislog_batch(self, batch_idx, loss):
        """Append a per-batch loss point to the Visdom 'losses' window.

        NOTE(review): the 'test_loss' trace is fed a constant [2] —
        looks like a placeholder; confirm before relying on it.
        """
        x_value = batch_idx
        y_value = loss
        self.vis.line([y_value], [x_value],
                      name='train_loss',
                      win='losses',
                      update='append')
        self.vis.line([2], [x_value],
                      name='test_loss',
                      win='losses',
                      update='append')
        self.vis.update_window_opts(win='losses', opts=self.vis_loss_opts)
def main(n):
    """Train a small conv net with torchnet's Engine, plotting to Visdom.

    Args:
        n: optimizer selector — 1 = SGD, 2 = Adam, 3 = RMSprop.

    Raises:
        ValueError: if ``n`` is not 1, 2 or 3 (previously this fell through
        and crashed later with NameError on ``optimizer``).
    """
    viz = Visdom()
    # Parameter dict; conv_init / linear_init use He initialization.
    params = {
        'conv0.weight': conv_init(1, 50, 5),
        'conv0.bias': torch.zeros(50),
        'conv1.weight': conv_init(50, 50, 5),
        'conv1.bias': torch.zeros(50),
        'linear2.weight': linear_init(800, 512),
        'linear2.bias': torch.zeros(512),
        'linear3.weight': linear_init(512, 10),
        'linear3.bias': torch.zeros(10),
    }
    # Wrap tensors as autograd Variables so the optimizer can update them.
    params = {k: Variable(v, requires_grad=True) for k, v in params.items()}
    if n == 1:
        optimizer = torch.optim.SGD(params.values(),
                                    lr=0.01,
                                    momentum=0.9,
                                    weight_decay=0.0005)
    elif n == 2:
        optimizer = torch.optim.Adam(params.values(),
                                     lr=0.001,
                                     betas=(0.9, 0.99))
    elif n == 3:
        optimizer = torch.optim.RMSprop(params.values(), lr=0.01, alpha=0.9)
    else:
        raise ValueError('n must be 1 (SGD), 2 (Adam) or 3 (RMSprop)')
    # Engine provides the training template wiring model, iterator,
    # criterion and meters together.
    engine = Engine()
    meter_loss = tnt.meter.AverageValueMeter()  # running mean/std of loss
    classerr = tnt.meter.ClassErrorMeter(accuracy=True)  # classification error
    confusion_meter = tnt.meter.ConfusionMeter(10, normalized=True)  # 10-class confusion matrix
    port = 8097  # Visdom port
    # Fixed win so all optimizer runs share one plot; legend names are
    # supplied per-point via ``name=`` in on_sample below.
    train_loss_logger = VisdomPlotLogger('line', port=port, opts={},
                                         win='102')
    viz.update_window_opts(
        win='101',
        opts=dict(
            legend=['Apples', 'Pears'],
            xtickmin=0,
            xtickmax=1,
            xtickstep=0.5,
            ytickmin=0,
            ytickmax=1,
            ytickstep=0.5,
            markersymbol='cross-thin-open',
        ),
    )  # train_loss line
    train_err_logger = VisdomPlotLogger('line',
                                        port=port,
                                        opts={'title': 'Train Class Error'
                                              })  # train_err line
    test_loss_logger = VisdomPlotLogger('line',
                                        port=port,
                                        opts={'title': 'Test Loss'
                                              })  # test_loss line
    test_err_logger = VisdomPlotLogger(
        'line',
        port=port,
        opts={'title': 'Test Class Error'},
    )  # test_err line
    confusion_logger = VisdomLogger('heatmap',
                                    port=port,
                                    opts={
                                        'title': 'Confusion matrix',
                                        'columnnames': list(range(10)),
                                        'rownames': list(range(10))
                                    })  # misclassification heatmap

    def h(sample):
        # Forward pass: sample = (inputs, targets, train_flag); returns (loss, output).
        inputs = Variable(sample[0].float() / 255.0)
        targets = Variable(torch.LongTensor(sample[1]))
        o = f(params, inputs, sample[2])
        return F.cross_entropy(o, targets), o

    def reset_meters():
        # Clear all meters between measurement windows.
        classerr.reset()
        meter_loss.reset()
        confusion_meter.reset()

    # Engine hook lifecycle (in call order): on_start -> on_start_epoch ->
    # [on_sample -> on_forward -> on_backward -> on_update]* ->
    # on_end_epoch -> on_end. ``state`` carries network/iterator/maxepoch/
    # optimizer plus counters: 'epoch', 't' (samples seen), 'train' flag.

    def on_sample(state):
        # Called after each sample is drawn; tag it with the train flag.
        state['sample'].append(state['train'])
        # After a warm-up of 10 samples, stream the running loss to the
        # shared window under the current optimizer's legend name.
        if state.get('epoch') is not None and state['t'] > 10:
            if n == 1:
                train_loss_logger.log(state['t'],
                                      meter_loss.value()[0],
                                      name="SGD")
            elif n == 2:
                train_loss_logger.log(state['t'],
                                      meter_loss.value()[0],
                                      name="Adam")
            elif n == 3:
                train_loss_logger.log(state['t'],
                                      meter_loss.value()[0],
                                      name="RMSprop")
            reset_meters()

    def on_forward(state):
        # Called after model forward: accumulate error/confusion/loss meters.
        classerr.add(state['output'].data,
                     torch.LongTensor(state['sample'][1]))
        confusion_meter.add(state['output'].data,
                            torch.LongTensor(state['sample'][1]))
        meter_loss.add(state['loss'].data[0])

    def on_start_epoch(state):
        # Called before each epoch: reset meters and wrap iterator in tqdm.
        reset_meters()
        state['iterator'] = tqdm(state['iterator'])

    def on_end_epoch(state):
        # Called at the end of each epoch: report train stats, then run
        # validation at the end of each epoch.
        print('Training loss: %.4f, accuracy: %.2f%%' %
              (meter_loss.value()[0], classerr.value()[0]))
        # train_loss_logger.log(state['epoch'], meter_loss.value()[0])
        # train_err_logger.log(state['epoch'], classerr.value()[0])
        reset_meters()
        engine.test(h, get_iterator(False))
        # test_loss_logger.log(state['epoch'], meter_loss.value()[0])
        # test_err_logger.log(state['epoch'], classerr.value()[0])
        # confusion_logger.log(confusion_meter.value())
        print('Testing loss: %.4f, accuracy: %.2f%%' %
              (meter_loss.value()[0], classerr.value()[0]))

    engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch
    engine.train(h, get_iterator(True), maxepoch=1, optimizer=optimizer)
def plot_line(vis: visdom.Visdom,
              window_name: str,
              env: Optional[str] = None,
              line_label: Optional[str] = None,
              x: Optional[np.ndarray] = None,
              y: Optional[np.ndarray] = None,
              x_label: Optional[str] = None,
              y_label: Optional[str] = None,
              width: int = 576,
              height: int = 416,
              draw_marker: bool = False) -> str:
    """Append (x, y) to a Visdom line window, creating it on first use.

    After plotting, the window's axis ranges are refreshed: x spans
    [0, max(x) * 1.05] and y comes from ``calc_ytick_range``.

    Args:
        vis: connected Visdom client.
        window_name: target window id; also used as the title on creation.
        env: Visdom environment, or None for the default.
        line_label: legend name of the trace being appended.
        x, y: data to append; missing values fall back to a single
            placeholder point (x=1, y=NaN).
        x_label, y_label: axis labels applied after plotting.
        width, height: window size, applied only when the window is created.
        draw_marker: whether to draw point markers.

    Returns:
        The (possibly server-rewritten) window name.
    """
    empty_call = not vis.win_exists(window_name)
    # NOTE(review): this early-returns exactly when the window does NOT yet
    # exist and data WAS provided, which prevents ever creating a window
    # from a data-bearing call — the condition looks inverted. Kept as-is
    # pending confirmation against the callers.
    if empty_call and (x is not None or y is not None):
        return window_name
    if x is None:
        x = np.ones(1)
    if y is None:
        y = np.full(1, np.nan)  # NaN point: registers the trace, draws nothing
    if x.shape != y.shape:
        x = np.ones_like(y)
    opts = {
        'showlegend': True,
        'markers': draw_marker,
        'markersize': 5,
    }
    if empty_call:
        # Title and geometry only matter at window-creation time.
        opts['title'] = window_name
        opts['width'] = width
        opts['height'] = height
    window_name = vis.line(
        X=x,
        Y=y,
        win=window_name,
        env=env,
        update='append',
        name=line_label,
        opts=opts
    )
    xtickmin, xtickmax = 0.0, np.max(x) * 1.05
    ytickmin, ytickmax = calc_ytick_range(vis, window_name, env)
    opts = {
        'showlegend': True,
        'xtickmin': xtickmin,
        'xtickmax': xtickmax,
        'ytickmin': ytickmin,
        'ytickmax': ytickmax,
        'xlabel': x_label,
        'ylabel': y_label
    }
    window_name = vis.update_window_opts(win=window_name, opts=opts, env=env)
    return window_name
'xtickmin': 0, 'xtickmax': 100, 'xtickstep': 10, 'ytickmin': 0, 'ytickmax': 100, 'ytickstep': 10, 'markersymbol': 'cross-thin-open', 'width': 800, 'height': 600 }, ) # time.sleep(5) # 更新样式 viz.update_window_opts(win=old_scatter, opts={ 'title': 'New Scatter', 'legend': ['Apple', 'Banana'], 'markersymbol': 'dot' }) # 3D散点图 viz.scatter(X=np.random.rand(100, 3), Y=Y, opts={ 'title': '3D Scatter', 'legend': ['Men', 'Women'], 'markersize': 5 }) # 柱状图 viz.bar(X=np.random.rand(20)) viz.bar( X=np.abs(np.random.rand(5, 3)), # 5个列,每列有3部分组成
class VisShow(object):
    '''
    Quick utility that wraps Visdom by `name`.
    Provide low-level interface `get_window` and `set_window`
    Provide high-level interface `update`, `xlabel` and `ylabel`
    `update`: append data to an existed window or create a new window with data
    `xlabel` `ylabel`: change the xlabel/ylabel of an existed window

    When `server` is the empty string the instance is a no-op sink
    (`self.vis is None`) so callers can disable visualization cheaply.
    '''

    def __init__(self, server: str, port: int, envdir: str,
                 subenv: str) -> None:
        if server == "":
            self.vis = None
        else:
            self.vis = Visdom(
                server,
                port=port,
                env=f'{envdir}_{subenv}',
                raise_exceptions=False
            )

    def get_window(self, target: str) -> Optional[str]:
        """Return the window id registered under ``target``, or None."""
        # Window ids are stashed as dynamic attributes named '__<target>'.
        # Single getattr with a default replaces the hasattr/getattr pair.
        return getattr(self, f'__{target}', None)

    def set_window(self, name: str, win: str) -> None:
        """Register window id ``win`` under ``name``.

        Raises:
            ValueError: if a window is already registered under ``name``.
        """
        attrname = f'__{name}'
        if hasattr(self, attrname):
            raise ValueError(f'{name} has existed')
        setattr(self, attrname, win)

    def update(self, target: str, X: List[Union[int, float]],
               Y: List[Union[int, float]]) -> None:
        """Append (X, Y) to window ``target``, creating it on first use."""
        if self.vis is None:
            return
        win = self.get_window(target)
        if win is not None:
            self.vis.line(Y, X, win=win, update='append')
        else:
            self.set_window(target,
                            self.vis.line(Y, X, opts={'title': target}))

    def xlabel(self, target: str, label: str) -> None:
        """Set the x-axis label of an existing window.

        Raises:
            ValueError: if no window is registered under ``target``.
        """
        if self.vis is None:
            return
        win = self.get_window(target)
        if win is not None:
            self.vis.update_window_opts(win, {'xlabel': label})
        else:
            raise ValueError(f'{target} has not existed')

    def ylabel(self, target: str, label: str) -> None:
        """Set the y-axis label of an existing window.

        Raises:
            ValueError: if no window is registered under ``target``.
        """
        if self.vis is None:
            return
        win = self.get_window(target)
        if win is not None:
            self.vis.update_window_opts(win, {'ylabel': label})
        else:
            raise ValueError(f'{target} has not existed')
class Model:
    """Multi-modal (RGB + depth + IR) training wrapper.

    Owns the network, classifier head, loss, optimizer, schedulers, data
    loaders and metric meters; logs to text files and a Visdom server and
    checkpoints to ``<out_path>/checkpoints``.
    """

    def __init__(self, opt):
        self.opt = opt
        self.device = torch.device("cuda" if opt.ngpu else "cpu")
        self.model, self.classifier = models.get_model(
            opt.net_type, opt.classifier_type, opt.pretrained,
            int(opt.nclasses))
        self.model = self.model.to(self.device)
        self.classifier = self.classifier.to(self.device)
        if opt.ngpu > 1:
            self.model = nn.DataParallel(self.model)
        self.loss = models.init_loss(opt.loss_type)
        self.loss = self.loss.to(self.device)
        self.optimizer = utils.get_optimizer(self.model, self.opt)
        self.lr_scheduler = utils.get_lr_scheduler(self.opt, self.optimizer)
        self.alpha_scheduler = utils.get_margin_alpha_scheduler(self.opt)
        self.train_loader = datasets.generate_loader(opt, 'train')
        self.test_loader = datasets.generate_loader(opt, 'val')
        self.epoch = 0
        self.best_epoch = False   # True while current epoch is the best so far
        self.training = False     # toggled by train_epoch/test_epoch
        self.state = {}           # checkpoint payload, filled by create_state
        self.train_loss = utils.AverageMeter()
        self.test_loss = utils.AverageMeter()
        self.batch_time = utils.AverageMeter()
        self.test_metrics = utils.ROCMeter()
        # best_test_loss.val holds the best loss seen; seeded with +inf
        self.best_test_loss = utils.AverageMeter()
        self.best_test_loss.update(np.array([np.inf]))
        self.visdom_log_file = os.path.join(self.opt.out_path, 'log_files',
                                            'visdom.log')
        self.vis = Visdom(port=opt.visdom_port,
                          log_to_filename=self.visdom_log_file,
                          env=opt.exp_name + '_' + str(opt.fold))
        self.vis_loss_opts = {
            'xlabel': 'epoch',
            'ylabel': 'loss',
            'title': 'losses',
            'legend': ['train_loss', 'val_loss']
        }
        self.vis_tpr_opts = {
            'xlabel': 'epoch',
            'ylabel': 'tpr',
            'title': 'val_tpr',
            'legend': ['tpr@fpr10-2', 'tpr@fpr10-3', 'tpr@fpr10-4']
        }
        self.vis_epochloss_opts = {
            'xlabel': 'epoch',
            'ylabel': 'loss',
            'title': 'epoch_losses',
            'legend': ['train_loss', 'val_loss']
        }

    def train(self):
        """Run the training schedule, optionally resuming from checkpoint."""
        # Init Log file
        if self.opt.resume:
            self.log_msg('resuming...\n')
            # Continue training from checkpoint
            self.load_checkpoint()
        else:
            self.log_msg()
        for epoch in range(self.epoch, self.opt.num_epochs):
            self.epoch = epoch
            # Freeze backbone until opt.freeze_epoch, then unfreeze once.
            if self.opt.freeze_epoch:
                if epoch < self.opt.freeze_epoch:
                    if self.opt.ngpu > 1:
                        for param in self.model.module.parameters():
                            param.requires_grad = False
                    else:
                        for param in self.model.parameters():
                            param.requires_grad = False
                elif epoch == self.opt.freeze_epoch:
                    if self.opt.ngpu > 1:
                        for param in self.model.module.parameters():
                            param.requires_grad = True
                    else:
                        for param in self.model.parameters():
                            param.requires_grad = True
            self.lr_scheduler.step()
            self.train_epoch()
            self.test_epoch()
            self.log_epoch()
            self.vislog_epoch()
            self.create_state()
            self.save_state()

    def train_epoch(self):
        """ Trains model for 1 epoch """
        self.model.train()
        self.classifier.train()
        self.training = True
        torch.set_grad_enabled(self.training)
        self.train_loss.reset()
        self.batch_time.reset()
        time_stamp = time.time()
        self.batch_idx = 0
        for batch_idx, (rgb_data, depth_data, ir_data,
                        target) in enumerate(self.train_loader):
            self.batch_idx = batch_idx
            rgb_data = rgb_data.to(self.device)
            depth_data = depth_data.to(self.device)
            ir_data = ir_data.to(self.device)
            target = target.to(self.device)
            self.optimizer.zero_grad()
            output = self.model(rgb_data, depth_data, ir_data)
            # Margin-based classifiers need the target (and optionally a
            # scheduled alpha); a plain Linear head does not.
            if isinstance(self.classifier, nn.Linear):
                output = self.classifier(output)
            else:
                if self.alpha_scheduler:
                    alpha = self.alpha_scheduler.get_alpha(self.epoch)
                    output = self.classifier(output, target, alpha=alpha)
                else:
                    output = self.classifier(output, target)
            if self.opt.loss_type == 'bce':
                target = target.float()
                loss_tensor = self.loss(output.squeeze(), target)
            else:
                loss_tensor = self.loss(output, target)
            loss_tensor.backward()
            self.optimizer.step()
            self.train_loss.update(loss_tensor.item())
            self.batch_time.update(time.time() - time_stamp)
            time_stamp = time.time()
            self.log_batch(batch_idx)
            self.vislog_batch(batch_idx)

    def test_epoch(self):
        """ Calculates loss and metrics for test set """
        self.training = False
        torch.set_grad_enabled(self.training)
        self.model.eval()
        self.classifier.eval()
        self.batch_time.reset()
        self.test_loss.reset()
        self.test_metrics.reset()
        time_stamp = time.time()
        for batch_idx, (rgb_data, depth_data, ir_data,
                        target) in enumerate(self.test_loader):
            rgb_data = rgb_data.to(self.device)
            depth_data = depth_data.to(self.device)
            ir_data = ir_data.to(self.device)
            target = target.to(self.device)
            output = self.model(rgb_data, depth_data, ir_data)
            output = self.classifier(output)
            if self.opt.loss_type == 'bce':
                target = target.float()
                loss_tensor = self.loss(output.squeeze(), target)
            else:
                loss_tensor = self.loss(output, target)
            self.test_loss.update(loss_tensor.item())
            # ROCMeter expects probabilities: softmax for multi-class,
            # sigmoid for binary.
            if self.opt.loss_type == 'cce' or self.opt.loss_type == 'focal_loss':
                output = torch.nn.functional.softmax(output, dim=1)
            elif self.opt.loss_type == 'bce':
                output = torch.sigmoid(output)
            self.test_metrics.update(target.cpu().numpy(),
                                     output.cpu().numpy())
            self.batch_time.update(time.time() - time_stamp)
            time_stamp = time.time()
            self.log_batch(batch_idx)
            if self.opt.debug and (batch_idx == 10):
                print('Debugging done!')
                break
        self.best_epoch = self.test_loss.avg < self.best_test_loss.val
        if self.best_epoch:
            # self.best_test_loss.val is container for best loss,
            # n is not used in the calculation
            self.best_test_loss.update(self.test_loss.avg, n=0)

    def calculate_metrics(self, output, target):
        """Return a list of accuracy metrics for one batch.

        Args:
            output: model scores — probabilities for 'bce', logits/probs
                per class for 'cce'.
            target: ground-truth labels.

        Raises:
            Exception: for loss types without an implemented metric.
        """
        t = target
        o = output
        # BUGFIX: batch_result was never initialized and the bce branch
        # appended an undefined name (binary_accuracy) — both raised
        # NameError on first use.
        batch_result = []
        if self.opt.loss_type == 'bce':
            accuracy = (t.byte() == (o > 0.5)).float().mean(0).cpu().numpy()
            batch_result.append(accuracy)
        elif self.opt.loss_type == 'cce':
            top1_accuracy = (torch.argmax(o, 1) == t).float().mean().item()
            batch_result.append(top1_accuracy)
        else:
            raise Exception('This loss function is not implemented yet')
        return batch_result

    def log_batch(self, batch_idx):
        """Print progress every ``opt.log_batch_interval`` batches."""
        if batch_idx % self.opt.log_batch_interval == 0:
            cur_len = len(self.train_loader) if self.training else len(
                self.test_loader)
            cur_loss = self.train_loss if self.training else self.test_loss
            output_string = 'Train ' if self.training else 'Test '
            output_string += 'Epoch {}[{:.2f}%]: [{:.2f}({:.3f}) s]\t'.format(
                self.epoch, 100. * batch_idx / cur_len, self.batch_time.val,
                self.batch_time.avg)
            loss_i_string = 'Loss: {:.5f}({:.5f})\t'.format(
                cur_loss.val, cur_loss.avg)
            output_string += loss_i_string
            if not self.training:
                output_string += '\n'
                metrics_i_string = 'Accuracy: {:.5f}\t'.format(
                    self.test_metrics.get_accuracy())
                output_string += metrics_i_string
            print(output_string)

    def vislog_batch(self, batch_idx):
        """Append the current loss to the Visdom 'losses' window."""
        if batch_idx % self.opt.log_batch_interval == 0:
            loader_len = len(self.train_loader) if self.training else len(
                self.test_loader)
            cur_loss = self.train_loss if self.training else self.test_loss
            loss_type = 'train_loss' if self.training else 'val_loss'
            # Fractional epoch gives a continuous x-axis across batches.
            x_value = self.epoch + batch_idx / loader_len
            y_value = cur_loss.val
            self.vis.line([y_value], [x_value],
                          name=loss_type,
                          win='losses',
                          update='append')
            self.vis.update_window_opts(win='losses',
                                        opts=self.vis_loss_opts)

    def log_msg(self, msg=''):
        """Append ``msg`` to the train log; an empty msg truncates the file."""
        mode = 'a' if msg else 'w'
        # with-statement guarantees the handle is closed even on error
        with open(
                os.path.join(self.opt.out_path, 'log_files', 'train_log.txt'),
                mode) as f:
            f.write(msg)

    def log_epoch(self):
        """ Epoch results log string"""
        out_train = 'Train: '
        out_test = 'Test: '
        loss_i_string = 'Loss: {:.5f}\t'.format(self.train_loss.avg)
        out_train += loss_i_string
        loss_i_string = 'Loss: {:.5f}\t'.format(self.test_loss.avg)
        out_test += loss_i_string
        out_test += '\nTest: '
        metrics_i_string = 'TPR@FPR=10-2: {:.4f}\t'.format(
            self.test_metrics.get_tpr(0.01))
        metrics_i_string += 'TPR@FPR=10-3: {:.4f}\t'.format(
            self.test_metrics.get_tpr(0.001))
        metrics_i_string += 'TPR@FPR=10-4: {:.4f}\t'.format(
            self.test_metrics.get_tpr(0.0001))
        out_test += metrics_i_string
        is_best = 'Best ' if self.best_epoch else ''
        out_res = is_best + 'Epoch {} results:\n'.format(
            self.epoch) + out_train + '\n' + out_test + '\n'
        print(out_res)
        self.log_msg(out_res)

    def vislog_epoch(self):
        """Push per-epoch losses and TPR metrics to Visdom."""
        x_value = self.epoch
        self.vis.line([self.train_loss.avg], [x_value],
                      name='train_loss',
                      win='epoch_losses',
                      update='append')
        self.vis.line([self.test_loss.avg], [x_value],
                      name='val_loss',
                      win='epoch_losses',
                      update='append')
        self.vis.update_window_opts(win='epoch_losses',
                                    opts=self.vis_epochloss_opts)
        self.vis.line([self.test_metrics.get_tpr(0.01)], [x_value],
                      name='tpr@fpr10-2',
                      win='val_tpr',
                      update='append')
        self.vis.line([self.test_metrics.get_tpr(0.001)], [x_value],
                      name='tpr@fpr10-3',
                      win='val_tpr',
                      update='append')
        self.vis.line([self.test_metrics.get_tpr(0.0001)], [x_value],
                      name='tpr@fpr10-4',
                      win='val_tpr',
                      update='append')
        self.vis.update_window_opts(win='val_tpr', opts=self.vis_tpr_opts)

    def create_state(self):
        self.state = {
            # Params to be saved in checkpoint
            'epoch': self.epoch,
            'model_state_dict': self.model.state_dict(),
            'classifier_state_dict': self.classifier.state_dict(),
            'best_test_loss': self.best_test_loss,
            'optimizer': self.optimizer.state_dict(),
            'lr_scheduler': self.lr_scheduler.state_dict(),
        }

    def save_state(self):
        """Write a checkpoint according to ``opt.log_checkpoint`` policy."""
        if self.opt.log_checkpoint == 0:
            # 0 means a single rolling checkpoint file
            self.save_checkpoint('checkpoint.pth')
        else:
            if (self.epoch % self.opt.log_checkpoint == 0):
                self.save_checkpoint('model_{}.pth'.format(self.epoch))

    def save_checkpoint(self, filename):
        # Save model to task_name/checkpoints/filename.pth
        fin_path = os.path.join(self.opt.out_path, 'checkpoints', filename)
        torch.save(self.state, fin_path)
        if self.best_epoch:
            best_fin_path = os.path.join(self.opt.out_path, 'checkpoints',
                                         'model_best.pth')
            torch.save(self.state, best_fin_path)

    def load_checkpoint(self):
        # Load current checkpoint if exists
        fin_path = os.path.join(self.opt.out_path, 'checkpoints',
                                self.opt.resume)
        if os.path.isfile(fin_path):
            print("=> loading checkpoint '{}'".format(fin_path))
            checkpoint = torch.load(fin_path,
                                    map_location=lambda storage, loc: storage)
            self.epoch = checkpoint['epoch'] + 1
            self.best_test_loss = checkpoint['best_test_loss']
            self.model.load_state_dict(checkpoint['model_state_dict'])
            self.classifier.load_state_dict(
                checkpoint['classifier_state_dict'])
            self.optimizer.load_state_dict(checkpoint['optimizer'])
            # self.lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                self.opt.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(self.opt.resume))
        # Re-send the logged plots to the Visdom server after a resume.
        if os.path.isfile(self.visdom_log_file):
            self.vis.replay_log(log_filename=self.visdom_log_file)
xtickmax=50, xtickstep=0.5, ytickmin=-50, ytickmax=50, ytickstep=0.5, markersymbol='cross-thin-open', ), ) viz.update_window_opts( win=old_scatter, opts=dict( legend=['Apples', 'Pears'], xtickmin=0, xtickmax=1, xtickstep=0.5, ytickmin=0, ytickmax=1, ytickstep=0.5, markersymbol='cross-thin-open', ), ) viz.scatter( X=np.random.rand(100, 3), Y=(Y + 1.5).astype(int), opts=dict( legend=['Men', 'Women'], markersize=5, ) )
class Model:
    """Training harness: owns the backbone + classifier pair, loss,
    optimizer/scheduler, data loaders, meters, and visdom logging.

    Builds everything from an argparse-style ``opt`` namespace; ``train()``
    drives the full epoch loop with checkpointing.
    """

    def __init__(self, opt):
        self.opt = opt
        # Any non-zero ngpu selects CUDA; 0 means CPU-only
        self.device = torch.device("cuda" if opt.ngpu else "cpu")
        # Project helper returns (feature extractor, classification head)
        self.model, self.classifier = models.get_model(opt.net_type,
                                                       opt.loss_type,
                                                       opt.pretrained,
                                                       int(opt.nclasses))
        self.model = self.model.to(self.device)
        self.classifier = self.classifier.to(self.device)
        # Only the backbone is wrapped in DataParallel, not the classifier
        if opt.ngpu > 1:
            self.model = nn.DataParallel(self.model)

        self.loss = models.init_loss(opt.loss_type)
        self.loss = self.loss.to(self.device)

        self.optimizer = utils.get_optimizer(self.model, self.opt)
        self.lr_scheduler = utils.get_lr_scheduler(self.opt, self.optimizer)

        self.train_loader = datasets.generate_loader(opt, 'train')
        self.test_loader = datasets.generate_loader(opt, 'val')

        self.epoch = 0
        self.best_epoch = False   # True while the latest test loss is the best seen
        self.training = False     # toggled by train_epoch/test_epoch
        self.state = {}           # checkpoint dict, filled by create_state()

        self.train_loss = utils.AverageMeter()
        self.test_loss = utils.AverageMeter()
        self.batch_time = utils.AverageMeter()
        # Scalar-metric losses use an averaging meter; anything else is
        # assumed to need ROC bookkeeping
        if self.opt.loss_type in ['cce', 'bce', 'mse', 'arc_margin']:
            self.test_metrics = utils.AverageMeter()
        else:
            self.test_metrics = utils.ROCMeter()
        self.best_test_loss = utils.AverageMeter()
        # Seed "best" with +inf so the first epoch always improves on it
        self.best_test_loss.update(np.array([np.inf]))

        self.visdom_log_file = os.path.join(self.opt.out_path, 'log_files',
                                            'visdom.log')
        self.vis = Visdom(port=opt.visdom_port,
                          log_to_filename=self.visdom_log_file,
                          env=opt.exp_name + '_' + str(opt.fold))
        self.vis_loss_opts = {'xlabel': 'epoch',
                              'ylabel': 'loss',
                              'title': 'losses',
                              'legend': ['train_loss', 'val_loss']}
        self.vis_epochloss_opts = {'xlabel': 'epoch',
                                   'ylabel': 'loss',
                                   'title': 'epoch_losses',
                                   'legend': ['train_loss', 'val_loss']}

    def train(self):
        """Run the full training loop from self.epoch to opt.num_epochs."""
        # Init Log file
        if self.opt.resume:
            self.log_msg('resuming...\n')
            # Continue training from checkpoint
            self.load_checkpoint()
        else:
            self.log_msg()

        for epoch in range(self.epoch, self.opt.num_epochs):
            self.epoch = epoch
            '''
            if epoch < 0:
                for param in self.model.module.body.parameters():
                    param.requires_grad=False
            elif epoch == 0:
                for param in self.model.module.body.parameters():
                    param.requires_grad=True
            '''
            # NOTE(review): scheduler is stepped BEFORE the epoch's
            # optimizer steps — the pre-PyTorch-1.1 calling order. Under
            # 1.1+ this skips the initial LR value; confirm intended.
            self.lr_scheduler.step()
            self.train_epoch()
            self.test_epoch()
            self.log_epoch()
            self.vislog_epoch()
            self.create_state()
            self.save_state()

    def train_epoch(self):
        """ Trains model for 1 epoch """
        self.model.train()
        self.classifier.train()
        self.training = True
        torch.set_grad_enabled(self.training)
        self.train_loss.reset()
        self.batch_time.reset()
        time_stamp = time.time()
        self.batch_idx = 0
        for batch_idx, (data, target) in enumerate(self.train_loader):
            self.batch_idx = batch_idx
            data = data.to(self.device)
            target = target.to(self.device)
            self.optimizer.zero_grad()
            output = self.model(data)
            # Margin-style heads (non-Linear) need the target at forward time
            if isinstance(self.classifier, nn.Linear):
                output = self.classifier(output)
            else:
                output = self.classifier(output, target)
            if self.opt.loss_type == 'bce' or self.opt.loss_type == 'mse':
                # BCE/MSE expect float targets and a squeezed (N,) output
                target = target.float()
                loss_tensor = self.loss(output.squeeze(), target)
            else:
                loss_tensor = self.loss(output, target)
            loss_tensor.backward()
            self.optimizer.step()
            self.train_loss.update(loss_tensor.item())
            self.batch_time.update(time.time() - time_stamp)
            time_stamp = time.time()
            self.log_batch(batch_idx)
            self.vislog_batch(batch_idx)
            if self.opt.debug and (batch_idx==10):
                # Debug mode: stop after 11 batches for a fast dry run
                print('Debugging done!')
                break;

    def test_epoch(self):
        """ Calculates loss and metrics for test set """
        self.training = False
        torch.set_grad_enabled(self.training)
        self.model.eval()
        self.classifier.eval()
        self.batch_time.reset()
        self.test_loss.reset()
        self.test_metrics.reset()
        time_stamp = time.time()
        for batch_idx, (data, target) in enumerate(self.test_loader):
            data = data.to(self.device)
            target = target.to(self.device)
            output = self.model(data)
            # NOTE(review): unlike train_epoch, the classifier is called
            # WITHOUT target here — for margin heads (which take (output,
            # target) in train_epoch) this relies on a target-less eval
            # forward path existing; confirm against the classifier class.
            output = self.classifier(output)
            if self.opt.loss_type == 'bce' or self.opt.loss_type == 'mse':
                target = target.float()
                loss_tensor = self.loss(output.squeeze(), target)
            else:
                loss_tensor = self.loss(output, target)
            self.test_loss.update(loss_tensor.item())
            # Normalize raw logits to probabilities before metric computation
            if self.opt.loss_type == 'cce':
                output = torch.nn.functional.softmax(output, dim=1)
            elif self.opt.loss_type.startswith('arc_margin'):
                output = torch.nn.functional.softmax(output, dim=1)
            elif self.opt.loss_type == 'bce':
                output = torch.sigmoid(output)
            metrics = self.calculate_metrics(output, target)
            self.test_metrics.update(metrics)
            self.batch_time.update(time.time() - time_stamp)
            time_stamp = time.time()
            self.log_batch(batch_idx)
            #self.vislog_batch(batch_idx)
            if self.opt.debug and (batch_idx==10):
                print('Debugging done!')
                break;
        # An epoch is "best" when its mean test loss beats the stored best
        self.best_epoch = self.test_loss.avg < self.best_test_loss.val
        if self.best_epoch:
            # self.best_test_loss.val is container for best loss,
            # n is not used in the calculation
            self.best_test_loss.update(self.test_loss.avg, n=0)

    def calculate_metrics(self, output, target):
        """ Calculates the test metric for the given batch.

        Returns per-attribute binary accuracy for 'bce', per-target MAE for
        'mse', scalar top-1 accuracy for 'cce'/'arc_margin'; raises for any
        other loss type.
        """
        batch_result = None
        t = target
        o = output
        if self.opt.loss_type == 'bce':
            # Element-wise accuracy at a fixed 0.5 threshold
            binary_accuracy = (t.byte()==(o>0.5)).float().mean(0).cpu().numpy()
            batch_result = binary_accuracy
        elif self.opt.loss_type =='mse':
            mean_average_error = torch.abs(t-o.squeeze()).mean(0).cpu().numpy()
            batch_result = mean_average_error
        elif self.opt.loss_type == 'cce' or self.opt.loss_type == 'arc_margin':
            top1_accuracy = (torch.argmax(o, 1)==t).float().mean().item()
            batch_result = top1_accuracy
        else:
            raise Exception('This loss function is not implemented yet')
        return batch_result

    def log_batch(self, batch_idx):
        """Print batch progress every opt.log_batch_interval batches."""
        if batch_idx % self.opt.log_batch_interval == 0:
            cur_len = len(self.train_loader) if self.training else len(self.test_loader)
            cur_loss = self.train_loss if self.training else self.test_loss
            output_string = 'Train ' if self.training else 'Test '
            output_string +='Epoch {}[{:.2f}%]: [{:.2f}({:.3f}) s]\t'.format(
                self.epoch, 100.* batch_idx/cur_len,
                self.batch_time.val, self.batch_time.avg)
            loss_i_string = 'Loss: {:.5f}({:.5f})\t'.format(cur_loss.val, cur_loss.avg)
            output_string += loss_i_string
            print(output_string)

    def vislog_batch(self, batch_idx):
        """Append the current batch loss to the per-batch visdom curve."""
        loader_len = len(self.train_loader) if self.training else len(self.test_loader)
        cur_loss = self.train_loss if self.training else self.test_loss
        loss_type = 'train_loss' if self.training else 'val_loss'
        # Fractional epoch as the x coordinate
        x_value = self.epoch + batch_idx / loader_len
        y_value = cur_loss.val
        self.vis.line([y_value], [x_value],
                      name=loss_type, win='losses', update='append')
        self.vis.update_window_opts(win='losses', opts=self.vis_loss_opts)

    def log_msg(self, msg=''):
        """Append msg to the text log; an empty msg TRUNCATES the file
        (mode 'w'), which is how train() initializes a fresh log.
        NOTE(review): file handle is not wrapped in `with` — a write error
        would leak the handle."""
        mode = 'a' if msg else 'w'
        f = open(os.path.join(self.opt.out_path, 'log_files', 'train_log.txt'), mode)
        f.write(msg)
        f.close()

    def log_epoch(self):
        """ Epoch results log string"""
        out_train = 'Train: '
        out_test = 'Test: '
        loss_i_string = 'Loss: {:.5f}\t'.format(self.train_loss.avg)
        out_train += loss_i_string
        loss_i_string = 'Loss: {:.5f}\t'.format(self.test_loss.avg)
        out_test += loss_i_string
        out_test+='\nTest: '
        out_test+= '{0}\t{1:.4f}\t'.format(self.opt.loss_type,
                                           self.test_metrics.avg)
        is_best = 'Best ' if self.best_epoch else ''
        out_res = is_best+'Epoch {} results:\n'.format(self.epoch)+out_train+'\n'+out_test+'\n'
        print(out_res)
        self.log_msg(out_res)

    def vislog_epoch(self):
        """Append per-epoch train/val losses to the epoch-level visdom curve."""
        x_value = self.epoch
        self.vis.line([self.train_loss.avg], [x_value],
                      name='train_loss', win='epoch_losses', update='append')
        self.vis.line([self.test_loss.avg], [x_value],
                      name='val_loss', win='epoch_losses', update='append')
        self.vis.update_window_opts(win='epoch_losses', opts=self.vis_epochloss_opts)

    # Dead code preserved as strings below — kept for reference only.
    ''' LEGACY CODE '''
    '''
    def adjust_lr(self):
        if self.opt.lr_type == 'step_lr':
            Set the LR to the initial LR decayed by lr_decay_lvl every lr_decay_period epochs
            lr = self.opt.lr * (self.opt.lr_decay_lvl ** ((self.epoch+1) // self.opt.lr_decay_period))
            for param_group in self.optimizer.param_groups:
                param_group['lr'] = lr
        elif self.opt.lr_type == 'cosine_lr':
            Cosine LR by [email protected] and [email protected]
            n_batches = len(self.train_loader)
            t_total = self.opt.num_epochs * n_batches
            t_cur = ((self.epoch) % self.opt.num_epochs) * n_batches
            t_cur += self.batch_idx
            lr_scale = 0.5 * (1 + math.cos(math.pi * t_cur / t_total))
            lr_scale_prev = 0.5 * (1 + math.cos( math.pi * np.clip((t_cur - 1), 0, t_total) / t_total))
            lr_scale_change = lr_scale / lr_scale_prev
            self.lr *= lr_scale_change
            if self.batch_idx % self.opt.log_batch_interval == 0 and self.batch_idx == 0:
                print (f'LR: {self.lr:.4f}')
            for param_group in self.optimizer.param_groups:
                param_group['lr'] = self.lr
        else:
            raise Exception('Unexpected lr type')
    '''

    def create_state(self):
        """Assemble the checkpoint dict from the current training state."""
        self.state = {  # Params to be saved in checkpoint
            'epoch' : self.epoch,
            'model_state_dict' : self.model.state_dict(),
            'classifier_state_dict': self.classifier.state_dict(),
            'best_test_loss' : self.best_test_loss,
            'optimizer': self.optimizer.state_dict(),
            'lr_scheduler': self.lr_scheduler.state_dict(),
        }

    def save_state(self):
        """Save a rolling checkpoint (log_checkpoint == 0) or a per-epoch
        snapshot every log_checkpoint epochs."""
        if self.opt.log_checkpoint == 0:
            self.save_checkpoint('checkpoint.pth')
        else:
            if (self.epoch % self.opt.log_checkpoint == 0):
                self.save_checkpoint('model_{}.pth'.format(self.epoch))

    def save_checkpoint(self, filename):
        # Save model to task_name/checkpoints/filename.pth
        fin_path = os.path.join(self.opt.out_path,'checkpoints', filename)
        torch.save(self.state, fin_path)
        if self.best_epoch:
            # Also mirror the best epoch to a fixed filename
            best_fin_path = os.path.join(self.opt.out_path, 'checkpoints', 'model_best.pth')
            torch.save(self.state, best_fin_path)

    def load_checkpoint(self):
        # Load current checkpoint if exists
        fin_path = os.path.join(self.opt.out_path,'checkpoints',self.opt.resume)
        if os.path.isfile(fin_path):
            print("=> loading checkpoint '{}'".format(fin_path))
            # map_location keeps tensors on CPU regardless of saving device
            checkpoint = torch.load(fin_path, map_location=lambda storage, loc: storage)
            # Resume from the epoch AFTER the saved one
            self.epoch = checkpoint['epoch'] + 1
            self.best_test_loss = checkpoint['best_test_loss']
            self.model.load_state_dict(checkpoint['model_state_dict'])
            self.classifier.load_state_dict(checkpoint['classifier_state_dict'])
            self.optimizer.load_state_dict(checkpoint['optimizer'])
            self.lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
            print("=> loaded checkpoint '{}' (epoch {})".format(self.opt.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(self.opt.resume))
        # Replay visdom history so resumed curves stay continuous
        if os.path.isfile(self.visdom_log_file):
            self.vis.replay_log(log_filename=self.visdom_log_file)
default=DEFAULT_HOSTNAME, help='Server address of the target to run the demo on.') FLAGS = parser.parse_args() viz = Visdom(port=FLAGS.port, server=FLAGS.server) hub = "/Users/afq/Google Drive/networks/" eoses = [ "water_slgc_logp_64", # "water_lg", # "water_linear", ] for eos in eoses: # netplots = rout.plot_networks(surfs) # Subplots simplots = rout.make_simulation_plot_list( hub + 'test_databases/' + eos + '_testing.db', eos) for p in simplots: viz.plotlyplot(p, env=eos) # The monolithic plot # simplots = rout.plot_simulations(hub+'test_databases/'+eos+'_testing.db',eos) # viz.plotlyplot(simplots, win='win_'+eos, env=eos) surfs = rout.read_networks(hub + 'training_' + eos) viz.update_window_opts('win_' + eos, {'width': 500, 'height': 500}) netplots = rout.generate_trisurf_plots(surfs) for n, p in netplots: viz.plotlyplot(p, win='net_' + eos + n, env=eos) viz.update_window_opts('net_' + eos + n, {'width': 200, 'height': 200})