def validate(val_loader, model, criterion):
    batch_time = tools.AverageMeter('Time', ':6.3f')
    losses = tools.AverageMeter('Loss', ':.4e')
    top1 = tools.AverageMeter('Acc@1', ':2.2f')
    top5 = tools.AverageMeter('Acc@5', ':2.2f')
    progress = tools.ProgressMeter(len(val_loader), batch_time, losses)

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (input, target) in enumerate(val_loader):
            input = input.cuda()
            target = target.cuda()

            # compute output and loss
            output = model(input)
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1, acc5 = tools.accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), input.size(0))
            top1.update(acc1[0], input.size(0))
            top5.update(acc5[0], input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

    return top1.avg.data.cpu().numpy(), losses.avg
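# The meters above come from a local `tools` module that is not shown in this
# file. A minimal sketch of what `tools.AverageMeter` is assumed to look like,
# following the torchvision ImageNet reference example (the companion
# `ProgressMeter` simply prints these meters); this is an assumption, not the
# actual module:
class AverageMeter(object):
    """Tracks the latest value, running sum, count, and average."""

    def __init__(self, name, fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        self.val = 0
        self.sum = 0
        self.count = 0
        self.avg = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(**self.__dict__)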
def eval_dataset_cls(cfg_path, device=None):
    """Evaluate a dataset for a classification problem: equivalent to load_from + val
    in the runner, but usable for standalone dataset validation outside the runner.
    """
    # prepare the objects needed for validation
    cfg = get_config(cfg_path)
    dataset = get_dataset(cfg.valset, cfg.transform_val)
    dataloader = get_dataloader(dataset, cfg.valloader)
    model = get_model(cfg)
    if device is None:
        device = torch.device(cfg.load_device)
    # TODO: check the order of the following two statements
    load_checkpoint(model, cfg.load_from, device)
    model = model.to(device)

    # run validation
    buffer = {'acc': []}
    n_correct = 0
    model.eval()
    for c_iter, data_batch in enumerate(dataloader):
        with torch.no_grad():  # disable backprop, forward pass only
            img = to_device(data_batch['img'], device)
            label = to_device(data_batch['gt_labels'], device)

            y_pred = model(img)
            label = torch.cat(label, dim=0)
            acc1 = accuracy(y_pred, label, topk=1)
            buffer['acc'].append(acc1)

        # accumulate overall accuracy
        n_correct += buffer['acc'][-1] * len(data_batch['gt_labels'])

    vis_loss_acc(buffer, title='eval dataset')
    print('ACC on dataset: %.3f' % (n_correct / len(dataset)))
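# `to_device` above is not defined in this file. Given that `gt_labels` arrives
# as a list of tensors that is concatenated afterwards, the helper presumably
# handles both single tensors and lists of tensors; a minimal sketch under that
# assumption:
def to_device(data, device):
    # recurse into lists/tuples so each contained tensor lands on `device`
    if isinstance(data, (list, tuple)):
        return [to_device(d, device) for d in data]
    return data.to(device)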
def evaluate(self, valid_loader, model, criterion, epoch):
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    with torch.no_grad():
        for step, (input, target) in enumerate(valid_loader):
            if self.args.gpus > 0:
                input = input.cuda(non_blocking=True)
                target = target.cuda(non_blocking=True)

            # compute output
            output = model(input)
            loss = criterion(output, target)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(output.cpu().data, target.cpu().data, topk=(1, 5))
            losses.update(loss.cpu().data.item(), input.size(0))
            top1.update(prec1.item(), input.size(0))
            top5.update(prec5.item(), input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if step % self.args.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          step, len(valid_loader), batch_time=batch_time,
                          loss=losses, top1=top1, top5=top5))

    print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'.format(
        top1=top1, top5=top5))

    record = OrderedDict([
        ["epoch", epoch],
        ["time", batch_time.avg],
        ["loss", losses.avg],
        ["top1", top1.avg],
        ["top5", top5.avg],
    ])
    with open(self.valid_file, "a") as fp:
        fp.write(json.dumps(record) + "\n")

    return top1.avg
def forward_train(self, imgs, labels, **kwargs):
    preds = self.backbone(imgs)
    if self.cfg.neck:
        preds = self.neck(preds)
    if self.cfg.head:
        preds = self.cls_head(preds)

    # compute the loss
    labels = torch.cat(labels, dim=0)  # (n,)
    loss_inputs = [preds, labels]
    out_dict = self.get_losses(*loss_inputs)

    # TODO: extend the accuracy computation
    acc = accuracy(preds, labels, topk=(1, 5))
    out_dict.update(acc1=acc[0])
    out_dict.update(acc5=acc[1])
    return out_dict
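# `accuracy` is assumed to be the usual top-k classification accuracy helper
# (as in the PyTorch ImageNet example), returning one value per requested k so
# that acc[0]/acc[1] above map to top-1/top-5. A minimal sketch under that
# assumption (here topk is taken to be a tuple of ints):
import torch

def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k."""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        # indices of the top-k predictions per sample, shape (maxk, batch)
        _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res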
def train(train_loader, model, criterion, optimizer, epoch, scheduler, mixup=False):
    batch_time = tools.AverageMeter('Time', ':6.3f')
    losses = tools.AverageMeter('Loss', ':.4e')
    top1 = tools.AverageMeter('Acc@1', ':2.2f')
    top5 = tools.AverageMeter('Acc@5', ':2.2f')
    progress = tools.ProgressMeter(len(train_loader), batch_time, losses, top1, top5)

    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        input = input.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)

        # per-iteration learning-rate schedule
        scheduler.step(epoch + i / len(train_loader))
        optimizer.zero_grad()

        if not mixup:
            output = model(input)
            loss = criterion(output, target)
        else:
            # cutmix returns the output on the mixed batch and the mixed loss
            output, loss = tools.cutmix(input, target, model, criterion)

        # measure accuracy and record loss
        acc1, acc5 = tools.accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), input.size(0))
        top1.update(acc1[0], input.size(0))
        top5.update(acc5[0], input.size(0))

        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % 100 == 0:
            progress.pr2int(i)

    return top1.avg.data.cpu().numpy(), losses.avg
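# `tools.cutmix` is not shown in this file. A minimal sketch of a CutMix step
# under the usual formulation (Yun et al., 2019): paste a random rectangular
# patch from a shuffled copy of the batch and weight the loss by the patch
# area. The helper name and the beta parameter below are assumptions.
import numpy as np
import torch

def cutmix(input, target, model, criterion, beta=1.0):
    lam = np.random.beta(beta, beta)
    index = torch.randperm(input.size(0), device=input.device)

    # sample a box whose area fraction is roughly (1 - lam)
    H, W = input.size(2), input.size(3)
    cut_rat = np.sqrt(1.0 - lam)
    cut_h, cut_w = int(H * cut_rat), int(W * cut_rat)
    cy, cx = np.random.randint(H), np.random.randint(W)
    y1, y2 = np.clip(cy - cut_h // 2, 0, H), np.clip(cy + cut_h // 2, 0, H)
    x1, x2 = np.clip(cx - cut_w // 2, 0, W), np.clip(cx + cut_w // 2, 0, W)

    # paste the patch from the shuffled batch into the original batch
    input[:, :, y1:y2, x1:x2] = input[index, :, y1:y2, x1:x2]
    # adjust lambda to the exact pasted area
    lam = 1 - ((y2 - y1) * (x2 - x1) / (H * W))

    output = model(input)
    loss = lam * criterion(output, target) + (1 - lam) * criterion(output, target[index])
    return output, loss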
def main():
    root = "/home/fanyang/PycharmProjects/SignLanguage/data/robotfailuer"
    file_name = 'lp1.data.txt'
    file_path = os.path.join(root, file_name)
    dataset = load_data_robot(file_name=file_path)
    print("num data in dataset ", len(dataset))

    for i in range(5):
        train_data, dev_data = k_fold(dataset=dataset, bin_id=i)
        train_batch, train_label = train_data
        dev_batch, dev_label = dev_data
        train_batch, dev_batch = normalize_data(train_batch, dev_batch)

        # model
        model = SimpleNN1DCNNRobot(num_classes=4)
        model.cuda()
        model.criterion = nn.CrossEntropyLoss()
        # using SGD instead of Adam
        model.optimizer = optim.SGD(params=model.parameters(), lr=1e-4)

        train_batch = Variable(train_batch).cuda()
        train_label = Variable(train_label).cuda()
        dev_batch = Variable(dev_batch, volatile=True).cuda()
        dev_label = Variable(dev_label, volatile=True).cuda()

        # prepare writer
        writer_dir = 'ckpt/robot-1-bin-id-%d' % i
        saver_dir = writer_dir
        writer = SummaryWriter(writer_dir)

        # train this fold indefinitely; use `step` rather than `i` so the fold
        # index above is not shadowed
        for step in count():
            model.train()
            logits = model(train_batch, writer)
            loss = model.criterion(logits, train_label)

            model.optimizer.zero_grad()
            loss.backward()
            model.optimizer.step()

            print("epoch:{}, loss:{}".format(step, loss.cpu().data.numpy()[0]))
            writer.add_scalar('train/loss', loss.cpu().data.numpy(), global_step=step)
            writer.add_scalar('train/accu',
                              tools.accuracy(logits, train_label).cpu().data.numpy(),
                              global_step=step)

            torch.save(model.state_dict(),
                       os.path.join(saver_dir, 'record-step-%d-model.pkl' % step))
            # just keep the latest 5 parameter checkpoints: drop the one saved
            # five steps ago
            if step >= 5:
                os.remove(os.path.join(saver_dir, 'record-step-%d-model.pkl' % (step - 5)))

            tools.adjust_learning_rate(model.optimizer)

            # switch the model to evaluation mode
            model.eval()
            logits = model(dev_batch)
            loss = model.criterion(logits, dev_label)
            writer.add_scalar('val/loss', loss.cpu().data.numpy(), global_step=step)
            writer.add_scalar('val/accu',
                              tools.accuracy(logits, dev_label).cpu().data.numpy(),
                              global_step=step)
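# `tools.adjust_learning_rate` is called once per step above but is not defined
# in this file. A common minimal form decays every parameter group by a fixed
# factor; the decay value here is an assumption, not the repo's actual setting:
def adjust_learning_rate(optimizer, decay=0.999):
    # multiplicative per-step decay applied to all parameter groups
    for param_group in optimizer.param_groups:
        param_group['lr'] *= decay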
def train(self, train_loader, model, criterion, optimizer, epoch):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    samples_per_second = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for step, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if self.args.gpus > 0:
            input = input.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)

        # compute output
        output = model(input)
        loss = criterion(output, target)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.cpu().data, target.cpu().data, topk=(1, 5))
        losses.update(loss.cpu().data.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time / executed samples
        elapsed_time = time.time() - end
        batch_time.update(elapsed_time)
        total_samples = self.args.batch_size / elapsed_time  # forward samples per second
        samples_per_second.update(total_samples)
        end = time.time()

        if step % self.args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.1f} (average {data_time.avg:.1f} samples/s)\t'
                  'Loss {loss.val:.4f} (average {loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} (average {top1.avg:.3f})\t'
                  'Prec@5 {top5.val:.3f} (average {top5.avg:.3f})'.format(
                      epoch, (step + 1), len(train_loader),
                      batch_time=batch_time, data_time=samples_per_second,
                      loss=losses, top1=top1, top5=top5))

    record = OrderedDict([
        # ["iter", i / len(train_loader)],
        ["epoch", epoch],
        ["time", batch_time.val],
        ["loss", losses.val],
        ["top1", top1.val],
        ["top5", top5.val],
    ])
    with open(self.train_file, "a") as fp:
        fp.write(json.dumps(record) + "\n")

    self.writer.add_scalar("time", batch_time.val, epoch)
    self.writer.add_scalar("loss", losses.val, epoch)
    self.writer.add_scalar("top1", top1.val, epoch)
    self.writer.add_scalar("top5", top5.val, epoch)
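# Sketch of how train()/evaluate() above are typically driven per epoch. The
# attribute names on `self` below (epochs, best_model_file) are hypothetical,
# inferred from how the two methods are used; adjust to the actual trainer.
# best_top1 = 0.0
# for epoch in range(self.args.epochs):
#     self.train(train_loader, model, criterion, optimizer, epoch)
#     top1 = self.evaluate(valid_loader, model, criterion, epoch)
#     if top1 > best_top1:
#         best_top1 = top1
#         torch.save(model.state_dict(), self.best_model_file)  # hypothetical path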
def eval(loader, model, epoch, criterion, threshold, use_cuda=None):
    """
    :param loader: validation DataLoader
    :param model: model to evaluate
    :param epoch: current epoch (used for logging only)
    :param criterion: loss function
    :param threshold: per-class thresholds for the multi-label metrics
    :param use_cuda: move batches to GPU when True
    :return: (OrderedDict of metrics, updated threshold list)
    """
    model.eval()

    acc_top1_meter = AverageMeter()
    acc_top5_meter = AverageMeter()
    precision_meter = AverageMeter()
    acc_meter = AverageMeter()
    f2_score_meter = AverageMeter()
    loss_meter = AverageMeter()
    batch_time_meter = AverageMeter()

    output_list = []
    target_list = []
    start_time = time.time()
    pbar = tqdm(loader)

    with torch.no_grad():
        for batch_idx, (inputs, targets, _) in enumerate(pbar):
            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            input_var = inputs

            if args.multi_label and args.loss == 'nll':
                # pick one of the labels for validation loss,
                # should we randomize like in train?
                target_var = targets.max(dim=1)[1].squeeze()
            else:
                target_var = targets

            # calculate output
            outputs = model(input_var)

            # calculate loss
            loss = criterion(outputs, target_var)
            loss_meter.update(loss.item(), input_var.size(0))

            # --------------------------------metric-----------------------------------
            if args.multi_label:
                # multi label
                if args.loss == 'nll':
                    outputs = F.softmax(outputs, dim=1)
                else:
                    outputs = torch.sigmoid(outputs)
                # note: keep the batch value in `f2` so the f2_score() function
                # used after the loop is not shadowed
                acc, p, _, f2 = scores(outputs.data, target_var.data, threshold)
                acc_meter.update(acc, outputs.size(0))
                precision_meter.update(p, outputs.size(0))
                f2_score_meter.update(f2, outputs.size(0))
            else:
                # single label
                acc_1, acc_5 = accuracy(outputs.data, targets, topk=(1, 5))
                acc_top1_meter.update(acc_1[0], outputs.size(0))
                acc_top5_meter.update(acc_5[0], outputs.size(0))

            batch_time_meter.update(time.time() - start_time)

            pbar.set_description('Eval Epoch: {} [{}/{} ({:.0f}%)]'.format(
                epoch, batch_idx * len(input_var), len(loader.sampler),
                100. * batch_idx / len(loader)))
            if args.multi_label:
                pbar.set_postfix_str(
                    'Time {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                    'Loss {loss.val:.4f} ({loss.avg:.4f}) '
                    'Acc {acc.val:.4f} ({acc.avg:.4f}) '
                    'Prec {prec.val:.4f} ({prec.avg:.4f}) '
                    'F2 {f2.val:.4f} ({f2.avg:.4f})'.format(
                        batch_time=batch_time_meter, loss=loss_meter,
                        acc=acc_meter, prec=precision_meter,
                        f2=f2_score_meter), refresh=True)
            else:
                pbar.set_postfix_str(
                    'Time {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                    'Loss {loss.val:.4f} ({loss.avg:.4f}) '
                    'Prec@1 {top1.val:.4f} ({top1.avg:.4f}) '
                    'Prec@5 {top5.val:.4f} ({top5.avg:.4f})'.format(
                        batch_time=batch_time_meter, loss=loss_meter,
                        top1=acc_top1_meter, top5=acc_top5_meter), refresh=True)

            # record output and target to search the best threshold per category
            target_list.append(targets.cpu().numpy())
            output_list.append(outputs.data.cpu().numpy())

            start_time = time.time()

    # ----------------------------------update threshold-------------------------------------------
    output_total = np.concatenate(output_list, axis=0)
    target_total = np.concatenate(target_list, axis=0)
    if args.multi_label:
        new_threshold, f2 = optimise_f2_thresholds(target_total, output_total, verbose=False)
        metrics = [('loss', loss_meter.avg), ('acc', acc_meter.avg), ('f2', f2)]
        print('latest threshold {} => best f2-score {}'.format(new_threshold, f2))
    else:
        f2 = f2_score(output_total, target_total, threshold=0.5)
        new_threshold = []
        metrics = [('loss', loss_meter.avg), ('acc_top1', acc_top1_meter.avg),
                   ('acc_top5', acc_top5_meter.avg), ('f2', f2)]

    return OrderedDict(metrics), new_threshold
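# `scores`, `f2_score`, and `optimise_f2_thresholds` come from a metrics module
# that is not shown here. For reference, the F2 metric used above is the
# F-beta score with beta=2 (recall weighted four times as heavily as
# precision); a minimal per-sample sketch under that assumption, operating on
# the concatenated numpy arrays built in eval():
import numpy as np

def f2_score_sketch(probs, targets, threshold=0.5, beta=2.0, eps=1e-9):
    preds = (probs > threshold).astype(np.float64)
    tp = (preds * targets).sum(axis=1)
    precision = tp / (preds.sum(axis=1) + eps)
    recall = tp / (targets.sum(axis=1) + eps)
    f2 = (1 + beta ** 2) * precision * recall / (beta ** 2 * precision + recall + eps)
    return f2.mean()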
def train(loader, model, epoch, criterion, optimizer, threshold, class_weights=None, use_cuda=None):
    """
    :param loader: training DataLoader
    :param model: model to train
    :param epoch: current epoch (used for logging only)
    :param criterion: loss function
    :param optimizer: optimizer updated each step
    :param threshold: per-class thresholds for the multi-label metrics
    :param class_weights: optional per-class sampling weights for the nll setting
    :param use_cuda: move batches to GPU when True
    :return: OrderedDict of epoch metrics
    """
    global global_step

    model.train()

    acc_top1_meter = AverageMeter()
    acc_top5_meter = AverageMeter()
    precision_meter = AverageMeter()
    acc_meter = AverageMeter()
    f2_score_meter = AverageMeter()
    loss_meter = AverageMeter()
    batch_time_meter = AverageMeter()

    start_time = time.time()
    pbar = tqdm(loader)
    for batch_idx, (inputs, targets, _) in enumerate(pbar):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        input_var = inputs

        if args.multi_label and args.loss == 'nll':
            # in the multi-label nll setting, train the network by sampling one
            # label index per sample, using the class weights when provided
            if class_weights is not None:
                # normalize class weights
                target_weights = targets * torch.unsqueeze(class_weights, 0).expand_as(targets)
                sum_weights = target_weights.sum(dim=1, keepdim=True).expand_as(target_weights)
                target_weights = target_weights.div(sum_weights)
            else:
                target_weights = targets
            target_var = torch.multinomial(target_weights, 1).squeeze().long()
        else:
            target_var = targets

        outputs = model(input_var)
        loss = criterion(outputs, target_var)
        loss_meter.update(loss.item(), input_var.size(0))

        # clear gradients
        optimizer.zero_grad()
        # compute gradients
        loss.backward()
        # with amp.scale_loss(loss, optimizer) as scaled_loss:
        #     scaled_loss.backward()
        # update params
        optimizer.step()
        global_step += 1

        # --------------------------metric---------------------------------------
        if args.loss == 'nll':
            outputs = F.softmax(outputs, dim=1)
        else:
            outputs = torch.sigmoid(outputs)

        if args.multi_label:
            acc, p, _, f2 = scores(outputs.data, target_var.data, threshold)
            acc_meter.update(acc, outputs.size(0))
            precision_meter.update(p, outputs.size(0))
            f2_score_meter.update(f2, outputs.size(0))
        else:
            acc_1, acc_5 = accuracy(outputs.data, targets, topk=(1, 5))
            acc_top1_meter.update(acc_1[0], outputs.size(0))
            acc_top5_meter.update(acc_5[0], outputs.size(0))

        batch_time_meter.update(time.time() - start_time)

        pbar.set_description('Train Epoch: {} [{}/{} ({:.0f}%)]'.format(
            epoch, batch_idx * len(input_var), len(loader.sampler),
            100. * batch_idx / len(loader)))
        if args.multi_label:
            pbar.set_postfix_str(
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                'Loss {loss.val:.4f} ({loss.avg:.4f}) '
                'Acc {acc.val:.4f} ({acc.avg:.4f}) '
                'Prec {prec.val:.4f} ({prec.avg:.4f}) '
                'F2 {f2.val:.4f} ({f2.avg:.4f})'.format(
                    batch_time=batch_time_meter, loss=loss_meter,
                    acc=acc_meter, prec=precision_meter,
                    f2=f2_score_meter), refresh=True)
        else:
            pbar.set_postfix_str(
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                'Loss {loss.val:.4f} ({loss.avg:.4f}) '
                'Prec@1 {top1.val:.4f} ({top1.avg:.4f}) '
                'Prec@5 {top5.val:.4f} ({top5.avg:.4f})'.format(
                    batch_time=batch_time_meter, loss=loss_meter,
                    top1=acc_top1_meter, top5=acc_top5_meter), refresh=True)

        start_time = time.time()

        # write train log
        if (global_step + 1) % args.summary_iter == 0:
            writer.add_scalar(tag='train/loss', scalar_value=loss.cpu().item(),
                              global_step=global_step)
            if args.multi_label:
                # acc and precision are only computed in the multi-label branch above
                writer.add_scalar(tag='train/acc', scalar_value=acc,
                                  global_step=global_step)
                writer.add_scalar(tag='train/precision', scalar_value=p,
                                  global_step=global_step)

    if args.multi_label:
        metrics = OrderedDict([('loss', loss_meter.avg), ('acc', acc_meter.avg)])
    else:
        metrics = OrderedDict([('loss', loss_meter.avg),
                               ('acc_top1', acc_top1_meter.avg),
                               ('acc_top5', acc_top5_meter.avg)])
    return metrics
def __step_train(i, hr, lr, epoch_length, epoch_dis, epoch_gen, dis_trained,
                 gen_trained, sr_fadein, log_this_it, save_this_it):
    '''
    1 Input Handling:
    - high resolution frames to cuda
    - first and last frame of low resolution sequence to cuda
    - pad inputs with H or W < 128
    '''
    assert(len(hr) == len(lr) == (args.interpolationsteps + 2))
    hr = [Variable(frame.cuda() if args.use_cuda else frame, requires_grad = args.log_gradients) for frame in hr]
    lr = [Variable(frame.cuda() if args.use_cuda else frame, requires_grad = args.log_gradients) for frame in lr]
    lr_start = lr[0]
    lr_end = lr[-1]

    '''
    2.1 Recurrent interpolation:
    - frames
    - charbonnier loss
    - perceptual loss
    - ping-pong loss
    '''
    outputs_sr, outputs_lr = generator(frame_start = lr_start, frame_end = lr_end, low_memory = args.low_memory)
    loss_charbonnier_sr = loss_function.charbonnier_loss(output = outputs_sr, target = hr, epsilon = args.epsilon)
    loss_charbonnier_lr = loss_function.charbonnier_loss(output = outputs_lr[1:-1], target = lr[1:-1], epsilon = args.epsilon)

    if args.loss_perceptual:
        loss_perceptual = 0
        for h, s in zip(hr, outputs_sr):
            vgg_real = vgg(x = h, normalize = True)
            vgg_fake = vgg(x = s, normalize = True)
            loss_perceptual += loss_function.cosine_similarity(vgg_real, vgg_fake)

    if args.loss_pingpong:
        # TODO: LR ping pong as well?
        outputs_sr_rev = generator(frame_start = lr_end, frame_end = lr_start, low_memory = args.low_memory)[0]
        outputs_sr_rev.reverse()
        loss_pingpong = loss_function.pingpong_loss(outputs_sr, outputs_sr_rev)

    '''
    2.2 Discriminator:
    - GAN loss
    - Layer loss
    '''
    if args.gan:
        discriminator_input_real = generator.prepare_discriminator_inputs(
            sr = hr, frame0 = lr[0], frame1 = lr[-1],
            temporal = args.temporal, spatial = args.spatial, context = args.context,
            depth = args.depth, flow = args.flow)
        discriminator_input_fake = generator.prepare_discriminator_inputs(
            sr = outputs_sr, frame0 = lr[0], frame1 = lr[-1],
            temporal = args.temporal, spatial = args.spatial, context = args.context,
            depth = args.depth, flow = args.flow)

        score_real, layers_real = discriminator(discriminator_input_real.detach())
        score_fake, _ = discriminator(discriminator_input_fake.detach())
        score_gen, layers_gen = discriminator(discriminator_input_fake)

        loss_gen, loss_dis, loss_real, loss_fake = loss_function.gan_loss(
            score_real = score_real, score_fake = score_fake, score_gen = score_gen,
            epsilon = args.gan_epsilon, batchsize = args.batch_size)
        loss_layer = loss_function.layerloss(layers_real, layers_gen) if args.loss_layer else 0

    '''
    3 Loss handling:
    - weighting
    - balance generator and discriminator power
    - backpropagation
    '''
    # Combine losses
    loss_total = loss_charbonnier_lr * args.scale_lr / (args.scale_lr + args.scale_sr * sr_fadein)
    loss_total += loss_charbonnier_sr * args.scale_sr * sr_fadein / (args.scale_lr + args.scale_sr * sr_fadein)
    if args.loss_layer:
        loss_total += loss_layer * args.scale_layer * sr_fadein
    if args.loss_perceptual:
        loss_total += loss_perceptual * args.scale_perceptual * sr_fadein
    if args.loss_pingpong:
        loss_total += loss_pingpong * args.scale_pingpong * sr_fadein
    if args.gan:
        loss_total += loss_gen * args.scale_gan * sr_fadein

    # Scheduling
    if args.gan:
        gen_behindness = math.fabs(loss_gen.item()) - math.fabs(loss_real.item())
        train_dis = False if args.freeze_dis else (True if args.freeze_gen else (gen_behindness < args.dis_threshold))
        train_gen = False if args.freeze_gen else (True if args.freeze_dis else (gen_behindness > args.gen_threshold))
        dis_trained += train_dis
        gen_trained += train_gen
        epoch_dis += train_dis
        epoch_gen += train_gen

        # Backpropagation
        optimizer_d.zero_grad()
        optimizer_g.zero_grad()
        if train_dis:
            # intermediate layers of the discriminator are reused for the generator layer loss
            loss_dis.backward(retain_graph = train_gen)
        if train_gen:
            loss_total.backward()
    else:
        optimizer_g.zero_grad()
        loss_total.backward()
        train_gen = True

    '''
    4 Logging:
    - AverageMeters to print to screen
    - Losses to tensorboard
    - Gradients and weights to tensorboard
    - Save debugging frames to disk
    '''
    if args.gan:
        t_acc_real, t_acc_fake = tools.accuracy(score_real = torch.mean(score_real).item(),
                                                score_fake = torch.mean(score_fake).item())
        t_accuracy.update(val = 0.5 * t_acc_real + 0.5 * t_acc_fake, weight = args.batch_size)

    t_loss_charbonnier_sr.update(val = loss_charbonnier_sr.item(), n = args.batch_size)
    t_loss_charbonnier_lr.update(val = loss_charbonnier_lr.item(), n = args.batch_size)
    t_loss_perceptual.update(val = loss_perceptual.item() if args.loss_perceptual else 0, n = args.batch_size)
    t_loss_pingpong.update(val = loss_pingpong.item() if args.loss_pingpong else 0, n = args.batch_size)

    logger.log_scalars(tag = 'Generator', tag_value_dict = {
        'total': loss_total.item(),
        'charbonnier': loss_charbonnier_sr.item(),
        'charbonnier_lr': loss_charbonnier_lr.item(),
        'gan': loss_gen.item() if args.gan else 0,
        'layer': loss_layer.item() if args.loss_layer else 0,
        'pingpong': loss_pingpong.item() if args.loss_pingpong else 0,
        'perceptual': loss_perceptual.item() if args.loss_perceptual else 0},
        epoch = t, n_batch = i, num_batches = epoch_length)

    if args.gan:
        logger.log_scalars(tag = 'Discriminator/loss', tag_value_dict = {
            'real': loss_real.item(),
            'fake': loss_fake.item(),
            'gen_behindness': gen_behindness},
            epoch = t, n_batch = i, num_batches = epoch_length)
        logger.log_scalars(tag = 'Discriminator/scores', tag_value_dict = {
            'real': torch.mean(score_real).item(),
            'fake': torch.mean(score_fake).item()},
            epoch = t, n_batch = i, num_batches = epoch_length)
        logger.log_scalars(tag = 'Discriminator/detection_performance', tag_value_dict = {
            'real': t_acc_real,
            'fake': t_acc_fake,
            'avg': t_accuracy.avg},
            epoch = t, n_batch = i, num_batches = epoch_length)
        logger.log_scalars(tag = 'Scheduling', tag_value_dict = {
            'train_discriminator': 1 if train_dis else 0,
            'train_generator': 1 if train_gen else 0,
            'gen_behindness': gen_behindness},
            epoch = t, n_batch = i, num_batches = epoch_length)
        logger.log_scalars(tag = 'Overview_trained', tag_value_dict = {
            'generator': gen_trained,
            'discriminator': dis_trained},
            epoch = t, n_batch = i, num_batches = epoch_length)

    if args.log_gradients:
        if train_gen:
            logger.log_histogram(tag = 'weights/filter', values = tools.model_parameters(generator.initScaleNets_filter, 'weights'), epoch = t, n_batch = i, num_batches = epoch_length)
            logger.log_histogram(tag = 'gradients/filter', values = tools.model_parameters(generator.initScaleNets_filter, 'gradients'), epoch = t, n_batch = i, num_batches = epoch_length)
            logger.log_histogram(tag = 'weights/filter1', values = tools.model_parameters(generator.initScaleNets_filter1, 'weights'), epoch = t, n_batch = i, num_batches = epoch_length)
            logger.log_histogram(tag = 'gradients/filter1', values = tools.model_parameters(generator.initScaleNets_filter1, 'gradients'), epoch = t, n_batch = i, num_batches = epoch_length)
            logger.log_histogram(tag = 'weights/filter2', values = tools.model_parameters(generator.initScaleNets_filter2, 'weights'), epoch = t, n_batch = i, num_batches = epoch_length)
            logger.log_histogram(tag = 'gradients/filter2', values = tools.model_parameters(generator.initScaleNets_filter2, 'gradients'), epoch = t, n_batch = i, num_batches = epoch_length)
            logger.log_histogram(tag = 'weights/ctxNet', values = tools.model_parameters(generator.ctxNet, 'weights'), epoch = t, n_batch = i, num_batches = epoch_length)
            logger.log_histogram(tag = 'gradients/ctxNet', values = tools.model_parameters(generator.ctxNet, 'gradients'), epoch = t, n_batch = i, num_batches = epoch_length)
            logger.log_histogram(tag = 'weights/flownets', values = tools.model_parameters(generator.flownets, 'weights'), epoch = t, n_batch = i, num_batches = epoch_length)
            logger.log_histogram(tag = 'gradients/flownets', values = tools.model_parameters(generator.flownets, 'gradients'), epoch = t, n_batch = i, num_batches = epoch_length)
            logger.log_histogram(tag = 'weights/depthNet', values = tools.model_parameters(generator.depthNet, 'weights'), epoch = t, n_batch = i, num_batches = epoch_length)
            logger.log_histogram(tag = 'gradients/depthNet', values = tools.model_parameters(generator.depthNet, 'gradients'), epoch = t, n_batch = i, num_batches = epoch_length)
            logger.log_histogram(tag = 'weights/rectifyNet', values = tools.model_parameters(generator.rectifyNet, 'weights'), epoch = t, n_batch = i, num_batches = epoch_length)
            logger.log_histogram(tag = 'gradients/rectifyNet', values = tools.model_parameters(generator.rectifyNet, 'gradients'), epoch = t, n_batch = i, num_batches = epoch_length)
            logger.log_histogram(tag = 'weights/mergeNet', values = tools.model_parameters(generator.mergeNet, 'weights'), epoch = t, n_batch = i, num_batches = epoch_length)
            logger.log_histogram(tag = 'gradients/mergeNet', values = tools.model_parameters(generator.mergeNet, 'gradients'), epoch = t, n_batch = i, num_batches = epoch_length)
            logger.log_histogram(tag = 'weights/upscaleNet', values = tools.model_parameters(generator.upscaleNet, 'weights'), epoch = t, n_batch = i, num_batches = epoch_length)
            logger.log_histogram(tag = 'gradients/upscaleNet', values = tools.model_parameters(generator.upscaleNet, 'gradients'), epoch = t, n_batch = i, num_batches = epoch_length)
        if args.gan and train_dis:
            logger.log_histogram(tag = 'weights/discriminator', values = tools.model_parameters(discriminator, 'weights'), epoch = t, n_batch = i, num_batches = epoch_length)
            logger.log_histogram(tag = 'gradients/discriminator', values = tools.model_parameters(discriminator, 'gradients'), epoch = t, n_batch = i, num_batches = epoch_length)

    # Print debugging frames
    if args.debug_output and (log_this_it or i == 0):
        if args.tb_debugframes:
            logger.save_images(tag = args.unique_id + '_' + str(t).zfill(3) + '_' + str(i).zfill(5) + '/real',
                               image = torch.cat([frame for frame in hr], dim = 0),
                               epoch = t, n_batch = i, num_batches = epoch_length)
            logger.save_images(tag = args.unique_id + '_' + str(t).zfill(3) + '_' + str(i).zfill(5) + '/fake',
                               image = torch.cat([frame for frame in outputs_sr], dim = 0),
                               epoch = t, n_batch = i, num_batches = epoch_length)
        tools.print_tensor(
            path = os.path.join(debug_root, args.unique_id + '_' + str(t).zfill(3) + '_' + str(i).zfill(5) + '_real.png'),
            img = tools.printable_tensor([frame.detach().cpu().numpy() for frame in hr]))
        tools.print_tensor(
            path = os.path.join(debug_root, args.unique_id + '_' + str(t).zfill(3) + '_' + str(i).zfill(5) + '_fake.png'),
            img = tools.printable_tensor([frame.detach().cpu().numpy() for frame in outputs_sr]))

        if args.loss_pingpong:
            if args.tb_debugframes:
                logger.save_images(tag = args.unique_id + '_' + str(t).zfill(3) + '_' + str(i).zfill(5) + '/rev',
                                   image = torch.cat([frame for frame in outputs_sr_rev], dim = 0),
                                   epoch = t, n_batch = i, num_batches = epoch_length)
            tools.print_tensor(
                path = os.path.join(debug_root, args.unique_id + '_' + str(t).zfill(3) + '_' + str(i).zfill(5) + '_rev.png'),
                img = tools.printable_tensor([frame.detach().cpu().numpy() for frame in outputs_sr_rev]))

    '''
    5 Finish:
    - optimizer step
    - save intermediate weights
    '''
    if args.gan and train_dis:
        if args.gradient_scaling > 0:
            tools.rescale_gradients(model = discriminator, magnitude = args.gradient_scaling)
        optimizer_d.step()
    if train_gen:
        if args.gradient_scaling > 0:
            tools.rescale_gradients(model = generator, magnitude = args.gradient_scaling)
        optimizer_g.step()

    # Save intermediate weights
    if save_this_it:
        torch.save(generator.state_dict(),
                   os.path.join(args.save_path, str(t).zfill(3) + '_' + str(i).zfill(6) + "_GEN.pth"))
        if args.gan:
            torch.save(discriminator.state_dict(),
                       os.path.join(args.save_path, str(t).zfill(3) + '_' + str(i).zfill(6) + "_DIS.pth"))

    return epoch_dis, epoch_gen, dis_trained, gen_trained
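# `loss_function.charbonnier_loss` above is assumed to be the standard
# Charbonnier (smooth L1-like) penalty, averaged over all frames of the
# sequence; a minimal sketch under that assumption, taking lists of frame
# tensors as the caller does:
import torch

def charbonnier_loss(output, target, epsilon = 1e-3):
    loss = 0.0
    for out, tgt in zip(output, target):
        # sqrt((x - y)^2 + eps^2), averaged over all pixels of the frame
        loss = loss + torch.mean(torch.sqrt((out - tgt) ** 2 + epsilon ** 2))
    return loss / len(output)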