def train(train_loader, model, criterion, optimizer, epoch):
    """Run one training epoch of the depth-estimation model.

    Prints per-batch metrics every ``args.print_freq`` batches and appends
    the epoch averages to ``train_csv``.  Relies on module-level globals:
    ``args``, ``output_directory``, ``train_csv``, ``fieldnames``.
    """
    average_meter = AverageMeter()
    model.train()  # switch to train mode
    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        input, target = input.cuda(), target.cuda()
        # synchronize so the elapsed wall time reflects finished GPU work
        torch.cuda.synchronize()
        data_time = time.time() - end

        # compute pred
        end = time.time()
        pred = model(input)
        loss = criterion(pred, target)
        optimizer.zero_grad()
        loss.backward()  # compute gradient and do SGD step
        optimizer.step()
        torch.cuda.synchronize()
        gpu_time = time.time() - end

        # measure accuracy and record loss
        result = Result()
        result.evaluate(pred.data, target.data)
        average_meter.update(result, gpu_time, data_time, input.size(0))
        end = time.time()

        if (i + 1) % args.print_freq == 0:
            print('=> output: {}'.format(output_directory))
            print('Train Epoch: {0} [{1}/{2}]\t'
                  't_Data={data_time:.3f}({average.data_time:.3f}) '
                  't_GPU={gpu_time:.3f}({average.gpu_time:.3f})\n\t'
                  'RMSE={result.rmse:.2f}({average.rmse:.2f}) '
                  'MAE={result.mae:.2f}({average.mae:.2f}) '
                  'Delta1={result.delta1:.3f}({average.delta1:.3f}) '
                  'REL={result.absrel:.3f}({average.absrel:.3f}) '
                  'Lg10={result.lg10:.3f}({average.lg10:.3f}) '.format(
                      epoch, i + 1, len(train_loader), data_time=data_time,
                      gpu_time=gpu_time, result=result,
                      average=average_meter.average()))

    # Persist the epoch averages for later analysis.
    avg = average_meter.average()
    with open(train_csv, 'a') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writerow({
            'mse': avg.mse, 'rmse': avg.rmse, 'absrel': avg.absrel,
            'lg10': avg.lg10, 'mae': avg.mae, 'delta1': avg.delta1,
            'delta2': avg.delta2, 'delta3': avg.delta3,
            'gpu_time': avg.gpu_time, 'data_time': avg.data_time
        })
def artif_run_epoch(t, args, model, criterion, optimizer, train_dl, early_stop_dl, val_dl, val_dl_cluster, eval_fn, device):
    """Train on artificial data for one epoch, then gather eval metrics.

    The target is the pairwise same-cluster indicator matrix derived from
    the labels.  Returns a metrics dict including 'train_loss' and
    'early_stop_loss'.
    """
    loss_meter = AverageMeter()
    model.train()
    for features_batch in train_dl:
        inputs = features_batch['X'].to(device)
        labels = features_batch['label']
        # Pairwise same-cluster indicator matrix used as the target.
        targets = (labels.unsqueeze(-2) == labels.unsqueeze(-1)).float().to(device)
        scores = model(inputs)
        batch_loss = criterion(scores, targets)
        batch_loss.backward()
        loss_meter.update(batch_loss.item())
        # optimize
        optimizer.step()
        optimizer.zero_grad()
    # Eval: placeholder values unless evaluation runs this epoch.
    metrics_log = dict.fromkeys(
        ['val_loss', 'val_ari', 'val_nmi', 'val_acc',
         'val_tpr', 'val_tnr', 'val_num_failures'])
    if args.eval_during_training:
        run_clustering_eval = ((t + 1) % args.eval_freq == 0) or (t == 0)
        if run_clustering_eval:
            metrics_log = eval_fn(model, criterion, val_dl_cluster, args, device, True)
        else:
            metrics_log = eval_fn(model, criterion, val_dl, args, device, False)
    early_stop_metrics_log = eval_fn(model, criterion, early_stop_dl, args, device, False)
    metrics_log['early_stop_loss'] = early_stop_metrics_log['val_loss']
    metrics_log['train_loss'] = loss_meter.avg
    return metrics_log
def train_one_epoch(epoch, model, train_loader, optimizer, tokenizer, params):
    """Train a masked-language model for one epoch.

    Masks tokens via ``mask_tokens`` and tracks the running loss and the
    masked-token prediction accuracy.
    """
    device = params.device
    avg_loss = AverageMeter()
    # Accuracy ignores positions whose label is -1 (i.e. unmasked tokens).
    avg_acc = Accuracy(ignore_index=-1)
    model.train()
    for i, batch in enumerate(train_loader):
        optimizer.zero_grad()
        batch = batch.to(device)
        # segment = create_dummy_segment(batch)
        inputs, labels = mask_tokens(batch, tokenizer, params)
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs, masked_lm_labels=labels)
        loss, prediction_scores = outputs[:2]  # model outputs are always tuple in transformers (see doc)
        loss.backward()
        optimizer.step()
        avg_acc.update(prediction_scores.view(-1, params.vocab_size), labels.view(-1))
        avg_loss.update(loss.item())
    # NOTE(review): assumed to log once per epoch (matching ``evaluate``);
    # the running-average meter is read via its ``__call__``.
    logging.info('Train-E-{}: loss: {:.4f}'.format(epoch, avg_loss()))
def train_epoch(train_loader, model, loss_fn, optimizer, use_cuda, logger):
    """Train ``model`` for one epoch.

    Returns:
        (top1.avg, losses.avg): average top-1 precision and average loss.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    model.train()
    end_time = time.time()
    for i, (input, label) in enumerate(tqdm(train_loader)):
        data_time.update(time.time() - end_time)
        if use_cuda:
            label = label.cuda()
            input = input.cuda()
        # FIX: dropped the deprecated ``torch.autograd.Variable`` wrappers
        # that were created inside a ``torch.no_grad()`` block — tensors are
        # autograd-aware since PyTorch 0.4, and wrapping inputs under
        # no_grad in a *training* loop is at best a no-op and at worst
        # detaches the graph.  Tensors are used directly.
        output = model(input)
        loss = loss_fn(output, label)
        prec1, prec5 = accuracy(output.data, label, topk=(1, 5))
        # FIX: record a Python float instead of the CUDA loss tensor so the
        # meter does not retain GPU memory across the epoch.
        losses.update(loss.item(), input.size(0))
        top1.update(prec1, input.size(0))
        top5.update(prec5, input.size(0))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        batch_time.update(time.time() - end_time)
        end_time = time.time()
    logger.info(
        ' **Train** Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'.format(
            top1=top1, top5=top5))
    return top1.avg, losses.avg
def train(train_loader, model, criterion, optimizer, epoch, logger):
    """Run one depth-training epoch, logging scalars to TensorBoard ``logger``.

    Uses module-level globals ``args`` and ``output_directory``.  Scalars are
    logged every ``args.print_freq`` batches at step ``epoch * batch_num + i``.
    """
    average_meter = AverageMeter()
    model.train()  # switch to train mode
    end = time.time()
    batch_num = len(train_loader)
    for i, (input, target) in enumerate(train_loader):
        # itr_count += 1
        input, target = input.cuda(), target.cuda()
        # print('input size = ', input.size())
        # print('target size = ', target.size())
        torch.cuda.synchronize()
        data_time = time.time() - end

        # compute pred
        end = time.time()
        pred = model(input)  # @wx note the output
        # print('pred size = ', pred.size())
        # print('target size = ', target.size())
        loss = criterion(pred, target)
        optimizer.zero_grad()
        loss.backward()  # compute gradient and do SGD step
        optimizer.step()
        torch.cuda.synchronize()
        gpu_time = time.time() - end

        # measure accuracy and record loss
        result = Result()
        result.evaluate(pred.data, target.data)
        average_meter.update(result, gpu_time, data_time, input.size(0))
        end = time.time()

        if (i + 1) % args.print_freq == 0:
            print('=> output: {}'.format(output_directory))
            print('Train Epoch: {0} [{1}/{2}]\t'
                  't_Data={data_time:.3f}({average.data_time:.3f}) '
                  't_GPU={gpu_time:.3f}({average.gpu_time:.3f})\n\t'
                  'Loss={Loss:.5f} '
                  'RMSE={result.rmse:.2f}({average.rmse:.2f}) '
                  'RML={result.absrel:.2f}({average.absrel:.2f}) '
                  'Log10={result.lg10:.3f}({average.lg10:.3f}) '
                  'Delta1={result.delta1:.3f}({average.delta1:.3f}) '
                  'Delta2={result.delta2:.3f}({average.delta2:.3f}) '
                  'Delta3={result.delta3:.3f}({average.delta3:.3f})'.format(
                      epoch, i + 1, len(train_loader), data_time=data_time,
                      gpu_time=gpu_time, Loss=loss.item(), result=result,
                      average=average_meter.average()))
            current_step = epoch * batch_num + i
            logger.add_scalar('Train/RMSE', result.rmse, current_step)
            logger.add_scalar('Train/rml', result.absrel, current_step)
            logger.add_scalar('Train/Log10', result.lg10, current_step)
            logger.add_scalar('Train/Delta1', result.delta1, current_step)
            logger.add_scalar('Train/Delta2', result.delta2, current_step)
            logger.add_scalar('Train/Delta3', result.delta3, current_step)

    # NOTE(review): ``avg`` is computed but never used or returned here —
    # likely leftover from a CSV-writing variant of this function.
    avg = average_meter.average()
def validate(val_loader, model, criterion_tracking, criterion_completion, epoch, lambda_completion=0):
    """Evaluate tracking and completion losses on the validation set.

    The total loss is ``tracking + lambda_completion * completion``; either
    term is skipped (replaced with zero) when its weight makes it irrelevant.
    Returns the mean total loss.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    loss_tracking = AverageMeter()
    loss_completion = AverageMeter()
    losses = AverageMeter()
    # switch to evaluate mode
    model.eval()
    with tqdm(enumerate(val_loader), total=len(val_loader), ncols=120) as progress:
        with torch.no_grad():
            tic = time.time()
            for step, (this_PC, prev_PC, model_PC, target) in progress:
                # measure data loading time
                data_time.update(time.time() - tic)
                this_PC = this_PC.cuda()
                prev_PC = prev_PC.cuda()
                model_PC = model_PC.cuda()
                target = target.cuda(non_blocking=True).view(-1)

                output, prev_PC_AE = model(this_PC, model_PC)

                # Tracking term only when it carries weight (< 1), completion
                # term only when its weight is non-zero.
                if lambda_completion < 1:
                    tracking_term = criterion_tracking(output, target)
                else:
                    tracking_term = torch.tensor([0]).float().cuda()
                if lambda_completion != 0:
                    completion_term = criterion_completion(prev_PC_AE, model_PC)
                else:
                    completion_term = torch.tensor([0]).float().cuda()
                total = tracking_term + lambda_completion * completion_term

                # measure accuracy and record loss
                n = this_PC.size(0)
                loss_tracking.update(tracking_term.item(), n)
                loss_completion.update(completion_term.item(), n)
                losses.update(total.item(), n)

                # measure elapsed time
                batch_time.update(time.time() - tic)
                tic = time.time()

                progress.set_description(f'Valid {epoch}: '
                                         f'Time {batch_time.avg:.3f}s '
                                         f'(it:{batch_time.val:.3f}s) '
                                         f'Data:{data_time.avg:.3f}s '
                                         f'(it:{data_time.val:.3f}s) '
                                         f'Loss {losses.avg:.4f} '
                                         f'(tr:{loss_tracking.avg:.4f}, '
                                         f'comp:{loss_completion.avg:.0f})')
    return losses.avg
def validate(epoch, valData, model, logger):
    """Evaluate the depth model on ``valData``; log epoch averages to TensorBoard.

    Returns the epoch-average ``Result``.
    """
    average_meter = AverageMeter()
    model.eval()  # switch to evaluate mode
    end = time.time()
    # skip = len(valData) // 9  # save images every skip iters
    for i, (image, depth) in enumerate(valData):
        image = image.cuda()
        depth = depth.cuda()
        # normal = normal.cuda()
        torch.cuda.synchronize()
        data_time = time.time() - end

        # forward only — no gradients needed during evaluation
        end = time.time()
        with torch.no_grad():
            pred = model(image)
        torch.cuda.synchronize()
        gpu_time = time.time() - end

        result = Result()
        result.evaluate(pred.data, depth.data)
        average_meter.update(result, gpu_time, data_time, image.size(0))
        end = time.time()

        if (i + 1) % 10 == 0:
            print('Test Epoch: [{0}/{1}]\t'
                  't_Data={data_time:.3f}({average.data_time:.3f}) '
                  't_GPU={gpu_time:.3f}({average.gpu_time:.3f})\n\t'
                  'RMSE={result.rmse:.2f}({average.rmse:.2f}) '
                  'RML={result.absrel:.2f}({average.absrel:.2f}) '
                  'Log10={result.lg10:.3f}({average.lg10:.3f}) '
                  'Delta1={result.delta1:.3f}({average.delta1:.3f}) '
                  'Delta2={result.delta2:.3f}({average.delta2:.3f}) '
                  'Delta3={result.delta3:.3f}({average.delta3:.3f})'.format(
                      i + 1, len(valData), data_time=data_time,
                      gpu_time=gpu_time, result=result,
                      average=average_meter.average()))

    avg = average_meter.average()
    print('\n*\n'
          'RMSE={average.rmse:.3f}\n'
          'Rel={average.absrel:.3f}\n'
          'Log10={average.lg10:.3f}\n'
          'Delta1={average.delta1:.3f}\n'
          'Delta2={average.delta2:.3f}\n'
          'Delta3={average.delta3:.3f}\n'
          't_GPU={time:.3f}\n'.format(average=avg, time=avg.gpu_time))
    logger.add_scalar('Test/rmse', avg.rmse, epoch)
    logger.add_scalar('Test/Rel', avg.absrel, epoch)
    logger.add_scalar('Test/log10', avg.lg10, epoch)
    logger.add_scalar('Test/Delta1', avg.delta1, epoch)
    logger.add_scalar('Test/Delta2', avg.delta2, epoch)
    logger.add_scalar('Test/Delta3', avg.delta3, epoch)
    return avg
def train(train_loader, model, criterion, optimizer, epoch):
    """Train the depth model for one epoch with periodic OpenCV visualization.

    Every 20 batches the first four input depths, targets and predictions
    are rendered with a heat colormap in an OpenCV window.  Epoch averages
    are appended to ``train_csv``.  Uses module-level globals ``args``,
    ``output_directory``, ``train_csv`` and ``fieldnames``.
    """
    average_meter = AverageMeter()
    model.train()  # switch to train mode
    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        input, target = input.cuda(), target.cuda()
        torch.cuda.synchronize()
        data_time = time.time() - end

        # compute pred
        end = time.time()
        pred = model(input)
        loss = criterion(pred, target)
        optimizer.zero_grad()
        loss.backward()  # compute gradient and do SGD step
        optimizer.step()
        torch.cuda.synchronize()
        gpu_time = time.time() - end

        # FIX: the colormap images were previously built on EVERY batch
        # (three GPU->CPU copies each) even though they are only shown every
        # 20 batches; the visualization work now runs only when displayed.
        if i % 20 == 0:
            depth_in = np.hstack(input.data.cpu().numpy()[:4, 3] / 10.)
            depth_in = cv2.applyColorMap((depth_in * 255).astype(np.uint8),
                                         cv2.COLORMAP_HOT)
            tgt_out = np.hstack(np.squeeze(target[:4].data.cpu().numpy())) / 10.
            tgt_out = cv2.applyColorMap((tgt_out * 255).astype(np.uint8),
                                        cv2.COLORMAP_HOT)
            out = np.hstack(np.squeeze(pred[:4].data.cpu().numpy()))
            out = np.clip(out / 10., 0., 1.)
            out = cv2.applyColorMap((out * 255).astype(np.uint8),
                                    cv2.COLORMAP_HOT)
            cv2.imshow("Training Results", np.vstack([depth_in, tgt_out, out]))
            cv2.waitKey(1)

        # measure accuracy and record loss
        result = Result()
        result.evaluate(pred.data, target.data)
        average_meter.update(result, gpu_time, data_time, input.size(0))
        end = time.time()

        if (i + 1) % args.print_freq == 0:
            print('=> output: {}'.format(output_directory))
            print('Train Epoch: {0} [{1}/{2}]\t'
                  't_Data={data_time:.3f}({average.data_time:.3f}) '
                  't_GPU={gpu_time:.3f}({average.gpu_time:.3f})\n\t'
                  'RMSE={result.rmse:.2f}({average.rmse:.2f}) '
                  'MAE={result.mae:.2f}({average.mae:.2f}) '
                  'Delta1={result.delta1:.3f}({average.delta1:.3f}) '
                  'REL={result.absrel:.3f}({average.absrel:.3f}) '
                  'Lg10={result.lg10:.3f}({average.lg10:.3f}) '.format(
                      epoch, i + 1, len(train_loader), data_time=data_time,
                      gpu_time=gpu_time, result=result,
                      average=average_meter.average()))

    # Persist the epoch averages.
    avg = average_meter.average()
    with open(train_csv, 'a') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writerow({
            'mse': avg.mse, 'rmse': avg.rmse, 'absrel': avg.absrel,
            'lg10': avg.lg10, 'mae': avg.mae, 'delta1': avg.delta1,
            'delta2': avg.delta2, 'delta3': avg.delta3,
            'gpu_time': avg.gpu_time, 'data_time': avg.data_time})
def pdf_run_epoch(t, args, model, criterion, optimizer, train_dl, val_dl, early_stop_dl, val_dl_cluster, eval_fn, device):
    """One anchored-classification training epoch with gradient accumulation.

    Gradients are accumulated over ``args.batch_size`` set-batches before
    each optimizer step.  Returns a metrics dict including 'train_loss' and
    'early_stop_loss'.
    """
    # Train
    train_loss = AverageMeter()
    model.train()
    pending = 0  # batches whose gradients have not been applied yet
    for i, batch in enumerate(train_dl):
        batch['X'] = batch['X'].unsqueeze(0)
        batch['label'] = batch['label'].unsqueeze(0)
        B, N = batch['X'].shape[0], batch['X'].shape[1]
        anchor_idxs = sample_anchors(B, N)
        anchor_labels = batch['label'][torch.arange(B), anchor_idxs].unsqueeze(1)
        target = (batch['label'] == anchor_labels).float().to(device)
        # Forward
        logits = model(batch['X'].to(device), anchor_idxs)
        loss = criterion(logits.squeeze(-1), target)
        # Backward
        loss.backward()
        train_loss.update(loss.item())
        pending += 1
        # Optimize once per accumulation window
        if (i + 1) % args.batch_size == 0:
            optimizer.step()
            optimizer.zero_grad()
            pending = 0
    # FIX: a final, incomplete accumulation window used to be silently
    # dropped — its gradients were neither applied nor zeroed, leaking into
    # the next epoch.  Flush it here (guarded so a complete final window
    # does not trigger a spurious extra step), mirroring the sibling
    # ``pdf_run_epoch`` implementation.
    if pending > 0:
        optimizer.step()
        optimizer.zero_grad()
    # Eval
    metrics_log = {
        'val_loss': None,
        'val_ari': None,
        'val_nmi': None,
        'val_acc': None,
        'val_tpr': None,
        'val_tnr': None,
        'val_num_failures': None
    }
    if args.eval_during_training:
        if ((t + 1) % args.eval_freq == 0) or (t == 0):
            metrics_log = eval_fn(model, criterion, val_dl_cluster, args, device, True)
        else:
            metrics_log = eval_fn(model, criterion, val_dl, args, device, False)
    # Compute early stop loss
    early_stop_metrics_log = eval_fn(model, criterion, early_stop_dl, args, device, False)
    metrics_log['early_stop_loss'] = early_stop_metrics_log['val_loss']
    metrics_log['train_loss'] = train_loss.avg
    return metrics_log
def train(train_loader, model, fc, criterion, optimizer, epoch):
    """One training epoch for the backbone + fc head pair.

    The combined ``criterion`` drives the forward pass itself and returns
    both the loss and the logits.  Uses module-level globals
    ``combine_ratio``, ``args``, ``device`` and ``lr_scheduler``.
    """
    print('\n Training model...\n')
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    # Mixing ratio grows linearly with the epoch index.
    ratio = combine_ratio * (epoch / (args.epochs))
    # switch to train mode
    model.train()
    fc.train()
    tic = time.time()
    for step, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - tic)
        target = target.to(device)
        input_var = Variable(input)
        target_var = Variable(target)
        optimizer.zero_grad()
        # compute output
        loss, output = criterion(model, fc, input_var, target_var, ratio)
        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        n = input.size(0)
        losses.update(loss.data.item(), n)
        top1.update(prec1.item(), n)
        top5.update(prec5.item(), n)
        loss.backward()
        optimizer.step()
        lr_scheduler.step()
        # measure elapsed time
        batch_time.update(time.time() - tic)
        tic = time.time()
        if step % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                      epoch, step, len(train_loader), batch_time=batch_time,
                      data_time=data_time, loss=losses, top1=top1, top5=top5))
    print(' * Acc@1 {top1.avg:.3f} Acc@1 Error {top1_err:.3f}\n'
          ' * Acc@5 {top5.avg:.3f} Acc@5 Error {top5_err:.3f}'
          .format(top1=top1, top1_err=100 - top1.avg,
                  top5=top5, top5_err=100 - top5.avg))
def pdf_run_epoch(t, args, model, criterion, optimizer, train_dl, early_stop_dl, val_dl, val_dl_cluster, eval_fn, device):
    """One DAC training epoch on PDF data with gradient accumulation.

    Gradients accumulate over ``args.batch_size`` batches per optimizer
    step; a trailing partial window is flushed at epoch end.  Returns a
    metrics dict including 'train_loss' and 'early_stop_loss'.
    """
    train_loss = AverageMeter()
    model.train()
    pending = 0  # batches whose gradients have not been applied yet
    for i, batch in enumerate(train_dl):
        # Convert batch to be compatible with DAC model
        batch = batch_to_dac_compatible(batch, args.augment_pdf_data)
        # Forward
        loss = model.loss_fn_anchored(batch['X'].to(device), batch['label'].to(device))
        # Backward
        loss.backward()
        train_loss.update(loss.item())
        pending += 1
        # optimize
        if (i + 1) % args.batch_size == 0:
            optimizer.step()
            optimizer.zero_grad()
            pending = 0
    # Last batch might not be whole.
    # FIX: only flush when gradients are actually pending — the previous
    # unconditional step() after a complete final window applied a second
    # update from zeroed gradients, which is NOT a no-op for optimizers
    # carrying momentum / Adam-style state.
    if pending > 0:
        optimizer.step()
        optimizer.zero_grad()
    # Eval
    metrics_log = {
        'val_loss': None,
        'val_ari': None,
        'val_nmi': None,
        'val_acc': None,
        'val_tpr': None,
        'val_tnr': None,
        'val_num_failures': None
    }
    if args.eval_during_training:
        if ((t + 1) % args.eval_freq == 0) or (t == 0):
            metrics_log = eval_fn(model, criterion, val_dl_cluster, args, device, True)
        else:
            metrics_log = eval_fn(model, criterion, val_dl, args, device, False)
    # Compute early stop loss
    early_stop_metrics_log = eval_fn(model, criterion, early_stop_dl, args, device, False)
    metrics_log['early_stop_loss'] = early_stop_metrics_log['val_loss']
    metrics_log['train_loss'] = train_loss.avg
    return metrics_log
def run_epoch(t, args, model, criterion, optimizer, train_dl, early_stop_dl, val_dl, val_dl_cluster, eval_fn, device):
    """Train the MIL-pair model for one epoch, then gather eval metrics.

    Batches are first converted to bags (pair-indicator or plain pairs,
    depending on ``args.pair_indicators``).  Returns a metrics dict
    including 'train_loss' and 'early_stop_loss'.
    """
    train_loss = AverageMeter()
    model.train()
    for batch in train_dl:
        # Convert batch to bags
        if args.pair_indicators:
            batch = batch_to_mil_pair_indicators(
                batch, args.n_pairs, args.train_pairs_replacement)
        else:
            batch = batch_to_mil_pairs(
                batch, args.n_pairs, args.stratify, args.pair_bag_len)
        # Forward
        bag_logits = model((batch['X'].to(device), batch['B'].to(device)))
        batch_loss = criterion(bag_logits, batch['T'].to(device))
        # Backward + optimize (one step per batch)
        batch_loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        train_loss.update(batch_loss.item())
    # Eval: placeholder values unless evaluation runs this epoch.
    metric_names = ('val_loss', 'val_ari', 'val_nmi', 'val_acc',
                    'val_tpr', 'val_tnr', 'val_num_failures')
    metrics_log = {name: None for name in metric_names}
    if args.eval_during_training:
        if ((t + 1) % args.eval_freq == 0) or (t == 0):
            metrics_log = eval_fn(model, criterion, val_dl_cluster, args, device, True)
        else:
            metrics_log = eval_fn(model, criterion, val_dl, args, device, False)
    # Compute early stop loss
    early_stop_metrics_log = eval_fn(model, criterion, early_stop_dl, args, device, False)
    metrics_log['early_stop_loss'] = early_stop_metrics_log['val_loss']
    metrics_log['train_loss'] = train_loss.avg
    return metrics_log
def train(epoch, trainData, model, crite, optimizer, logger):
    """Train the depth model for one epoch, logging scalars to TensorBoard.

    Uses the module-level global ``opt`` for the output directory.  Scalars
    are logged every 10 batches at step ``epoch * len(trainData) + i``.
    """
    average_meter = AverageMeter()
    model.train()  # switch to train mode
    end = time.time()
    for i, (image, depth) in enumerate(trainData):
        image = image.cuda()
        depth = depth.cuda()
        # normal = normal.cuda()
        # image = torch.autograd.Variable(image)
        # depth = torch.autograd.Variable(depth)
        torch.cuda.synchronize()
        data_time = time.time() - end

        end = time.time()
        optimizer.zero_grad()
        pred = model(image)
        loss = crite(pred, depth)
        loss.backward()
        optimizer.step()
        torch.cuda.synchronize()
        gpu_time = time.time() - end

        result = Result()
        result.evaluate(pred.data, depth.data)
        average_meter.update(result, gpu_time, data_time, image.size(0))
        end = time.time()

        if (i + 1) % 10 == 0:
            print('=> output: {}'.format(opt.output_dir))
            print('Train Epoch: {0} [{1}/{2}]\t'
                  't_Data={data_time:.3f}({average.data_time:.3f}) '
                  't_GPU={gpu_time:.3f}({average.gpu_time:.3f})\n\t'
                  'Loss={Loss:.5f} '
                  'RMSE={result.rmse:.2f}({average.rmse:.2f}) '
                  'RML={result.absrel:.2f}({average.absrel:.2f}) '
                  'Log10={result.lg10:.3f}({average.lg10:.3f}) '
                  'Delta1={result.delta1:.3f}({average.delta1:.3f}) '
                  'Delta2={result.delta2:.3f}({average.delta2:.3f}) '
                  'Delta3={result.delta3:.3f}({average.delta3:.3f})'.format(
                      epoch, i + 1, len(trainData), data_time=data_time,
                      gpu_time=gpu_time, Loss=loss.item(), result=result,
                      average=average_meter.average()))
            current_step = epoch * len(trainData) + i
            # FIX: log a Python float instead of the live CUDA loss tensor —
            # matches the other scalar logs and avoids retaining the tensor.
            logger.add_scalar('Train/loss', loss.item(), current_step)
            logger.add_scalar('Train/RMSE', result.rmse, current_step)
            logger.add_scalar('Train/rml', result.absrel, current_step)
            logger.add_scalar('Train/Log10', result.lg10, current_step)
            logger.add_scalar('Train/Delta1', result.delta1, current_step)
            logger.add_scalar('Train/Delta2', result.delta2, current_step)
            logger.add_scalar('Train/Delta3', result.delta3, current_step)
def pdf_eval_fn(model, criterion, val_dl, args, device, eval_clusters):
    """Evaluate the anchored DAC model on ``val_dl``.

    Always computes the validation loss; when ``eval_clusters`` is True it
    additionally runs anchored clustering and scores ARI / NMI against the
    true labels.  Returns a metrics dict with ``None`` for entries that were
    not computed.  NOTE(review): ``criterion`` is accepted but unused — the
    model supplies its own ``loss_fn_anchored``.
    """
    with torch.no_grad():
        ari = AverageMeter()
        nmi = AverageMeter()
        val_loss = AverageMeter()
        num_failures = AverageMeter()
        model.eval()
        for batch in val_dl:
            # Convert batch to be compatible with DAC model
            batch = batch_to_dac_compatible(batch)
            loss = model.loss_fn_anchored(batch['X'].to(device), batch['label'].to(device))
            val_loss.update(loss.item())
            # Cluster
            if eval_clusters:
                params, labels, ll, fail = model.cluster_anchored(
                    batch['X'].to(device), max_iter=args.max_iter,
                    verbose=False, check=True)
                # Only the first element of the batch is scored — presumably
                # the eval batch size is 1 here; TODO confirm against caller.
                labels = labels[0].cpu().numpy()
                true_labels = batch['label'][0].argmax(-1).numpy()
                ari.update(adjusted_rand_score(true_labels, labels))
                nmi.update(
                    normalized_mutual_info_score(true_labels, labels,
                                                 average_method='arithmetic'))
                num_failures.update(int(fail))
        if eval_clusters:
            metrics_log = {
                'val_loss': val_loss.avg,
                'val_ari': ari.avg,
                'val_nmi': nmi.avg,
                'val_acc': None,
                'val_tpr': None,
                'val_tnr': None,
                'val_num_failures': num_failures.avg
            }
        else:
            metrics_log = {
                'val_loss': val_loss.avg,
                'val_ari': None,
                'val_nmi': None,
                'val_acc': None,
                'val_tpr': None,
                'val_tnr': None,
                'val_num_failures': None
            }
        return metrics_log
def compute_depth_metrics(self, verbose=True) -> Result:
    """Computes metrics on the difference between raw and fixed depth values.

    Iterates over ``self.paths``, scores each raw/fixed depth pair with a
    ``Result``, and returns the average over all images.
    """
    avg = AverageMeter()
    total = len(self)  # hoisted: constant across the loop
    for i, path in enumerate(self.paths):
        _, depth_raw, depth_fix = self.load_images(path)
        depth_raw = torch.tensor(depth_raw)
        depth_fix = torch.tensor(depth_fix)
        res = Result()
        res.evaluate(depth_raw, depth_fix)
        # Unit weight per image; the two timing fields are unused here.
        avg.update(res, 0, 0, 1)
        if verbose:
            # FIX: 1-based progress so the last image reads "N/N", not "N-1/N".
            stdout.write(f"=> computing img {i + 1}/{total}\r")
    if verbose:
        stdout.write("\n")
    return avg.average()
def validate(val_loader, model, criterion, epoch, writer=None):
    """Evaluate the clip model on the validation set; returns the average loss.

    Predictions are averaged over the temporal dimension before scoring.
    Optionally logs loss/top1/top5 to the TensorBoard ``writer``.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    # switch to evaluate mode
    model.eval()
    print_freq = config.TEST.PRINT_FREQ
    with torch.no_grad():
        tic = time.time()
        for batch_idx, (input, target) in enumerate(val_loader):
            input = input.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)
            # compute output, averaging over the clip dimension
            output = model(input)
            output = torch.mean(output, dim=2)
            loss = criterion(output, target)
            # measure accuracy and record loss
            prec1, prec5 = accuracy(output, target, topk=(1, 5))
            n = input.size(0)
            losses.update(loss.item(), n)
            top1.update(prec1[0], n)
            top5.update(prec5[0], n)
            # measure elapsed time
            batch_time.update(time.time() - tic)
            tic = time.time()
            if batch_idx % print_freq == 0:
                print(('Test: [{0}/{1}]\t'
                       'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                       'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                       'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                       'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                           batch_idx, len(val_loader), batch_time=batch_time,
                           loss=losses, top1=top1, top5=top5)))
    print(('Testing Results: Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f} Loss {loss.avg:.5f}'
           .format(top1=top1, top5=top5, loss=losses)))
    if writer:
        writer.add_scalar('val/loss', losses.avg, epoch + 1)
        writer.add_scalar('val/top1', top1.avg, epoch + 1)
        writer.add_scalar('val/top5', top5.avg, epoch + 1)
    return losses.avg
def train_coarse(train_loader, model, criterion, optimizer, epoch):
    """One training epoch for the coarse depth network.

    Appends the current loss to the module-level ``history_loss`` every
    ``args.print_freq`` batches.  Uses globals ``args``, ``output_directory``
    and ``history_loss``.
    """
    average_meter = AverageMeter()
    model.train()  # switch to train mode
    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        input, target = input.cuda(), target.cuda()
        torch.cuda.synchronize()
        data_time = time.time() - end

        # compute pred
        end = time.time()
        pred = model(input)
        loss = criterion(pred, target)
        optimizer.zero_grad()
        loss.backward()  # compute gradient and do SGD step
        optimizer.step()
        torch.cuda.synchronize()
        gpu_time = time.time() - end

        # measure accuracy and record loss
        end = time.time()
        result = Result()
        result.evaluate(pred.data, target.data)
        average_meter.update(result, gpu_time, data_time, input.size(0))
        # NOTE(review): ``eval_time`` is computed but never used or logged.
        eval_time = time.time() - end

        if (i + 1) % args.print_freq == 0:
            history_loss.append(loss.item())
            print('=> output: {}'.format(output_directory))
            print('Train Epoch: {0} [{1}/{2}]\t'
                  't_Data={data_time:.3f}({average.data_time:.3f}) '
                  't_GPU={gpu_time:.3f}({average.gpu_time:.3f})\n\t'
                  'RMSE={result.rmse:.2f}({average.rmse:.2f}) '
                  'MAE={result.mae:.2f}({average.mae:.2f}) '
                  'Delta1={result.delta1:.3f}({average.delta1:.3f}) '
                  'REL={result.absrel:.3f}({average.absrel:.3f}) '
                  'Lg10={result.lg10:.3f}({average.lg10:.3f}) '.format(
                      epoch, i + 1, len(train_loader), data_time=data_time,
                      gpu_time=gpu_time, result=result,
                      average=average_meter.average()))
def validate_epoch(val_loader, model, loss_fn, use_cuda):
    """Evaluate classification accuracy on the validation set.

    Prints the epoch-average top-1/top-5 precision and returns the top-1
    average.
    """
    top1 = AverageMeter()
    top5 = AverageMeter()
    model.eval()
    with torch.no_grad():
        for batch_input, batch_label in tqdm(val_loader):
            if use_cuda:
                batch_label = batch_label.cuda()
                batch_input = batch_input.cuda()
            output = model(torch.autograd.Variable(batch_input))
            # Loss is computed for parity with the original flow; its value
            # is not recorded.
            loss = loss_fn(output, torch.autograd.Variable(batch_label))
            prec1, prec5 = accuracy(output.data, batch_label, topk=(1, 5))
            n = batch_input.size(0)
            top1.update(prec1, n)
            top5.update(prec5, n)
    print(' **Test** Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'.format(
        top1=top1, top5=top5))
    return top1.avg
def test(test_loader, model, criterion, epoch):
    """Run inference on the test set.

    Prints progress every quarter of ``args.print_freq`` batches and returns
    the (top1, top5, losses) meters.
    """
    print('\n Running inference on test data...\n')
    # switch to evaluate mode
    model.eval()
    with torch.no_grad():
        batch_time = AverageMeter()
        losses = AverageMeter()
        top1 = AverageMeter()
        top5 = AverageMeter()
        # FIX: the original ``if i % args.print_freq // 4 == 0:`` parsed as
        # ``(i % args.print_freq) // 4 == 0`` and therefore fired on the
        # first FOUR batches of every print_freq window.  The intended
        # interval is a quarter of print_freq (clamped to at least 1 so a
        # small print_freq cannot divide by zero).
        log_every = max(args.print_freq // 4, 1)
        end = time.time()
        for i, (input, target) in enumerate(test_loader):
            target = target.to(device)
            input = input.to(device)
            # compute output
            output = model(input)
            loss = criterion(output, target)
            # measure accuracy and record loss
            prec1, prec5 = accuracy(output.data, target.data, topk=(1, 5))
            losses.update(loss.data.item(), input.size(0))
            top1.update(prec1.item(), input.size(0))
            top5.update(prec5.item(), input.size(0))
            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            if i % log_every == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          i, len(test_loader), batch_time=batch_time,
                          loss=losses, top1=top1, top5=top5))
        print(' * Acc@1 {top1.avg:.3f} Acc@1 Error {top1_err:.3f}\n'
              ' * Acc@5 {top5.avg:.3f} Acc@5 Error {top5_err:.3f}'.format(
                  top1=top1, top1_err=100 - top1.avg,
                  top5=top5, top5_err=100 - top5.avg))
        return top1, top5, losses
def evaluate(epoch, test_loader, tokenizer, params):
    """Compute the average masked-LM loss over ``test_loader``.

    NOTE(review): relies on a module-level ``model`` global rather than a
    parameter (unlike ``train_one_epoch``).  Returns the average loss via
    the meter's ``__call__``.
    """
    device = params.device
    avg_loss = AverageMeter()
    model.eval()
    with torch.no_grad():
        for i, batch in enumerate(test_loader):
            batch = batch.to(device)
            # segment = create_dummy_segment(batch)
            inputs, labels = mask_tokens(batch, tokenizer, params)
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs, masked_lm_labels=labels)
            loss = outputs[0]  # model outputs are always tuple in transformers (see doc)
            avg_loss.update(loss.item())
    logging.info('Test-E-{}: loss: {:.4f}'.format(epoch, avg_loss()))
    return avg_loss()
def test_cls(self, val_loader, opts, log_file):
    """Run classification evaluation.

    Prints progress every ``opts.log_interval`` batches, appends the final
    top-1 precision to ``log_file`` and returns it.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    self.model.eval()
    tic = time.time()
    with torch.no_grad():
        for step, (inputs, target) in enumerate(val_loader):
            # inputs, target = inputs.cuda(), target.cuda()
            target = target.cuda()
            output = self.model(inputs)
            loss = self.criterion(output, target)
            prec = accuracy(output, target)[0]
            n = target.size(0)
            losses.update(loss.item(), n)
            top1.update(prec.item(), n)
            batch_time.update(time.time() - tic)
            tic = time.time()
            if step % opts.log_interval == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec {top1.val:.4f}% ({top1.avg:.4f}%)'.format(
                          step, len(val_loader), batch_time=batch_time,
                          loss=losses, top1=top1))
    print(' * Prec {top1.avg:.4f}% '.format(top1=top1))
    files = WriteData(log_file)
    files.write_data_txt('test: {top1.avg:.4f}% '.format(top1=top1))
    return top1.avg
def evaluate(params, loader, model, experiment):
    """Run test-set evaluation, logging visual comparisons to Comet.

    Predictions are clipped to [depth_min, depth_max] before scoring; every
    batch's first sample is logged as a merged comparison image and
    optionally saved to disk.
    """
    print("Testing...")
    # FIX: the original ``with experiment.test() and torch.no_grad():``
    # evaluated the boolean ``and`` first, so only ONE of the two context
    # managers was actually entered (whichever operand the truthiness
    # selected) — either Comet's test context or gradient disabling was
    # silently skipped.  A ``with`` statement takes comma-separated managers.
    with experiment.test(), torch.no_grad():
        average = AverageMeter()
        end = time.time()
        for i, (inputs, targets) in enumerate(loader):
            inputs, targets = inputs.to(params["device"]), targets.to(
                params["device"])
            data_time = time.time() - end

            # Predict
            end = time.time()
            outputs = model(inputs)
            gpu_time = time.time() - end

            # Clip prediction
            outputs[outputs > params["depth_max"]] = params["depth_max"]
            outputs[outputs < params["depth_min"]] = params["depth_min"]

            result = Result()
            result.evaluate(outputs.data, targets.data)
            average.update(result, gpu_time, data_time, inputs.size(0))

            # Log images to comet
            img_merged = utils.log_image_to_comet(inputs[0], targets[0],
                                                  outputs[0], epoch=0, id=i,
                                                  experiment=experiment,
                                                  result=result,
                                                  prefix="visual_test")
            if params["save_test_images"]:
                filename = os.path.join(
                    params["experiment_dir"],
                    "image_{}_epoch_{}.png".format(i, str(params["start_epoch"])))
                utils.save_image(img_merged, filename)
def validate(val_loader, model, criterion):
    """Evaluate classification accuracy on the validation set.

    Prints progress every 100 batches and returns the average top-1
    precision.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    # switch to evaluate mode
    model.eval()
    with torch.no_grad():
        tic = time.time()
        for batch_idx, (ip, target) in enumerate(val_loader):
            # noinspection DuplicatedCode
            ip = ip.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)
            # compute output
            output = model(ip)
            loss = criterion(output, target)
            # measure accuracy and record loss
            prec1, prec5 = accuracy(output, target, topk=(1, 5))
            n = ip.size(0)
            losses.update(loss.item(), n)
            top1.update(prec1[0], n)
            top5.update(prec5[0], n)
            # measure elapsed time
            batch_time.update(time.time() - tic)
            tic = time.time()
            if batch_idx % 100 == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          batch_idx, len(val_loader), batch_time=batch_time,
                          loss=losses, top1=top1, top5=top5))
    print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'.format(
        top1=top1, top5=top5))
    return top1.avg
def train(self, trainloader, epoch, opts, log_file):
    """One classification training epoch.

    Prints progress every ``opts.log_interval`` batches and appends the
    epoch-average precision and loss to ``log_file``.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    self.model.train()
    tic = time.time()
    for step, (inputs, target) in enumerate(trainloader):
        data_time.update(time.time() - tic)
        target = target.cuda()
        output = self.model(inputs)
        loss = self.criterion(output, target)
        prec = accuracy(output, target)[0]
        n = target.size(0)
        losses.update(loss.item(), n)
        top1.update(prec.item(), n)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        batch_time.update(time.time() - tic)
        tic = time.time()
        if step % opts.log_interval == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec {top1.val:.4f}% ({top1.avg:.4f}%)'.format(
                      epoch, step, len(trainloader), batch_time=batch_time,
                      data_time=data_time, loss=losses, top1=top1))
    files = WriteData(log_file)
    files.write_data_txt('train: {top1.avg:.4f}%'.format(top1=top1)
                         + ' {loss.avg:.4f}'.format(loss=losses))
def validate(val_loader, model, epoch, write_to_file=True):
    """Evaluate a depth-estimation model for one epoch.

    Args:
        val_loader: DataLoader yielding (input, target) pairs.
        model: network under evaluation (put into eval mode here).
        epoch: epoch index, used for the comparison-image filename.
        write_to_file: when True, append the averaged metrics to `test_csv`.

    Returns:
        (avg, img_merge): the averaged `Result` metrics and the stitched
        visualization image (None for depth-only modality or an empty loader).

    Uses module globals `args`, `output_directory`, `test_csv`, `fieldnames`.
    """
    average_meter = AverageMeter()

    # switch to evaluate mode
    model.eval()

    # FIX: initialise before the loop so the final `return` cannot raise
    # NameError when `val_loader` is empty (previously `img_merge` was only
    # ever assigned inside the loop body).
    img_merge = None

    end = time.time()
    for i, (inp, target) in enumerate(val_loader):
        inp, target = inp.cuda(), target.cuda()
        # torch.cuda.synchronize()
        data_time = time.time() - end

        # compute output
        end = time.time()
        with torch.no_grad():
            pred = model(inp)
        # torch.cuda.synchronize()
        gpu_time = time.time() - end

        # measure accuracy and record loss
        result = Result()
        result.evaluate(pred.data, target.data)
        average_meter.update(result, gpu_time, data_time, inp.size(0))
        end = time.time()

        # save 8 images for visualization: one row every `skip` batches,
        # written out once the 8th row is reached.
        skip = 50
        if args.modality == 'd':
            img_merge = None
        else:
            if args.modality == 'rgb':
                rgb = inp
            elif args.modality == 'rgbd':
                rgb = inp[:, :3, :, :]
                depth = inp[:, 3:, :, :]
            if i == 0:
                if args.modality == 'rgbd':
                    img_merge = utils.merge_into_row_with_gt(
                        rgb, depth, target, pred)
                else:
                    img_merge = utils.merge_into_row(rgb, target, pred)
            elif (i < 8 * skip) and (i % skip == 0):
                if args.modality == 'rgbd':
                    row = utils.merge_into_row_with_gt(rgb, depth, target, pred)
                else:
                    row = utils.merge_into_row(rgb, target, pred)
                img_merge = utils.add_row(img_merge, row)
            elif i == 8 * skip:
                filename = output_directory + '/comparison_' + str(
                    epoch) + '.png'
                utils.save_image(img_merge, filename)

        if (i + 1) % args.print_freq == 0:
            print('Test: [{0}/{1}]\t'
                  't_GPU={gpu_time:.3f}({average.gpu_time:.3f})\n\t'
                  'RMSE={result.rmse:.2f}({average.rmse:.2f}) '
                  'MAE={result.mae:.2f}({average.mae:.2f})\n\t'
                  'Delta1={result.delta1:.3f}({average.delta1:.3f}) '
                  'REL={result.absrel:.3f}({average.absrel:.3f}) '
                  'Lg10={result.lg10:.3f}({average.lg10:.3f}) '.format(
                      i + 1, len(val_loader), gpu_time=gpu_time,
                      result=result, average=average_meter.average()))

    avg = average_meter.average()

    print('\n*\n'
          'RMSE={average.rmse:.3f}\n'
          'MAE={average.mae:.3f}\n'
          'Delta1={average.delta1:.3f}\n'
          'REL={average.absrel:.3f}\n'
          'Lg10={average.lg10:.3f}\n'
          't_GPU={time:.3f}\n'.format(average=avg, time=avg.gpu_time))

    if write_to_file:
        # Append the averaged metrics as one CSV row.
        with open(test_csv, 'a') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writerow({
                'mse': avg.mse,
                'rmse': avg.rmse,
                'absrel': avg.absrel,
                'lg10': avg.lg10,
                'mae': avg.mae,
                'delta1': avg.delta1,
                'delta2': avg.delta2,
                'delta3': avg.delta3,
                'data_time': avg.data_time,
                'gpu_time': avg.gpu_time
            })
    return avg, img_merge
def validate(localizer, adversarial, dataloader, experiment_directory, labels,
             segmentation_map_threshold, num_classes, evaluate=False,
             save_results=False):
    """ Loop over the validation set (in batches) to acquire relevant metrics """
    # Overview of the flow (from the code below):
    #   * localizer(inputs, labels=targets) returns (output, gcams) where
    #     gcams unpacks into (gcams, new_images, new_targets, original_targets).
    #   * BCE losses are tracked for the localizer and (optionally) for the
    #     adversarial network run on mask-erased images.
    #   * With evaluate=True, per-batch targets additionally carry ground-truth
    #     segmentation maps and a Metrics(20) object accumulates mIoU etc.
    #   * With save_results=True, heatmaps / erased images / segmentation maps
    #     are written under experiment_directory.
    print('Validating...')
    if evaluate:
        # 20-class segmentation metrics accumulator (only needed when the
        # loader provides ground-truth segmentation maps).
        metrics = Metrics(20)
    localizer_criterion = torch.nn.BCELoss()
    adversarial_criterion = torch.nn.BCELoss()
    localizer_loss_meter = AverageMeter()
    adversarial_loss_meter = AverageMeter()
    for i, (inputs, targets) in enumerate(dataloader):
        if evaluate:
            # Segmentation maps are included in the targets
            targets, segmentation_maps = targets
        else:
            segmentation_maps = None
        if torch.cuda.is_available():
            inputs, targets = inputs.cuda(), targets.cuda()
        # Forward pass; gcams is a 4-tuple (see unpack two lines below).
        output, gcams = localizer(inputs, labels=targets)
        loss = localizer_criterion(output, targets)
        localizer_loss_meter.update(loss.item())
        gcams, new_images, new_targets, original_targets = gcams
        if adversarial is not None or save_results:
            # The localizer may expand the batch (one entry per target label),
            # so re-read the batch size from the Grad-CAM tensor.
            new_batch_size = gcams.size(0)
            masks = gcam_to_mask(gcams)
            # Images with the high-attention regions erased.
            masked_image = erase_mask(new_images, masks)
            if adversarial is not None:
                adversarial_output = adversarial(masked_image)
                adversarial_output = torch.sigmoid(adversarial_output)
                # The adversary is scored against the original (pre-expansion)
                # targets.
                adversarial_loss = adversarial_criterion(
                    adversarial_output, original_targets)
                adversarial_loss_meter.update(adversarial_loss.item())
            if save_results:
                # Dump heatmap overlays, raw heatmaps and erased images for
                # every entry of the (expanded) batch.
                for k in range(new_batch_size):
                    number = f'{i * new_batch_size + k}'  #TODO: fix
                    label_string = labels[new_targets[k]]
                    file_postfix = f'{number}_{label_string}'
                    save_location = os.path.join(
                        experiment_directory, f'heatmap_{file_postfix}.png')
                    save_gradcam(filename=save_location,
                                 gcam=gcams[k, 0].detach(),
                                 raw_image=new_images[k].clone())
                    save_location = os.path.join(
                        experiment_directory,
                        f'raw_heatmap_{file_postfix}.png')
                    save_gradcam(filename=save_location,
                                 gcam=gcams[k, 0].detach())
                    save_location = os.path.join(experiment_directory,
                                                 f'erased_{file_postfix}.png')
                    tensor2imwrite(save_location, denormalize(masked_image[k]))
        if evaluate:
            # Generate and visualize predicted segmentation map
            predicted_segmentation_maps = generate_segmentation_map(
                gcams, num_classes, segmentation_maps.shape[1:], new_targets,
                threshold=segmentation_map_threshold)
            metrics.update(predicted_segmentation_maps, segmentation_maps)
            if save_results:
                predicted_indices = predicted_segmentation_maps.unique()
                # Index 0 is reserved for background; shift labels by one.
                all_labels = ['background', *labels]
                predicted_labels = [
                    all_labels[idx] for idx in predicted_indices
                ]
                labels_string = '_'.join(predicted_labels)
                filename = f'map_{i:04d}_{labels_string}.png'
                save_location = os.path.join(experiment_directory, filename)
                # NOTE(review): `k` here is left over from the save loop above
                # (the last batch entry) — confirm that overlaying the map on
                # new_images[k] rather than a specific index is intended.
                save_segmentation_map(save_location,
                                      predicted_segmentation_maps,
                                      denormalize(new_images[k]).clone())
                filename = f'map_raw_{i:04d}_{labels_string}.png'
                save_location = os.path.join(experiment_directory, filename)
                save_segmentation_map(save_location,
                                      predicted_segmentation_maps)
    print('Validation localizer loss:', localizer_loss_meter.avg)
    print('Validation adversarial loss:', adversarial_loss_meter.avg)
    if evaluate:
        # Report segmentation quality; background is skipped for
        # precision/recall.
        miou = metrics.miou().item()
        precision = metrics.precision(skip_background=True).item()
        recall = metrics.recall(skip_background=True).item()
        metrics.print_scores_per_class()
        print('mIoU:', miou)
        print('precision:', precision)
        print('recall:', recall)
def train(cfg, writer, logger):
    """Full config-driven training driver: seeds, data, model, optimizer,
    optional checkpoint resume, then the epoch loop with validation,
    checkpointing and TensorBoard logging.

    Args (from usage below):
        cfg: nested config dict; reads "cuda", "training" (seed, epochs,
            save_interval, resume, best_model) and whatever the get_* helpers
            consume.
        writer: TensorBoard SummaryWriter; its logdir is also used as the
            checkpoint directory.
        logger: logging.Logger for progress messages.
    """
    # This statement must be declared before using pytorch
    use_cuda = False
    if cfg.get("cuda", None) is not None:
        if cfg.get("cuda", None) != "all":
            # Restrict visible devices before any CUDA context is created.
            os.environ["CUDA_VISIBLE_DEVICES"] = cfg.get("cuda", None)
        use_cuda = torch.cuda.is_available()

    # Setup random seed
    seed = cfg["training"].get("seed", random.randint(1, 10000))
    torch.manual_seed(seed)
    if use_cuda:
        torch.cuda.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    # Setup Dataloader
    train_loader, val_loader = get_loader(cfg)

    # Setup Model
    model = get_model(cfg)
    # writer.add_graph(model, torch.rand([1, 3, 224, 224]))
    if use_cuda and torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model, device_ids=list(
            range(torch.cuda.device_count())))

    # Setup optimizer, lr_scheduler and loss function
    optimizer = get_optimizer(model.parameters(), cfg)
    scheduler = get_scheduler(optimizer, cfg)
    loss_fn = get_loss_fn(cfg)

    # Setup Metrics
    epochs = cfg["training"]["epochs"]
    recorder = RecorderMeter(epochs)
    start_epoch = 0
    # save model parameters every <n> epochs
    save_interval = cfg["training"]["save_interval"]
    if use_cuda:
        model.cuda()
        loss_fn.cuda()

    # Resume Trained Model
    # NOTE(review): these joins run before the `is not None` check below, so a
    # config with "resume": null would raise TypeError here — confirm whether
    # the config guarantees a string value.
    resume_path = os.path.join(writer.file_writer.get_logdir(),
                               cfg["training"]["resume"])
    best_path = os.path.join(writer.file_writer.get_logdir(),
                             cfg["training"]["best_model"])
    if cfg["training"]["resume"] is not None:
        if os.path.isfile(resume_path):
            logger.info(
                "Loading model and optimizer from checkpoint '{}'".format(
                    resume_path))
            checkpoint = torch.load(resume_path)
            state = checkpoint["state_dict"]
            if torch.cuda.device_count() <= 1:
                # Strip the DataParallel "module." prefix for single-device runs.
                state = convert_state_dict(state)
            model.load_state_dict(state)
            optimizer.load_state_dict(checkpoint["optimizer"])
            scheduler.load_state_dict(checkpoint["scheduler"])
            start_epoch = checkpoint["epoch"]
            recorder = checkpoint['recorder']
            logger.info("Loaded checkpoint '{}' (epoch {})".format(
                resume_path, checkpoint["epoch"]))
        else:
            logger.info("No checkpoint found at '{}'".format(resume_path))

    epoch_time = AverageMeter()
    for epoch in range(start_epoch, epochs):
        start_time = time.time()
        # ETA estimate from the running average epoch duration.
        need_hour, need_mins, need_secs = convert_secs2time(epoch_time.avg *
                                                            (epochs - epoch))
        need_time = '[Need: {:02d}:{:02d}:{:02d}]'.format(
            need_hour, need_mins, need_secs)
        logger.info(
            '\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [learning_rate={:8.6f}]'.
            format(time_string(), epoch, epochs, need_time, optimizer.
                   param_groups[0]['lr']) +  # scheduler.get_last_lr() >=1.4
            ' [Best : Accuracy={:.2f}]'.format(recorder.max_accuracy(False)))

        train_acc, train_los = train_epoch(train_loader, model, loss_fn,
                                           optimizer, use_cuda, logger)
        val_acc, val_los = validate_epoch(val_loader, model, loss_fn, use_cuda,
                                          logger)
        scheduler.step()

        is_best = recorder.update(epoch, train_los, train_acc, val_los,
                                  val_acc)
        if is_best or epoch % save_interval == 0 or epoch == epochs - 1:
            # save model (resume model and best model)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'recorder': recorder,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'scheduler': scheduler.state_dict(),
                }, is_best, best_path, resume_path)

        for name, param in model.named_parameters():
            # save histogram
            writer.add_histogram(name, param.clone().cpu().data.numpy(), epoch)
        writer.add_scalar('Train/loss', train_los, epoch)  # save curves
        writer.add_scalar('Train/acc', train_acc, epoch)
        writer.add_scalar('Val/loss', val_los, epoch)
        writer.add_scalar('Val/acc', val_acc, epoch)

        epoch_time.update(time.time() - start_time)
    writer.close()
def train_epoch(self):
    """Run one adversarial training epoch of the bar/phrase generator.

    Past the pre-training phase, alternating iterations (keyed on
    ``(self.epoch + curr_it) % 2``) update the two latent-code
    discriminators before the generator update; otherwise only the
    generator (reconstruction) is trained. After the loop, scalar/image
    summaries are written, a few phrases are generated for qualitative
    evaluation, and the plateau LR schedulers are stepped.

    Fixes relative to the previous revision:
      * ``tensor.cuda(async=...)`` -> ``cuda(non_blocking=...)`` — ``async``
        is a reserved keyword since Python 3.7, so the old form no longer
        even parses; PyTorch renamed the keyword to ``non_blocking`` in 0.4.
      * ``% 2 is 0`` identity checks -> ``== 0`` equality checks (``is`` on
        int literals is implementation-defined and a SyntaxWarning).
      * Loss meters now record ``loss.item()`` instead of the live tensor,
        so the autograd graph is not retained across iterations.
      * ``image_sample`` is detached before being logged as an image.
    """
    tqdm_batch = tqdm(self.dataloader, total=self.dataset.num_iterations,
                      desc="epoch-{}".format(self.epoch))
    image_sample = None
    Tensor = torch.cuda.FloatTensor
    avg_generator_loss = AverageMeter()
    avg_barZ_disc_loss = AverageMeter()
    avg_phraseZ_disc_loss = AverageMeter()
    for curr_it, (note, pre_note, pre_phrase, position) in enumerate(tqdm_batch):
        # FIX: `async=` is a SyntaxError on Python >= 3.7; use non_blocking.
        note = note.cuda(non_blocking=self.config.async_loading)
        pre_note = pre_note.cuda(non_blocking=self.config.async_loading)
        pre_phrase = pre_phrase.cuda(non_blocking=self.config.async_loading)
        position = position.cuda(non_blocking=self.config.async_loading)

        note = Variable(note)
        pre_note = Variable(pre_note)
        pre_phrase = Variable(pre_phrase)
        position = Variable(position)

        # Real/fake targets for the adversarial losses; the "double" variant
        # matches the doubled fake-bar batch below.
        valid_target = Variable(Tensor(note.size(0)).fill_(1.0),
                                requires_grad=False)
        fake_target = Variable(Tensor(note.size(0)).fill_(0.0),
                               requires_grad=False)
        valid_target_double = Variable(Tensor(note.size(0) * 2).fill_(1.0),
                                       requires_grad=False)

        self.iteration += 1

        ####################
        self.generator.train()
        self.z_discriminator_bar.train()
        self.z_discriminator_phrase.train()
        self.generator.zero_grad()
        self.z_discriminator_bar.zero_grad()
        self.z_discriminator_phrase.zero_grad()

        if self.epoch > self.pretraining_step_size and \
                (self.epoch + curr_it) % 2 == 0:  # FIX: was `is 0`
            #################### Discriminator ####################
            self.free(self.z_discriminator_bar)
            self.free(self.z_discriminator_phrase)
            self.frozen(self.generator)

            _, z, pre_z, phrase_feature = self.generator(
                note, pre_note, pre_phrase, position)

            #### Phrase Feature ###
            # Fake phrase codes are drawn from the N(0, sigma^2) prior.
            phrase_fake = (torch.randn(phrase_feature.size(0),
                                       phrase_feature.size(1))
                           * self.config.sigma).cuda()
            d_phrase_fake = self.z_discriminator_phrase(phrase_fake).view(-1)
            d_phrase_real = self.z_discriminator_phrase(phrase_feature).view(-1)
            phraseZ_dics_loss = self.loss_phrase(d_phrase_real, fake_target) + \
                self.loss_phrase(d_phrase_fake, valid_target)

            #### Bar Feature ####
            # One fake bar code per real code in z and pre_z (hence * 2).
            bar_fake = (torch.randn(z.size(0) * 2, z.size(1))
                        * self.config.sigma).cuda()
            d_bar_fake = self.z_discriminator_bar(bar_fake).view(-1)
            d_bar_real1 = self.z_discriminator_bar(z).view(-1)
            d_bar_real2 = self.z_discriminator_bar(pre_z).view(-1)
            barZ_dics_loss = self.loss_bar(d_bar_real1, fake_target) + \
                self.loss_bar(d_bar_real2, fake_target) + \
                self.loss_bar(d_bar_fake, valid_target_double)

            #######################
            phraseZ_dics_loss.backward()
            barZ_dics_loss.backward()
            self.opt_Zdiscriminator_bar.step()
            self.opt_Zdiscriminator_phrase.step()

            # FIX: record scalars, not live tensors, so the graph is freed.
            avg_barZ_disc_loss.update(barZ_dics_loss.item())
            avg_phraseZ_disc_loss.update(phraseZ_dics_loss.item())

        #################### Generator ####################
        self.free(self.generator)
        self.frozen(self.z_discriminator_bar)
        self.frozen(self.z_discriminator_phrase)

        gen_note, z, pre_z, phrase_feature = self.generator(
            note, pre_note, pre_phrase, position)
        # Keep a detached sample for the image summaries after the loop.
        image_sample = gen_note.detach()
        origin_image = note

        #### Phrase Encoder Loss ###
        d_phrase_real = self.z_discriminator_phrase(phrase_feature).view(-1)
        loss = self.loss_phrase(d_phrase_real, valid_target)

        #### Bar Encoder Loss ####
        d_bar_real1 = self.z_discriminator_bar(z).view(-1)
        d_bar_real2 = self.z_discriminator_bar(pre_z).view(-1)
        loss += self.loss_bar(d_bar_real1, valid_target) + self.loss_bar(
            d_bar_real2, valid_target)

        #### Generator Loss ####
        loss += self.loss_generator(
            gen_note, note,
            True if self.epoch <= self.pretraining_step_size else False)

        loss.backward()
        self.opt_generator.step()
        avg_generator_loss.update(loss.item())  # FIX: was update(loss)

    self.summary_writer.add_scalar("train/Generator_loss",
                                   avg_generator_loss.val, self.epoch)
    if self.epoch > self.pretraining_step_size and self.epoch % 2 == 0:
        self.summary_writer.add_scalar("train/Bar_Z_Discriminator_loss",
                                       avg_barZ_disc_loss.val, self.epoch)
        self.summary_writer.add_scalar("train/Phrase_Z_discriminator_loss",
                                       avg_phraseZ_disc_loss.val, self.epoch)
    tqdm_batch.close()

    # Qualitative samples: raw generator output, its 0.3-thresholded
    # binarization, and the corresponding ground-truth bars.
    self.summary_writer.add_image("train/sample 1",
                                  image_sample[0].reshape(1, 96, 60),
                                  self.epoch)
    self.summary_writer.add_image("train/sample 2",
                                  image_sample[1].reshape(1, 96, 60),
                                  self.epoch)
    self.summary_writer.add_image("train/sample 3",
                                  image_sample[2].reshape(1, 96, 60),
                                  self.epoch)
    image_sample = torch.gt(image_sample, 0.3).type('torch.cuda.FloatTensor')
    self.summary_writer.add_image("train/sample_binarization 1",
                                  image_sample[0].reshape(1, 96, 60),
                                  self.epoch)
    self.summary_writer.add_image("train/sample_binarization 2",
                                  image_sample[1].reshape(1, 96, 60),
                                  self.epoch)
    self.summary_writer.add_image("train/sample_binarization 3",
                                  image_sample[2].reshape(1, 96, 60),
                                  self.epoch)
    self.summary_writer.add_image("train/origin 1",
                                  origin_image[0].reshape(1, 96, 60),
                                  self.epoch)
    self.summary_writer.add_image("train/origin 2",
                                  origin_image[1].reshape(1, 96, 60),
                                  self.epoch)
    self.summary_writer.add_image("train/origin 3",
                                  origin_image[2].reshape(1, 96, 60),
                                  self.epoch)

    # Autoregressive evaluation: generate 10 phrases bar-by-bar, feeding
    # each binarized phrase back in as the next conditioning phrase.
    with torch.no_grad():
        self.generator.eval()
        self.z_discriminator_bar.eval()
        self.z_discriminator_phrase.eval()

        outputs = []
        pre_phrase = torch.zeros(1, 1, 384, 60, dtype=torch.float32)
        pre_bar = torch.zeros(1, 1, 96, 60, dtype=torch.float32)
        phrase_idx = [330] + [i for i in range(10 - 2, -1, -1)]
        for idx in range(10):
            bar_set = []
            for _ in range(4):
                pre_bar = self.generator(
                    torch.randn(1, 1152, dtype=torch.float32).cuda(),
                    pre_bar.cuda(), pre_phrase,
                    torch.from_numpy(np.array([phrase_idx[idx]])), False)
                pre_bar = torch.gt(pre_bar, 0.3).type(
                    'torch.FloatTensor')  # 1, 1, 96, 96
                bar_set.append(np.reshape(pre_bar.numpy(), [96, 60]))
            pre_phrase = np.concatenate(bar_set, axis=0)
            outputs.append(pre_phrase)
            pre_phrase = torch.from_numpy(
                np.reshape(pre_phrase, [1, 1, 96 * 4, 60])).float().cuda()

        self.summary_writer.add_image("eval/generated 1",
                                      outputs[0].reshape(1, 96 * 4, 60),
                                      self.epoch)
        self.summary_writer.add_image("eval/generated 2",
                                      outputs[1].reshape(1, 96 * 4, 60),
                                      self.epoch)

    self.scheduler_generator.step(avg_generator_loss.val)
    # NOTE(review): `curr_it` leaks out of the training loop here, so this
    # condition depends on the index of the last batch — confirm intended.
    if self.epoch > self.pretraining_step_size and \
            (self.epoch + curr_it) % 2 == 0:  # FIX: was `is 0`
        self.scheduler_Zdiscriminator_bar.step(avg_barZ_disc_loss.val)
        self.scheduler_Zdiscriminator_phrase.step(avg_phraseZ_disc_loss.val)

    self.logger.warning(
        'loss info - generator: {}, barZ disc: {}, phraseZ disc: {}'.
        format(avg_generator_loss.val, avg_barZ_disc_loss.val,
               avg_phraseZ_disc_loss.val))
    self.logger.warning(
        'lr info - generator: {}, barZ disc: {}, phraseZ disc: {}'.format(
            self.get_lr(self.opt_generator),
            self.get_lr(self.opt_Zdiscriminator_bar),
            self.get_lr(self.opt_Zdiscriminator_phrase)))
def train(train_loader, model, criterion, optimizer, epoch, writer=None):
    """Train `model` for one epoch with gradient accumulation.

    Gradients are accumulated over `config.TRAIN.GRAD_ACCUM_STEPS`
    micro-batches (the loss is pre-scaled by 1/N so the effective gradient
    is the mean over the window) before each optimizer step. Metrics are
    recorded on the un-scaled loss. Epoch averages are optionally logged to
    TensorBoard via `writer`.

    Fixes: the step boundary was `step % N == 0`, which (a) performed an
    update after the very first micro-batch only, and (b) left the final
    partial window's gradients neither applied nor zeroed, so they bled
    into the next epoch's accumulation.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for step, (images, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        images = images.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)

        # compute output; average logits over the temporal/clip dimension
        output = model(images)
        output = torch.mean(output, dim=2)
        loss = criterion(output, target)

        # measure accuracy and record the un-scaled loss
        prec1, prec5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), images.size(0))
        top1.update(prec1[0], images.size(0))
        top5.update(prec5[0], images.size(0))

        # scale so N accumulated backward passes average to one batch grad
        loss = loss / config.TRAIN.GRAD_ACCUM_STEPS
        loss.backward()
        # FIX: count micro-batches from 1 so each update sees a full window.
        if (step + 1) % config.TRAIN.GRAD_ACCUM_STEPS == 0:
            optimizer.step()
            optimizer.zero_grad()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if step % config.TRAIN.PRINT_FREQ == 0:
            print(('Epoch: [{0}][{1}/{2}], lr: {lr:.5f}\t'
                   'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                   'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                   'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                   'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                   'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                       epoch, step, len(train_loader),
                       batch_time=batch_time, data_time=data_time,
                       loss=losses, top1=top1, top5=top5,
                       lr=optimizer.param_groups[-1]['lr'])))

    # FIX: flush a trailing partial accumulation window so its gradients are
    # applied instead of leaking into the next epoch.
    if len(train_loader) % config.TRAIN.GRAD_ACCUM_STEPS != 0:
        optimizer.step()
        optimizer.zero_grad()

    if writer:
        writer.add_scalar('train/loss', losses.avg, epoch + 1)
        writer.add_scalar('train/top1', top1.avg, epoch + 1)
        writer.add_scalar('train/top5', top5.avg, epoch + 1)
def CAC(cfg, writer, logger):
    """Compute per-class feature anchors over the source dataset.

    Iterates the target loader (pulling matching source batches), runs the
    frozen prediction network under `torch.no_grad()`, accumulates per-class
    mean feature vectors via `Class_Features.calculate_mean_vector` /
    `model.update_objective_SingleVector`, and finally saves
    `model.objective_vectors` into the writer's log directory.

    Args:
        cfg: nested config dict (seed, model/default_gpu, data, training...).
        writer: SummaryWriter; its logdir receives the saved anchors.
        logger: logging.Logger for progress output.

    Fix: the early-exit assignment now sets ``flag_train`` — the variable the
    outer epoch loop actually checks — instead of an unused local ``flag``,
    so reaching ``train_iters`` really stops training.
    """
    torch.manual_seed(cfg.get('seed', 1337))
    torch.cuda.manual_seed(cfg.get('seed', 1337))
    np.random.seed(cfg.get('seed', 1337))
    random.seed(cfg.get('seed', 1337))

    ## create dataset
    default_gpu = cfg['model']['default_gpu']
    device = torch.device(
        "cuda:{}".format(default_gpu) if torch.cuda.is_available() else 'cpu')
    datasets = create_dataset(
        cfg, writer, logger
    )  #source_train\ target_train\ source_valid\ target_valid + _loader

    model = CustomModel(cfg, writer, logger)

    # Setup Metrics
    running_metrics_val = RunningScore(cfg['data']['target']['n_class'])
    source_running_metrics_val = RunningScore(cfg['data']['target']['n_class'])
    val_loss_meter = AverageMeter()
    source_val_loss_meter = AverageMeter()
    time_meter = AverageMeter()
    loss_fn = get_loss_function(cfg)
    flag_train = True

    epoches = cfg['training']['epoches']

    source_train_loader = datasets.source_train_loader
    target_train_loader = datasets.target_train_loader
    logger.info('source train batchsize is {}'.format(
        source_train_loader.args.get('batch_size')))
    print('source train batchsize is {}'.format(
        source_train_loader.args.get('batch_size')))
    logger.info('target train batchsize is {}'.format(
        target_train_loader.batch_size))
    print('target train batchsize is {}'.format(
        target_train_loader.batch_size))

    val_loader = None
    if cfg.get('valset') == 'gta5':
        val_loader = datasets.source_valid_loader
        logger.info('valset is gta5')
        print('valset is gta5')
    else:
        val_loader = datasets.target_valid_loader
        logger.info('valset is cityscapes')
        print('valset is cityscapes')
    logger.info('val batchsize is {}'.format(val_loader.batch_size))
    print('val batchsize is {}'.format(val_loader.batch_size))

    # load category anchors
    # objective_vectors = torch.load('category_anchors')
    # model.objective_vectors = objective_vectors['objective_vectors']
    # model.objective_vectors_num = objective_vectors['objective_num']
    class_features = Class_Features(numbers=19)

    # begin training
    model.iter = 0
    for epoch in range(epoches):
        if not flag_train:
            break
        if model.iter > cfg['training']['train_iters']:
            break
        # monitoring the accuracy and recall of CAG-based PLA and probability-based PLA
        for (target_image, target_label,
             target_img_name) in datasets.target_train_loader:
            model.iter += 1
            i = model.iter
            if i > cfg['training']['train_iters']:
                break
            images, labels, source_img_name = datasets.source_train_loader.next(
            )
            start_ts = time.time()

            images = images.to(device)
            labels = labels.to(device)
            target_image = target_image.to(device)
            target_label = target_label.to(device)
            model.scheduler_step()
            model.train(logger=logger)
            if cfg['training'].get('freeze_bn') == True:
                model.freeze_bn_apply()
            model.optimizer_zero_grad()

            # Anchors are computed from a frozen prediction network.
            if model.PredNet.training:
                model.PredNet.eval()
            with torch.no_grad():
                _, _, feat_cls, output = model.PredNet_Forward(images)
                # Downsample the labels to the feature-map resolution.
                batch, w, h = labels.size()
                newlabels = labels.reshape([batch, 1, w, h]).float()
                newlabels = F.interpolate(newlabels,
                                          size=feat_cls.size()[2:],
                                          mode='nearest')
                vectors, ids = class_features.calculate_mean_vector(
                    feat_cls, output, newlabels, model)
                for t in range(len(ids)):
                    model.update_objective_SingleVector(
                        ids[t], vectors[t].detach().cpu().numpy(), 'mean')

            time_meter.update(time.time() - start_ts)
            if model.iter % 20 == 0:
                print("Iter [{:d}] Time {:.4f}".format(model.iter,
                                                       time_meter.avg))
            if (i + 1) == cfg['training']['train_iters']:
                # FIX: was `flag = False`, which the epoch loop never read.
                flag_train = False
                break

    save_path = os.path.join(
        writer.file_writer.get_logdir(),
        "anchors_on_{}_from_{}".format(
            cfg['data']['source']['name'],
            cfg['model']['arch'],
        ))
    torch.save(model.objective_vectors, save_path)