# Example #1
def train(train_loader, model, criterion, optimizer, epoch):
    """Run one training epoch and append the averaged metrics to `train_csv`.

    Args:
        train_loader: iterable yielding (input, target) tensor pairs.
        model: network being trained (assumed already on GPU).
        criterion: loss callable applied to (pred, target).
        optimizer: optimizer stepped once per batch.
        epoch: current epoch index, used only for progress printing.

    Relies on module-level globals: args, output_directory, train_csv,
    fieldnames, Result, AverageMeter, time, torch, csv.
    """
    average_meter = AverageMeter()
    model.train()  # switch to train mode
    end = time.time()
    for i, (input, target) in enumerate(train_loader):

        input, target = input.cuda(), target.cuda()
        # synchronize so the wall-clock split between data loading and GPU
        # compute is accurate (CUDA kernel launches are asynchronous)
        torch.cuda.synchronize()
        data_time = time.time() - end

        # compute pred
        end = time.time()
        pred = model(input)
        loss = criterion(pred, target)
        optimizer.zero_grad()
        loss.backward()  # compute gradient and do SGD step
        optimizer.step()
        torch.cuda.synchronize()
        gpu_time = time.time() - end

        # measure accuracy and record loss
        result = Result()
        result.evaluate(pred.data, target.data)

        average_meter.update(result, gpu_time, data_time, input.size(0))
        end = time.time()

        if (i + 1) % args.print_freq == 0:
            print('=> output: {}'.format(output_directory))
            print('Train Epoch: {0} [{1}/{2}]\t'
                  't_Data={data_time:.3f}({average.data_time:.3f}) '
                  't_GPU={gpu_time:.3f}({average.gpu_time:.3f})\n\t'
                  'RMSE={result.rmse:.2f}({average.rmse:.2f}) '
                  'MAE={result.mae:.2f}({average.mae:.2f}) '
                  'Delta1={result.delta1:.3f}({average.delta1:.3f}) '
                  'REL={result.absrel:.3f}({average.absrel:.3f}) '
                  'Lg10={result.lg10:.3f}({average.lg10:.3f}) '.format(
                      epoch,
                      i + 1,
                      len(train_loader),
                      data_time=data_time,
                      gpu_time=gpu_time,
                      result=result,
                      average=average_meter.average()))

    # Persist the epoch averages as one CSV row (file/fieldnames are globals).
    avg = average_meter.average()
    with open(train_csv, 'a') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writerow({
            'mse': avg.mse,
            'rmse': avg.rmse,
            'absrel': avg.absrel,
            'lg10': avg.lg10,
            'mae': avg.mae,
            'delta1': avg.delta1,
            'delta2': avg.delta2,
            'delta3': avg.delta3,
            'gpu_time': avg.gpu_time,
            'data_time': avg.data_time
        })
def artif_run_epoch(t, args, model, criterion, optimizer, train_dl, early_stop_dl, val_dl, val_dl_cluster, eval_fn, device):
    """Train for one epoch on artificial data, then run the evaluation passes.

    The training target for each batch is the pairwise same-label adjacency
    matrix derived from the labels. Returns a metrics dict containing the
    validation metrics (when enabled), 'early_stop_loss' and 'train_loss'.
    """
    loss_meter = AverageMeter()
    model.train()

    for sample in train_dl:
        features = sample['X'].to(device)
        labels = sample['label']
        # Pairwise same-label indicator matrix used as the regression target.
        adjacency = (labels.unsqueeze(-2) == labels.unsqueeze(-1)).float().to(device)

        batch_loss = criterion(model(features), adjacency)
        batch_loss.backward()
        loss_meter.update(batch_loss.item())

        # One optimizer step per batch.
        optimizer.step()
        optimizer.zero_grad()

    # Evaluation: full cluster eval on the first epoch and every eval_freq
    # epochs; cheap loss-only eval otherwise.
    metrics_log = dict.fromkeys(['val_loss', 'val_ari', 'val_nmi', 'val_acc',
                                 'val_tpr', 'val_tnr', 'val_num_failures'])
    if args.eval_during_training:
        if ((t + 1) % args.eval_freq == 0) or (t == 0):
            metrics_log = eval_fn(model, criterion, val_dl_cluster, args, device, True)
        else:
            metrics_log = eval_fn(model, criterion, val_dl, args, device, False)

    early_stop_log = eval_fn(model, criterion, early_stop_dl, args, device, False)
    metrics_log['early_stop_loss'] = early_stop_log['val_loss']
    metrics_log['train_loss'] = loss_meter.avg

    return metrics_log
def train_one_epoch(epoch, model, train_loader, optimizer, tokenizer, params):
    """Run one epoch of masked-language-model training.

    Args:
        epoch: epoch index, used only in the final log line.
        model: transformers-style MLM model; outputs a tuple whose first two
            elements are (loss, prediction_scores).
        train_loader: yields token batches movable to `params.device`.
        optimizer: stepped once per batch.
        tokenizer: passed through to `mask_tokens`.
        params: config object providing `device` and `vocab_size`.
    """
    device = params.device
    avg_loss = AverageMeter()
    # ignore_index=-1: positions labelled -1 are excluded from the accuracy
    # (presumably the unmasked tokens — confirm against mask_tokens).
    avg_acc = Accuracy(ignore_index=-1)

    model.train()
    for i, batch in enumerate(train_loader):
        optimizer.zero_grad()

        batch = batch.to(device)
        # segment = create_dummy_segment(batch)

        # mask_tokens produces the corrupted inputs and the MLM labels.
        inputs, labels = mask_tokens(batch, tokenizer, params)
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs, masked_lm_labels=labels)
        loss, prediction_scores = outputs[:2]  # model outputs are always tuple in transformers (see doc)

        loss.backward()
        optimizer.step()

        # Flatten to (tokens, vocab) vs (tokens,) for token-level accuracy.
        avg_acc.update(prediction_scores.view(-1, params.vocab_size), labels.view(-1))
        avg_loss.update(loss.item())

    logging.info('Train-E-{}: loss: {:.4f}'.format(epoch, avg_loss()))
# Example #4 — File: train.py, Project: syt2/CRA
def train_epoch(train_loader, model, loss_fn, optimizer, use_cuda, logger):
    """Train `model` for one epoch; returns (top-1 avg accuracy, avg loss).

    Args:
        train_loader: yields (input, label) batches; wrapped in tqdm.
        model: classifier producing logits for top-1/top-5 accuracy.
        loss_fn: loss on (output, label).
        optimizer: stepped once per batch.
        use_cuda: move tensors to GPU when True.
        logger: logging-style object used for the end-of-epoch summary.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    model.train()
    end_time = time.time()
    for i, (input, label) in enumerate(tqdm(train_loader)):
        data_time.update(time.time() - end_time)
        if use_cuda:
            label = label.cuda()
            input = input.cuda()
        # NOTE(review): torch.autograd.Variable is a legacy no-op wrapper in
        # modern PyTorch; the no_grad context here only affects the wrapping,
        # not the training forward/backward below.
        with torch.no_grad():
            input_var = torch.autograd.Variable(input)
            label_var = torch.autograd.Variable(label)
        output = model(input_var)
        loss = loss_fn(output, label_var)
        prec1, prec5 = accuracy(output.data, label, topk=(1, 5))
        losses.update(loss.data, input.size(0))
        top1.update(prec1, input.size(0))
        top5.update(prec5, input.size(0))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_time.update(time.time() - end_time)
        end_time = time.time()

    logger.info(
        '  **Train** Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'.format(
            top1=top1, top5=top5))
    return top1.avg, losses.avg
# Example #5
def train(train_loader, model, criterion, optimizer, epoch, logger):
    """Train a depth-estimation model for one epoch, logging to TensorBoard.

    Every `args.print_freq` batches the current/average metrics are printed
    and per-metric scalars are written to `logger` (SummaryWriter-style) at
    step `epoch * len(train_loader) + i`.

    Relies on module-level globals: args, output_directory, Result,
    AverageMeter, time, torch.
    """
    average_meter = AverageMeter()
    model.train()  # switch to train mode
    end = time.time()

    batch_num = len(train_loader)

    for i, (input, target) in enumerate(train_loader):

        # itr_count += 1
        input, target = input.cuda(), target.cuda()
        # print('input size  = ', input.size())
        # print('target size = ', target.size())
        # sync so the data-loading time split is accurate (CUDA is async)
        torch.cuda.synchronize()
        data_time = time.time() - end

        # compute pred
        end = time.time()

        pred = model(input)  # forward pass (note: original comment was in Chinese)

        # print('pred size = ', pred.size())
        # print('target size = ', target.size())

        loss = criterion(pred, target)
        optimizer.zero_grad()
        loss.backward()  # compute gradient and do SGD step
        optimizer.step()
        torch.cuda.synchronize()
        gpu_time = time.time() - end

        # measure accuracy and record loss
        result = Result()
        result.evaluate(pred.data, target.data)
        average_meter.update(result, gpu_time, data_time, input.size(0))
        end = time.time()

        if (i + 1) % args.print_freq == 0:
            print('=> output: {}'.format(output_directory))
            print('Train Epoch: {0} [{1}/{2}]\t'
                  't_Data={data_time:.3f}({average.data_time:.3f}) '
                  't_GPU={gpu_time:.3f}({average.gpu_time:.3f})\n\t'
                  'Loss={Loss:.5f} '
                  'RMSE={result.rmse:.2f}({average.rmse:.2f}) '
                  'RML={result.absrel:.2f}({average.absrel:.2f}) '
                  'Log10={result.lg10:.3f}({average.lg10:.3f}) '
                  'Delta1={result.delta1:.3f}({average.delta1:.3f}) '
                  'Delta2={result.delta2:.3f}({average.delta2:.3f}) '
                  'Delta3={result.delta3:.3f}({average.delta3:.3f})'.format(
                epoch, i + 1, len(train_loader), data_time=data_time,
                gpu_time=gpu_time, Loss=loss.item(), result=result, average=average_meter.average()))
            current_step = epoch * batch_num + i
            logger.add_scalar('Train/RMSE', result.rmse, current_step)
            logger.add_scalar('Train/rml', result.absrel, current_step)
            logger.add_scalar('Train/Log10', result.lg10, current_step)
            logger.add_scalar('Train/Delta1', result.delta1, current_step)
            logger.add_scalar('Train/Delta2', result.delta2, current_step)
            logger.add_scalar('Train/Delta3', result.delta3, current_step)

    # NOTE(review): `avg` is computed but neither returned nor logged —
    # looks like leftover/truncated code; confirm against the original repo.
    avg = average_meter.average()
def validate(val_loader,
             model,
             criterion_tracking,
             criterion_completion,
             epoch,
             lambda_completion=0):
    """Evaluate a tracking(+completion) model; returns the average total loss.

    Total loss per batch is `loss1 + lambda_completion * loss2`, where a term
    is replaced by a zero tensor when the weighting makes it irrelevant
    (tracking term skipped when lambda_completion >= 1, completion term
    skipped when lambda_completion == 0).

    Args:
        val_loader: yields (this_PC, prev_PC, model_PC, target) tuples.
        model: returns (output, prev_PC_AE) given (this_PC, model_PC).
        criterion_tracking: loss on (output, target).
        criterion_completion: loss on (prev_PC_AE, model_PC).
        epoch: epoch index shown in the tqdm description.
        lambda_completion: weight of the completion term.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    loss_tracking = AverageMeter()
    loss_completion = AverageMeter()
    losses = AverageMeter()

    # switch to evaluate mode
    model.eval()
    with tqdm(enumerate(val_loader), total=len(val_loader), ncols=120) as t:

        with torch.no_grad():
            end = time.time()
            for i, (this_PC, prev_PC, model_PC, target) in t:
                # measure data loading time
                data_time.update(time.time() - end)

                this_PC = this_PC.cuda()
                prev_PC = prev_PC.cuda()
                model_PC = model_PC.cuda()
                target = target.cuda(non_blocking=True).view(-1)

                output, prev_PC_AE = model(this_PC, model_PC)

                # tracking term: skipped when fully weighted to completion
                if lambda_completion < 1:
                    loss1 = criterion_tracking(output, target)
                else:
                    loss1 = torch.tensor([0]).float().cuda()

                # completion term: skipped when unweighted
                if lambda_completion != 0:
                    loss2 = criterion_completion(prev_PC_AE, model_PC)
                else:
                    loss2 = torch.tensor([0]).float().cuda()
                loss = loss1 + lambda_completion * loss2

                # measure accuracy and record loss
                loss_tracking.update(loss1.item(), this_PC.size(0))
                loss_completion.update(loss2.item(), this_PC.size(0))
                losses.update(loss.item(), this_PC.size(0))

                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()

                t.set_description(f'Valid {epoch}: '
                              f'Time {batch_time.avg:.3f}s '
                              f'(it:{batch_time.val:.3f}s) '
                              f'Data:{data_time.avg:.3f}s '
                              f'(it:{data_time.val:.3f}s) '
                              f'Loss {losses.avg:.4f} '
                              f'(tr:{loss_tracking.avg:.4f}, '
                              f'comp:{loss_completion.avg:.0f})')

    return losses.avg
# Example #7
def validate(epoch, valData, model, logger):
    """Evaluate `model` on `valData` and return the averaged Result.

    Prints running metrics every 10 batches, then (after the whole loader is
    consumed) prints a summary and writes per-metric scalars to `logger`
    (SummaryWriter-style) at step `epoch`.

    Args:
        epoch: epoch index used as the TensorBoard global step.
        valData: yields (image, depth) batches.
        model: depth-estimation network (run under torch.no_grad()).
        logger: object with add_scalar(tag, value, step).

    Returns:
        The averaged metrics object from AverageMeter.average().
    """
    average_meter = AverageMeter()
    model.eval()  # switch to evaluate mode
    end = time.time()
    # skip = len(valData) // 9  # save images every skip iters

    for i, (image, depth) in enumerate(valData):
        image = image.cuda()
        depth = depth.cuda()
        # normal = normal.cuda()
        # sync so the data/GPU time split is accurate (CUDA is async)
        torch.cuda.synchronize()
        data_time = time.time() - end

        end = time.time()
        with torch.no_grad():
            pred = model(image)

        torch.cuda.synchronize()
        gpu_time = time.time() - end

        result = Result()
        result.evaluate(pred.data, depth.data)
        average_meter.update(result, gpu_time, data_time, image.size(0))
        end = time.time()

        if (i + 1) % 10 == 0:
            print('Test Epoch: [{0}/{1}]\t'
                  't_Data={data_time:.3f}({average.data_time:.3f}) '
                  't_GPU={gpu_time:.3f}({average.gpu_time:.3f})\n\t'
                  'RMSE={result.rmse:.2f}({average.rmse:.2f}) '
                  'RML={result.absrel:.2f}({average.absrel:.2f}) '
                  'Log10={result.lg10:.3f}({average.lg10:.3f}) '
                  'Delta1={result.delta1:.3f}({average.delta1:.3f}) '
                  'Delta2={result.delta2:.3f}({average.delta2:.3f}) '
                  'Delta3={result.delta3:.3f}({average.delta3:.3f})'.format(
                i + 1, len(valData), data_time=data_time,
                gpu_time=gpu_time, result=result, average=average_meter.average()))

    # BUG FIX: the summary print, TensorBoard logging and `return avg` were
    # indented inside the batch loop, so validation returned after the very
    # first batch. They now run once, after the full pass.
    avg = average_meter.average()

    print('\n*\n'
          'RMSE={average.rmse:.3f}\n'
          'Rel={average.absrel:.3f}\n'
          'Log10={average.lg10:.3f}\n'
          'Delta1={average.delta1:.3f}\n'
          'Delta2={average.delta2:.3f}\n'
          'Delta3={average.delta3:.3f}\n'
          't_GPU={time:.3f}\n'.format(
        average=avg, time=avg.gpu_time))

    logger.add_scalar('Test/rmse', avg.rmse, epoch)
    logger.add_scalar('Test/Rel', avg.absrel, epoch)
    logger.add_scalar('Test/log10', avg.lg10, epoch)
    logger.add_scalar('Test/Delta1', avg.delta1, epoch)
    logger.add_scalar('Test/Delta2', avg.delta2, epoch)
    logger.add_scalar('Test/Delta3', avg.delta3, epoch)
    return avg
def train(train_loader, model, criterion, optimizer, epoch):
	"""One training epoch with live OpenCV visualization of depth maps.

	Every 20 batches shows the input depth channel, ground truth and
	prediction for the first 4 samples (colorized with COLORMAP_HOT) in a
	cv2 window. Appends the epoch-average metrics to `train_csv`.

	Relies on module-level globals: args, output_directory, train_csv,
	fieldnames, Result, AverageMeter, np, cv2, time, torch, csv.
	"""
	average_meter = AverageMeter()
	model.train() # switch to train mode
	end = time.time()
	for i, (input, target) in enumerate(train_loader):
		input, target = input.cuda(), target.cuda()
		# sync so the data/GPU time split is accurate (CUDA is async)
		torch.cuda.synchronize()
		data_time = time.time() - end

		# compute pred
		end = time.time()
		pred = model(input)
		loss = criterion(pred, target)
		optimizer.zero_grad()
		loss.backward() # compute gradient and do SGD step
		optimizer.step()
		torch.cuda.synchronize()
		gpu_time = time.time() - end

		# channel 3 is presumably the depth plane of an RGB-D input; /10.
		# assumes depths are <= 10m so values land in [0,1] — TODO confirm
		depth_in = np.hstack(input.data.cpu().numpy()[:4, 3] / 10.)
		depth_in = cv2.applyColorMap((depth_in * 255).astype(np.uint8), cv2.COLORMAP_HOT)

		tgt_out = np.hstack(np.squeeze(target[:4].data.cpu().numpy())) / 10.
		tgt_out = cv2.applyColorMap((tgt_out * 255).astype(np.uint8), cv2.COLORMAP_HOT)

		out = np.hstack(np.squeeze(pred[:4].data.cpu().numpy()))
		out = np.clip(out / 10., 0., 1.)
		out = cv2.applyColorMap((out * 255).astype(np.uint8), cv2.COLORMAP_HOT)

		if i % 20 == 0:
			# stack input/target/prediction strips vertically for display
			cv2.imshow("Training Results", np.vstack([depth_in, tgt_out, out]))
			cv2.waitKey(1)

		# measure accuracy and record loss
		result = Result()
		result.evaluate(pred.data, target.data)
		average_meter.update(result, gpu_time, data_time, input.size(0))
		end = time.time()

		if (i + 1) % args.print_freq == 0:
			print('=> output: {}'.format(output_directory))
			print('Train Epoch: {0} [{1}/{2}]\t'
				  't_Data={data_time:.3f}({average.data_time:.3f}) '
				  't_GPU={gpu_time:.3f}({average.gpu_time:.3f})\n\t'
				  'RMSE={result.rmse:.2f}({average.rmse:.2f}) '
				  'MAE={result.mae:.2f}({average.mae:.2f}) '
				  'Delta1={result.delta1:.3f}({average.delta1:.3f}) '
				  'REL={result.absrel:.3f}({average.absrel:.3f}) '
				  'Lg10={result.lg10:.3f}({average.lg10:.3f}) '.format(
				  epoch, i+1, len(train_loader), data_time=data_time,
				  gpu_time=gpu_time, result=result, average=average_meter.average()))

	# persist epoch-average metrics as one CSV row
	avg = average_meter.average()
	with open(train_csv, 'a') as csvfile:
		writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
		writer.writerow({'mse': avg.mse, 'rmse': avg.rmse, 'absrel': avg.absrel, 'lg10': avg.lg10,
			'mae': avg.mae, 'delta1': avg.delta1, 'delta2': avg.delta2, 'delta3': avg.delta3,
			'gpu_time': avg.gpu_time, 'data_time': avg.data_time})
# Example #9
def pdf_run_epoch(t, args, model, criterion, optimizer, train_dl, val_dl,
                  early_stop_dl, val_dl_cluster, eval_fn, device):
    """Train the anchored model for one epoch on PDF data, then evaluate.

    Each dataloader item is treated as a micro-batch: gradients accumulate
    and the optimizer steps once every `args.batch_size` items.

    Args:
        t: epoch index (controls cluster-eval frequency).
        args: config with batch_size, eval_during_training, eval_freq, ...
        eval_fn: callable(model, criterion, dl, args, device, eval_clusters).

    Returns:
        dict of validation metrics plus 'early_stop_loss' and 'train_loss'.
    """

    # Train
    train_loss = AverageMeter()
    model.train()

    for i, batch in enumerate(train_dl):
        batch['X'] = batch['X'].unsqueeze(0)
        batch['label'] = batch['label'].unsqueeze(0)

        # Pick one anchor element per bag; the target marks which elements
        # share the anchor's label.
        B, N = batch['X'].shape[0], batch['X'].shape[1]
        anchor_idxs = sample_anchors(B, N)
        anchor_labels = batch['label'][torch.arange(B),
                                       anchor_idxs].unsqueeze(1)
        target = (batch['label'] == anchor_labels).float().to(device)

        # Forward
        logits = model(batch['X'].to(device), anchor_idxs)
        loss = criterion(logits.squeeze(-1), target)

        # Backward
        loss.backward()
        train_loss.update(loss.item())

        # Optimize (gradient accumulation over args.batch_size items)
        if (i + 1) % args.batch_size == 0:
            optimizer.step()
            optimizer.zero_grad()

    # BUG FIX: flush a trailing partial accumulation window so leftover
    # gradients do not leak into evaluation / the next epoch (consistent
    # with the sibling pdf_run_epoch implementation's "last batch might not
    # be whole" handling). A step after zero_grad is a no-op.
    optimizer.step()
    optimizer.zero_grad()

    # Eval
    metrics_log = {
        'val_loss': None,
        'val_ari': None,
        'val_nmi': None,
        'val_acc': None,
        'val_tpr': None,
        'val_tnr': None,
        'val_num_failures': None
    }
    if args.eval_during_training:
        # Full cluster eval on the first epoch and every eval_freq epochs;
        # cheap loss-only eval otherwise.
        if ((t + 1) % args.eval_freq == 0) or (t == 0):
            metrics_log = eval_fn(model, criterion, val_dl_cluster, args,
                                  device, True)

        else:
            metrics_log = eval_fn(model, criterion, val_dl, args, device,
                                  False)

    # Compute early stop loss
    early_stop_metrics_log = eval_fn(model, criterion, early_stop_dl, args,
                                     device, False)

    metrics_log['early_stop_loss'] = early_stop_metrics_log['val_loss']
    metrics_log['train_loss'] = train_loss.avg

    return metrics_log
# Example #10
def train(train_loader, model, fc, criterion, optimizer, epoch):
    """One training epoch for a backbone + separate `fc` head.

    Here `criterion` is called as criterion(model, fc, input, target, ratio)
    and returns (loss, output); `ratio` ramps linearly with the epoch via
    the module-level `combine_ratio`.

    Relies on module-level globals: combine_ratio, args, device,
    lr_scheduler, Variable, accuracy, AverageMeter, time.
    """
    print('\n Training model...\n')
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # linear ramp of the combination ratio over the training run
    ratio = combine_ratio * (epoch / (args.epochs))

    # switch to train mode
    model.train()
    fc.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):

        # measure data loading time
        data_time.update(time.time() - end)

        target = target.to(device)
        input_var = Variable(input)
        target_var = Variable(target)

        optimizer.zero_grad()
        # compute output
        loss, output = criterion(model, fc, input_var, target_var, ratio)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.data.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))

        loss.backward()
        optimizer.step()
        # NOTE(review): scheduler stepped per batch (not per epoch) using the
        # module-level `lr_scheduler` — confirm this matches its configuration.
        lr_scheduler.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                epoch, i, len(train_loader), batch_time=batch_time,
                data_time=data_time, loss=losses, top1=top1, top5=top5))

    print(' * Acc@1 {top1.avg:.3f} Acc@1 Error {top1_err:.3f}\n'
              ' * Acc@5 {top5.avg:.3f} Acc@5 Error {top5_err:.3f}'
              .format(top1=top1, top1_err=100-top1.avg, top5=top5, top5_err=100-top5.avg))
# Example #11
def pdf_run_epoch(t, args, model, criterion, optimizer, train_dl,
                  early_stop_dl, val_dl, val_dl_cluster, eval_fn, device):
    """Train the anchored DAC model for one epoch on PDF data, then evaluate.

    Each dataloader item is a micro-batch: gradients accumulate and the
    optimizer steps every `args.batch_size` items, with a final flush after
    the loop for a trailing partial window.

    Returns:
        dict of validation metrics plus 'early_stop_loss' and 'train_loss'.
    """
    train_loss = AverageMeter()
    model.train()

    for i, batch in enumerate(train_dl):

        # Convert batch to be compatible with DAC model
        batch = batch_to_dac_compatible(batch, args.augment_pdf_data)

        # Forward
        loss = model.loss_fn_anchored(batch['X'].to(device),
                                      batch['label'].to(device))

        # Backward
        loss.backward()

        train_loss.update(loss.item())

        # optimize (gradient accumulation over args.batch_size items)
        if (i + 1) % args.batch_size == 0:
            optimizer.step()
            optimizer.zero_grad()

    # Last batch might not be whole
    optimizer.step()
    optimizer.zero_grad()

    # Eval
    metrics_log = {
        'val_loss': None,
        'val_ari': None,
        'val_nmi': None,
        'val_acc': None,
        'val_tpr': None,
        'val_tnr': None,
        'val_num_failures': None
    }
    if args.eval_during_training:
        # full cluster eval on the first epoch and every eval_freq epochs;
        # cheap loss-only eval otherwise
        if ((t + 1) % args.eval_freq == 0) or (t == 0):
            metrics_log = eval_fn(model, criterion, val_dl_cluster, args,
                                  device, True)
        else:
            metrics_log = eval_fn(model, criterion, val_dl, args, device,
                                  False)

    # Compute early stop loss
    early_stop_metrics_log = eval_fn(model, criterion, early_stop_dl, args,
                                     device, False)

    metrics_log['early_stop_loss'] = early_stop_metrics_log['val_loss']
    metrics_log['train_loss'] = train_loss.avg

    return metrics_log
# Example #12
def run_epoch(t, args, model, criterion, optimizer, train_dl, early_stop_dl,
              val_dl, val_dl_cluster, eval_fn, device):
    """Train for one epoch on MIL pair bags, then run the evaluation passes.

    Each raw batch is first converted to multiple-instance pair bags (with or
    without pair indicators, per args). Returns a metrics dict containing the
    validation metrics (when enabled), 'early_stop_loss' and 'train_loss'.
    """
    loss_meter = AverageMeter()
    model.train()

    for raw_batch in train_dl:
        # Convert the raw batch into MIL pair bags.
        if args.pair_indicators:
            bags = batch_to_mil_pair_indicators(raw_batch, args.n_pairs,
                                                args.train_pairs_replacement)
        else:
            bags = batch_to_mil_pairs(raw_batch, args.n_pairs, args.stratify,
                                      args.pair_bag_len)

        # Forward pass on (features, bag assignments) against pair targets.
        inputs = (bags['X'].to(device), bags['B'].to(device))
        batch_loss = criterion(model(inputs), bags['T'].to(device))

        # Backward + one optimizer step per batch.
        batch_loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        loss_meter.update(batch_loss.item())

    # Evaluation: full cluster eval on the first epoch and every eval_freq
    # epochs; cheap loss-only eval otherwise.
    metrics_log = dict.fromkeys(['val_loss', 'val_ari', 'val_nmi', 'val_acc',
                                 'val_tpr', 'val_tnr', 'val_num_failures'])
    if args.eval_during_training:
        cluster_eval = (t == 0) or ((t + 1) % args.eval_freq == 0)
        loader = val_dl_cluster if cluster_eval else val_dl
        metrics_log = eval_fn(model, criterion, loader, args, device,
                              cluster_eval)

    early_stop_log = eval_fn(model, criterion, early_stop_dl, args, device,
                             False)
    metrics_log['early_stop_loss'] = early_stop_log['val_loss']
    metrics_log['train_loss'] = loss_meter.avg

    return metrics_log
# Example #13
def train(epoch, trainData, model, crite, optimizer, logger):
    """Train a depth-estimation model for one epoch, logging to TensorBoard.

    Every 10 batches prints the current/average metrics and writes the loss
    and per-metric scalars to `logger` (SummaryWriter-style) at step
    `epoch * len(trainData) + i`.

    Relies on module-level globals: opt, Result, AverageMeter, time, torch.
    """
    average_meter = AverageMeter()
    model.train()  # switch to train mode
    end = time.time()

    for i, (image, depth) in enumerate(trainData):
        image = image.cuda()
        depth = depth.cuda()
        # normal = normal.cuda()
        # image = torch.autograd.Variable(image)
        # depth = torch.autograd.Variable(depth)
        # sync so the data/GPU time split is accurate (CUDA is async)
        torch.cuda.synchronize()
        data_time = time.time() - end

        end = time.time()
        optimizer.zero_grad()
        pred = model(image)
        loss = crite(pred, depth)
        loss.backward()
        optimizer.step()
        torch.cuda.synchronize()
        gpu_time = time.time() - end

        # measure accuracy and record loss
        result = Result()
        result.evaluate(pred.data, depth.data)
        average_meter.update(result, gpu_time, data_time, image.size(0))
        end = time.time()

        if (i + 1) % 10 == 0:
            print('=> output: {}'.format(opt.output_dir))
            print('Train Epoch: {0} [{1}/{2}]\t'
                  't_Data={data_time:.3f}({average.data_time:.3f}) '
                  't_GPU={gpu_time:.3f}({average.gpu_time:.3f})\n\t'
                  'Loss={Loss:.5f} '
                  'RMSE={result.rmse:.2f}({average.rmse:.2f}) '
                  'RML={result.absrel:.2f}({average.absrel:.2f}) '
                  'Log10={result.lg10:.3f}({average.lg10:.3f}) '
                  'Delta1={result.delta1:.3f}({average.delta1:.3f}) '
                  'Delta2={result.delta2:.3f}({average.delta2:.3f}) '
                  'Delta3={result.delta3:.3f}({average.delta3:.3f})'.format(
                epoch, i + 1, len(trainData), data_time=data_time,
                gpu_time=gpu_time, Loss=loss.item(), result=result, average=average_meter.average()))
            current_step = epoch * len(trainData) + i
            # FIX: log the scalar value, not the live tensor — passing the
            # tensor keeps a reference to the autograd graph and is
            # inconsistent with the other scalar logs below.
            logger.add_scalar('Train/loss', loss.item(), current_step)
            logger.add_scalar('Train/RMSE', result.rmse, current_step)
            logger.add_scalar('Train/rml', result.absrel, current_step)
            logger.add_scalar('Train/Log10', result.lg10, current_step)
            logger.add_scalar('Train/Delta1', result.delta1, current_step)
            logger.add_scalar('Train/Delta2', result.delta2, current_step)
            logger.add_scalar('Train/Delta3', result.delta3, current_step)
# Example #14
def pdf_eval_fn(model, criterion, val_dl, args, device, eval_clusters):
    """Validation pass for the anchored DAC model on PDF data.

    Always computes the average anchored loss; when `eval_clusters` is True
    additionally runs clustering and scores it with ARI/NMI and the failure
    rate. Returns a metrics dict (unused slots are None).
    """
    ari_meter = AverageMeter()
    nmi_meter = AverageMeter()
    loss_meter = AverageMeter()
    failure_meter = AverageMeter()

    model.eval()
    with torch.no_grad():
        for raw_batch in val_dl:
            # Convert batch to be compatible with DAC model
            dac_batch = batch_to_dac_compatible(raw_batch)

            features = dac_batch['X'].to(device)
            loss = model.loss_fn_anchored(features,
                                          dac_batch['label'].to(device))
            loss_meter.update(loss.item())

            if eval_clusters:
                params, labels, ll, fail = model.cluster_anchored(
                    features,
                    max_iter=args.max_iter,
                    verbose=False,
                    check=True)

                predicted = labels[0].cpu().numpy()
                # one-hot labels -> class indices (CPU)
                truth = dac_batch['label'][0].argmax(-1).numpy()

                ari_meter.update(adjusted_rand_score(truth, predicted))
                nmi_meter.update(
                    normalized_mutual_info_score(
                        truth, predicted, average_method='arithmetic'))
                failure_meter.update(int(fail))

    # Base dict with every slot None; fill in what was actually computed.
    metrics_log = {
        'val_loss': loss_meter.avg,
        'val_ari': None,
        'val_nmi': None,
        'val_acc': None,
        'val_tpr': None,
        'val_tnr': None,
        'val_num_failures': None
    }
    if eval_clusters:
        metrics_log['val_ari'] = ari_meter.avg
        metrics_log['val_nmi'] = nmi_meter.avg
        metrics_log['val_num_failures'] = failure_meter.avg

    return metrics_log
 def compute_depth_metrics(self, verbose=True) -> Result:
     """Computes metrics on the difference between raw and fixed depth values"""
     # Each image contributes equally (n=1); gpu/data times are irrelevant
     # for this offline comparison, so 0 is recorded for both.
     avg = AverageMeter()
     for i, path in enumerate(self.paths):
         _, depth_raw, depth_fix = self.load_images(path)
         depth_raw = torch.tensor(depth_raw)
         depth_fix = torch.tensor(depth_fix)
         res = Result()
         res.evaluate(depth_raw, depth_fix)
         avg.update(res, 0, 0, 1)
         if verbose:
             # \r keeps the progress on one terminal line
             stdout.write(f"=> computing img {i}/{len(self)}\r")
     if verbose:
         stdout.write("\n")
     return avg.average()
def validate(val_loader, model, criterion, epoch, writer=None):
    """Evaluate a classifier on `val_loader`; returns the average loss.

    Args:
        val_loader: yields (input, target) batches (moved to CUDA).
        model: network whose output is averaged over dim 2 before the loss
            (presumably per-clip/segment scores — TODO confirm).
        criterion: loss on (output, target).
        epoch: 0-based epoch index; scalars are logged at step epoch + 1.
        writer: optional SummaryWriter-style object for val scalars.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for step, (input, target) in enumerate(val_loader):
            input = input.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)

            # compute output
            output = model(input)
            output = torch.mean(output, dim=2)
            loss = criterion(output, target)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(output, target, topk=(1, 5))

            losses.update(loss.item(), input.size(0))
            top1.update(prec1[0], input.size(0))
            top5.update(prec5[0], input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if step % config.TEST.PRINT_FREQ == 0:
                print(('Test: [{0}/{1}]\t'
                       'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                       'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                       'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                       'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                           step,
                           len(val_loader),
                           batch_time=batch_time,
                           loss=losses,
                           top1=top1,
                           top5=top5)))

        print((
            'Testing Results: Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f} Loss {loss.avg:.5f}'
            .format(top1=top1, top5=top5, loss=losses)))

        if writer:
            writer.add_scalar('val/loss', losses.avg, epoch + 1)
            writer.add_scalar('val/top1', top1.avg, epoch + 1)
            writer.add_scalar('val/top5', top5.avg, epoch + 1)

    return losses.avg
# Example #17
def train_coarse(train_loader, model, criterion, optimizer, epoch):
    """One training epoch for the coarse depth network.

    Every `args.print_freq` batches the current loss is appended to the
    module-level `history_loss` and the running metrics are printed.

    Relies on module-level globals: args, output_directory, history_loss,
    Result, AverageMeter, time, torch.
    """
    average_meter = AverageMeter()
    model.train()  # switch to train mode
    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        input, target = input.cuda(), target.cuda()
        # sync so the data/GPU time split is accurate (CUDA is async)
        torch.cuda.synchronize()
        data_time = time.time() - end

        # compute pred
        end = time.time()
        pred = model(input)
        loss = criterion(pred, target)
        optimizer.zero_grad()
        loss.backward()  # compute gradient and do SGD step
        optimizer.step()
        torch.cuda.synchronize()
        gpu_time = time.time() - end

        # measure accuracy and record loss
        end = time.time()
        result = Result()
        result.evaluate(pred.data, target.data)
        average_meter.update(result, gpu_time, data_time, input.size(0))
        # NOTE(review): eval_time is measured but never printed or returned.
        eval_time = time.time() - end

        if (i + 1) % args.print_freq == 0:
            history_loss.append(loss.item())
            print('=> output: {}'.format(output_directory))
            print('Train Epoch: {0} [{1}/{2}]\t'
                  't_Data={data_time:.3f}({average.data_time:.3f}) '
                  't_GPU={gpu_time:.3f}({average.gpu_time:.3f})\n\t'
                  'RMSE={result.rmse:.2f}({average.rmse:.2f}) '
                  'MAE={result.mae:.2f}({average.mae:.2f}) '
                  'Delta1={result.delta1:.3f}({average.delta1:.3f}) '
                  'REL={result.absrel:.3f}({average.absrel:.3f}) '
                  'Lg10={result.lg10:.3f}({average.lg10:.3f}) '.format(
                      epoch,
                      i + 1,
                      len(train_loader),
                      data_time=data_time,
                      gpu_time=gpu_time,
                      result=result,
                      average=average_meter.average()))
# Example #18
def validate_epoch(val_loader, model, loss_fn, use_cuda):
    """Evaluate `model` on `val_loader`; prints and returns avg top-1 accuracy."""
    top1 = AverageMeter()
    top5 = AverageMeter()
    model.eval()
    for batch_input, batch_label in tqdm(val_loader):
        with torch.no_grad():
            if use_cuda:
                batch_label = batch_label.cuda()
                batch_input = batch_input.cuda()
            in_var = torch.autograd.Variable(batch_input)
            lbl_var = torch.autograd.Variable(batch_label)
            output = model(in_var)
            # computed for parity with training; the value itself is unused
            loss = loss_fn(output, lbl_var)
        prec1, prec5 = accuracy(output.data, batch_label, topk=(1, 5))
        top1.update(prec1, batch_input.size(0))
        top5.update(prec5, batch_input.size(0))
    print('  **Test** Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'.format(
        top1=top1, top5=top5))
    return top1.avg
def test(test_loader, model, criterion, epoch):
    """Run inference over ``test_loader`` and report loss and top-1/top-5 accuracy.

    Args:
        test_loader: iterable of (input, target) batches.
        model: network to evaluate (switched to eval mode here).
        criterion: loss function applied to (output, target).
        epoch: unused; kept for call-site compatibility.

    Returns:
        (top1, top5, losses) AverageMeter instances with accumulated stats.
    """
    print('\n Running inference on test data...\n')
    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        batch_time = AverageMeter()
        losses = AverageMeter()
        top1 = AverageMeter()
        top5 = AverageMeter()

        end = time.time()
        for i, (input, target) in enumerate(test_loader):
            target = target.to(device)
            input = input.to(device)

            # compute output
            output = model(input)
            loss = criterion(output, target)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(output.data, target.data, topk=(1, 5))
            losses.update(loss.data.item(), input.size(0))
            top1.update(prec1.item(), input.size(0))
            top5.update(prec5.item(), input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            # BUG FIX: `i % args.print_freq // 4 == 0` parsed left-to-right as
            # `(i % args.print_freq) // 4 == 0`, i.e. it printed on the first
            # FOUR of every print_freq iterations.  The intent (presumably) is
            # to print 4x as often as print_freq:
            if i % (args.print_freq // 4) == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          i,
                          len(test_loader),
                          batch_time=batch_time,
                          loss=losses,
                          top1=top1,
                          top5=top5))

        print(' * Acc@1 {top1.avg:.3f} Acc@1 Error {top1_err:.3f}\n'
              ' * Acc@5 {top5.avg:.3f} Acc@5 Error {top5_err:.3f}'.format(
                  top1=top1,
                  top1_err=100 - top1.avg,
                  top5=top5,
                  top5_err=100 - top5.avg))

        return top1, top5, losses
def evaluate(epoch, test_loader, tokenizer, params):
    """Compute the mean masked-LM loss of the module-level ``model`` on ``test_loader``.

    Logs the average loss for this epoch and returns it.
    """
    device = params.device
    loss_meter = AverageMeter()

    model.eval()
    with torch.no_grad():
        for batch in test_loader:
            batch = batch.to(device)
            # mask a subset of tokens; train/eval target is the original tokens
            masked_inputs, lm_labels = mask_tokens(batch, tokenizer, params)
            masked_inputs = masked_inputs.to(device)
            lm_labels = lm_labels.to(device)
            # transformers models return a tuple; element 0 is the loss
            outputs = model(masked_inputs, masked_lm_labels=lm_labels)
            loss_meter.update(outputs[0].item())

    logging.info('Test-E-{}: loss: {:.4f}'.format(epoch, loss_meter()))
    return loss_meter()
Example #21
0
 def test_cls(self, val_loader, opts, log_file):
     """Evaluate ``self.model`` on ``val_loader``.

     Prints periodic progress, appends the final top-1 precision to
     ``log_file`` and returns it.
     """
     timer = AverageMeter()
     loss_meter = AverageMeter()
     acc_meter = AverageMeter()
     self.model.eval()
     tic = time.time()
     with torch.no_grad():
         for idx, (batch, labels) in enumerate(val_loader):
             # only the labels are moved to GPU; inputs stay as loaded
             labels = labels.cuda()
             logits = self.model(batch)
             step_loss = self.criterion(logits, labels)
             step_prec = accuracy(logits, labels)[0]
             loss_meter.update(step_loss.item(), labels.size(0))
             acc_meter.update(step_prec.item(), labels.size(0))
             timer.update(time.time() - tic)
             tic = time.time()
             if idx % opts.log_interval == 0:
                 print('Test: [{0}/{1}]\t'
                       'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                       'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                       'Prec {top1.val:.4f}% ({top1.avg:.4f}%)'.format(
                           idx,
                           len(val_loader),
                           batch_time=timer,
                           loss=loss_meter,
                           top1=acc_meter))
     print(' * Prec {top1.avg:.4f}% '.format(top1=acc_meter))
     files = WriteData(log_file)
     files.write_data_txt('test: {top1.avg:.4f}% '.format(top1=acc_meter))
     return acc_meter.avg
Example #22
0
def evaluate(params, loader, model, experiment):
    """Run depth-prediction inference over ``loader``.

    Clips predictions to [depth_min, depth_max], accumulates metrics in an
    AverageMeter, logs merged input/target/prediction images to comet, and
    optionally saves them under params["experiment_dir"].
    """
    print("Testing...")
    # BUG FIX: the original `with experiment.test() and torch.no_grad():`
    # evaluated the `and` expression to a SINGLE object, so only one of the
    # two context managers was actually entered.  Use the tuple form so both
    # are active.
    with experiment.test(), torch.no_grad():
        average = AverageMeter()
        end = time.time()
        for i, (inputs, targets) in enumerate(loader):
            inputs, targets = inputs.to(params["device"]), targets.to(
                params["device"])

            data_time = time.time() - end

            # Predict
            end = time.time()
            outputs = model(inputs)
            gpu_time = time.time() - end

            # Clip prediction to the valid depth range
            outputs[outputs > params["depth_max"]] = params["depth_max"]
            outputs[outputs < params["depth_min"]] = params["depth_min"]

            result = Result()
            result.evaluate(outputs.data, targets.data)
            average.update(result, gpu_time, data_time, inputs.size(0))

            # Log images to comet
            img_merged = utils.log_image_to_comet(inputs[0],
                                                  targets[0],
                                                  outputs[0],
                                                  epoch=0,
                                                  id=i,
                                                  experiment=experiment,
                                                  result=result,
                                                  prefix="visual_test")
            if params["save_test_images"]:
                filename = os.path.join(
                    params["experiment_dir"],
                    "image_{}_epoch_{}.png".format(i,
                                                   str(params["start_epoch"])))
                utils.save_image(img_merged, filename)
Example #23
0
def validate(val_loader, model, criterion):
    """Evaluate ``model`` on ``val_loader`` and return the mean top-1 precision.

    Prints progress every 100 batches and a final summary line.
    """
    batch_timer = AverageMeter()
    loss_meter = AverageMeter()
    acc1_meter = AverageMeter()
    acc5_meter = AverageMeter()

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        tic = time.time()
        for step, (images, labels) in enumerate(val_loader):

            images = images.cuda(non_blocking=True)
            labels = labels.cuda(non_blocking=True)

            # forward pass + loss
            logits = model(images)
            step_loss = criterion(logits, labels)

            # accuracy / loss bookkeeping
            acc1, acc5 = accuracy(logits, labels, topk=(1, 5))
            loss_meter.update(step_loss.item(), images.size(0))
            acc1_meter.update(acc1[0], images.size(0))
            acc5_meter.update(acc5[0], images.size(0))

            # per-batch wall time
            batch_timer.update(time.time() - tic)
            tic = time.time()

            if step % 100 == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          step,
                          len(val_loader),
                          batch_time=batch_timer,
                          loss=loss_meter,
                          top1=acc1_meter,
                          top5=acc5_meter))

        print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'.format(
            top1=acc1_meter, top5=acc5_meter))

    return acc1_meter.avg
Example #24
0
 def train(self, trainloader, epoch, opts, log_file):
     """Train ``self.model`` for one epoch over ``trainloader``.

     Prints progress every ``opts.log_interval`` batches and appends the
     epoch's mean precision and loss to ``log_file``.
     """
     batch_timer = AverageMeter()
     data_timer = AverageMeter()
     loss_meter = AverageMeter()
     acc_meter = AverageMeter()
     self.model.train()
     tic = time.time()
     for step, (batch, labels) in enumerate(trainloader):
         data_timer.update(time.time() - tic)
         labels = labels.cuda()
         logits = self.model(batch)
         step_loss = self.criterion(logits, labels)
         step_prec = accuracy(logits, labels)[0]
         loss_meter.update(step_loss.item(), labels.size(0))
         acc_meter.update(step_prec.item(), labels.size(0))
         # standard step: clear grads, backprop, update weights
         self.optimizer.zero_grad()
         step_loss.backward()
         self.optimizer.step()
         batch_timer.update(time.time() - tic)
         tic = time.time()
         if step % opts.log_interval == 0:
             print('Epoch: [{0}][{1}/{2}]\t'
                   'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                   'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                   'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                   'Prec {top1.val:.4f}% ({top1.avg:.4f}%)'.format(
                       epoch,
                       step,
                       len(trainloader),
                       batch_time=batch_timer,
                       data_time=data_timer,
                       loss=loss_meter,
                       top1=acc_meter))
     files = WriteData(log_file)
     files.write_data_txt('train: {top1.avg:.4f}%'.format(top1=acc_meter) +
                          '    {loss.avg:.4f}'.format(loss=loss_meter))
def validate(val_loader, model, epoch, write_to_file=True):
    """Evaluate the depth model on ``val_loader``.

    Accumulates RMSE/MAE/delta/REL/lg10 metrics, saves an 8-row comparison
    image for the epoch (unless modality is depth-only), optionally appends
    the averages to ``test_csv``, and returns (avg_result, img_merge).
    """
    average_meter = AverageMeter()

    # switch to evaluate mode
    model.eval()

    # BUG FIX: img_merge was only ever assigned inside the loop body, so an
    # empty val_loader raised UnboundLocalError at the final return.
    img_merge = None

    end = time.time()
    for i, (input, target) in enumerate(val_loader):
        input, target = input.cuda(), target.cuda()
        # torch.cuda.synchronize()
        data_time = time.time() - end

        # compute output
        end = time.time()
        with torch.no_grad():
            pred = model(input)
        # torch.cuda.synchronize()
        gpu_time = time.time() - end

        # measure accuracy and record loss
        result = Result()
        result.evaluate(pred.data, target.data)
        average_meter.update(result, gpu_time, data_time, input.size(0))
        end = time.time()

        # save 8 images for visualization (one every `skip` batches)
        skip = 50
        if args.modality == 'd':
            img_merge = None
        else:
            # NOTE(review): assumes modality is one of 'rgb'/'rgbd' here;
            # any other value would leave `rgb` unbound below — confirm.
            if args.modality == 'rgb':
                rgb = input
            elif args.modality == 'rgbd':
                rgb = input[:, :3, :, :]
                depth = input[:, 3:, :, :]

            if i == 0:
                if args.modality == 'rgbd':
                    img_merge = utils.merge_into_row_with_gt(
                        rgb, depth, target, pred)
                else:
                    img_merge = utils.merge_into_row(rgb, target, pred)
            elif (i < 8 * skip) and (i % skip == 0):
                if args.modality == 'rgbd':
                    row = utils.merge_into_row_with_gt(rgb, depth, target,
                                                       pred)
                else:
                    row = utils.merge_into_row(rgb, target, pred)
                img_merge = utils.add_row(img_merge, row)
            elif i == 8 * skip:
                filename = output_directory + '/comparison_' + str(
                    epoch) + '.png'
                utils.save_image(img_merge, filename)

        if (i + 1) % args.print_freq == 0:
            print('Test: [{0}/{1}]\t'
                  't_GPU={gpu_time:.3f}({average.gpu_time:.3f})\n\t'
                  'RMSE={result.rmse:.2f}({average.rmse:.2f}) '
                  'MAE={result.mae:.2f}({average.mae:.2f})\n\t'
                  'Delta1={result.delta1:.3f}({average.delta1:.3f}) '
                  'REL={result.absrel:.3f}({average.absrel:.3f}) '
                  'Lg10={result.lg10:.3f}({average.lg10:.3f}) '.format(
                      i + 1,
                      len(val_loader),
                      gpu_time=gpu_time,
                      result=result,
                      average=average_meter.average()))

    avg = average_meter.average()

    print('\n*\n'
          'RMSE={average.rmse:.3f}\n'
          'MAE={average.mae:.3f}\n'
          'Delta1={average.delta1:.3f}\n'
          'REL={average.absrel:.3f}\n'
          'Lg10={average.lg10:.3f}\n'
          't_GPU={time:.3f}\n'.format(average=avg, time=avg.gpu_time))

    if write_to_file:
        with open(test_csv, 'a') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writerow({
                'mse': avg.mse,
                'rmse': avg.rmse,
                'absrel': avg.absrel,
                'lg10': avg.lg10,
                'mae': avg.mae,
                'delta1': avg.delta1,
                'delta2': avg.delta2,
                'delta3': avg.delta3,
                'data_time': avg.data_time,
                'gpu_time': avg.gpu_time
            })

    return avg, img_merge
Example #26
0
def validate(localizer,
             adversarial,
             dataloader,
             experiment_directory,
             labels,
             segmentation_map_threshold,
             num_classes,
             evaluate=False,
             save_results=False):
    """ Loop over the validation set (in batches) to acquire relevant metrics """
    # localizer: classifier that also returns Grad-CAMs for its predictions.
    # adversarial: optional critic run on mask-erased images (may be None).
    # evaluate: when True, each target also carries a ground-truth
    #   segmentation map and mIoU/precision/recall are computed.
    # save_results: when True, heatmaps, erased images and segmentation maps
    #   are written into experiment_directory.
    print('Validating...')
    if evaluate:
        # NOTE(review): class count hard-coded to 20 although `num_classes`
        # is a parameter — confirm whether Metrics(num_classes) was intended.
        metrics = Metrics(20)
    localizer_criterion = torch.nn.BCELoss()
    adversarial_criterion = torch.nn.BCELoss()
    localizer_loss_meter = AverageMeter()
    adversarial_loss_meter = AverageMeter()
    for i, (inputs, targets) in enumerate(dataloader):
        if evaluate:
            # Segmentation maps are included in the targets
            targets, segmentation_maps = targets
        else:
            segmentation_maps = None

        if torch.cuda.is_available():
            inputs, targets = inputs.cuda(), targets.cuda()

        # Forward pass; gcams is a tuple unpacked just below.
        output, gcams = localizer(inputs, labels=targets)

        loss = localizer_criterion(output, targets)
        localizer_loss_meter.update(loss.item())

        gcams, new_images, new_targets, original_targets = gcams

        if adversarial is not None or save_results:
            new_batch_size = gcams.size(0)
            # Turn the Grad-CAMs into binary-ish erase masks.
            masks = gcam_to_mask(gcams)

            masked_image = erase_mask(new_images, masks)
            if adversarial is not None:
                # Critic scores the erased images against the original labels.
                adversarial_output = adversarial(masked_image)
                adversarial_output = torch.sigmoid(adversarial_output)
                adversarial_loss = adversarial_criterion(
                    adversarial_output, original_targets)
                adversarial_loss_meter.update(adversarial_loss.item())

            if save_results:
                for k in range(new_batch_size):
                    # Global running index across batches of (possibly
                    # expanded) samples.
                    number = f'{i * new_batch_size + k}'  #TODO: fix numbering when batch sizes vary
                    label_string = labels[new_targets[k]]
                    file_postfix = f'{number}_{label_string}'
                    save_location = os.path.join(
                        experiment_directory, f'heatmap_{file_postfix}.png')
                    save_gradcam(filename=save_location,
                                 gcam=gcams[k, 0].detach(),
                                 raw_image=new_images[k].clone())
                    save_location = os.path.join(
                        experiment_directory,
                        f'raw_heatmap_{file_postfix}.png')
                    save_gradcam(filename=save_location,
                                 gcam=gcams[k, 0].detach())
                    save_location = os.path.join(experiment_directory,
                                                 f'erased_{file_postfix}.png')
                    tensor2imwrite(save_location, denormalize(masked_image[k]))

        if evaluate:
            # Generate and visualize predicted segmentation map
            predicted_segmentation_maps = generate_segmentation_map(
                gcams,
                num_classes,
                segmentation_maps.shape[1:],
                new_targets,
                threshold=segmentation_map_threshold)
            metrics.update(predicted_segmentation_maps, segmentation_maps)

            if save_results:
                predicted_indices = predicted_segmentation_maps.unique()
                # Index 0 is background; shift the class labels by one.
                all_labels = ['background', *labels]
                predicted_labels = [
                    all_labels[idx] for idx in predicted_indices
                ]
                labels_string = '_'.join(predicted_labels)
                filename = f'map_{i:04d}_{labels_string}.png'
                save_location = os.path.join(experiment_directory, filename)
                # NOTE(review): `k` here is left over from the save loop above
                # (its last value), so the overlay uses the batch's LAST image
                # — confirm this is intentional.
                save_segmentation_map(save_location,
                                      predicted_segmentation_maps,
                                      denormalize(new_images[k]).clone())
                filename = f'map_raw_{i:04d}_{labels_string}.png'
                save_location = os.path.join(experiment_directory, filename)
                save_segmentation_map(save_location,
                                      predicted_segmentation_maps)

    print('Validation localizer loss:', localizer_loss_meter.avg)
    print('Validation adversarial loss:', adversarial_loss_meter.avg)

    if evaluate:
        miou = metrics.miou().item()
        precision = metrics.precision(skip_background=True).item()
        recall = metrics.recall(skip_background=True).item()
        metrics.print_scores_per_class()
        print('mIoU:', miou)
        print('precision:', precision)
        print('recall:', recall)
Example #27
0
File: train.py Project: syt2/CRA
def train(cfg, writer, logger):
    """Full training driver: device/seed setup, data, model, optimizer,
    optional checkpoint resume, then the epoch loop with checkpointing and
    TensorBoard logging.

    Args:
        cfg: nested config dict (reads the "cuda" key and the "training"
            section: seed, epochs, save_interval, resume, best_model).
        writer: TensorBoard SummaryWriter; its logdir also holds checkpoints.
        logger: logger used for progress messages.
    """
    # This statement must be declared before using pytorch
    use_cuda = False
    if cfg.get("cuda", None) is not None:
        if cfg.get("cuda", None) != "all":
            os.environ["CUDA_VISIBLE_DEVICES"] = cfg.get("cuda", None)
        use_cuda = torch.cuda.is_available()

    # Setup random seed (torch, cuda, numpy and random for reproducibility)
    seed = cfg["training"].get("seed", random.randint(1, 10000))
    torch.manual_seed(seed)
    if use_cuda:
        torch.cuda.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    # Setup Dataloader
    train_loader, val_loader = get_loader(cfg)

    # Setup Model
    model = get_model(cfg)
    # writer.add_graph(model, torch.rand([1, 3, 224, 224]))
    if use_cuda and torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model,
                                      device_ids=list(
                                          range(torch.cuda.device_count())))

    # Setup optimizer, lr_scheduler and loss function
    optimizer = get_optimizer(model.parameters(), cfg)
    scheduler = get_scheduler(optimizer, cfg)
    loss_fn = get_loss_fn(cfg)

    # Setup Metrics
    epochs = cfg["training"]["epochs"]
    recorder = RecorderMeter(epochs)
    start_epoch = 0

    # save model parameters every <n> epochs
    save_interval = cfg["training"]["save_interval"]

    if use_cuda:
        model.cuda()
        loss_fn.cuda()

    # Resume Trained Model
    # NOTE(review): these joins run before the None-check below, so a config
    # with training.resume == None would raise TypeError here — confirm the
    # config always provides string values for resume/best_model.
    resume_path = os.path.join(writer.file_writer.get_logdir(),
                               cfg["training"]["resume"])
    best_path = os.path.join(writer.file_writer.get_logdir(),
                             cfg["training"]["best_model"])

    if cfg["training"]["resume"] is not None:
        if os.path.isfile(resume_path):
            logger.info(
                "Loading model and optimizer from checkpoint '{}'".format(
                    resume_path))
            checkpoint = torch.load(resume_path)
            state = checkpoint["state_dict"]
            # Strip "module." prefixes saved by DataParallel when resuming
            # on a single device.
            if torch.cuda.device_count() <= 1:
                state = convert_state_dict(state)
            model.load_state_dict(state)
            optimizer.load_state_dict(checkpoint["optimizer"])
            scheduler.load_state_dict(checkpoint["scheduler"])
            start_epoch = checkpoint["epoch"]
            recorder = checkpoint['recorder']
            logger.info("Loaded checkpoint '{}' (epoch {})".format(
                resume_path, checkpoint["epoch"]))
        else:
            logger.info("No checkpoint found at '{}'".format(resume_path))

    epoch_time = AverageMeter()
    for epoch in range(start_epoch, epochs):
        start_time = time.time()
        # Estimate remaining wall time from the mean epoch duration so far.
        need_hour, need_mins, need_secs = convert_secs2time(epoch_time.avg *
                                                            (epochs - epoch))
        need_time = '[Need: {:02d}:{:02d}:{:02d}]'.format(
            need_hour, need_mins, need_secs)
        logger.info(
            '\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [learning_rate={:8.6f}]'.
            format(time_string(), epoch, epochs, need_time, optimizer.
                   param_groups[0]['lr']) +  # scheduler.get_last_lr() >=1.4
            ' [Best : Accuracy={:.2f}]'.format(recorder.max_accuracy(False)))
        train_acc, train_los = train_epoch(train_loader, model, loss_fn,
                                           optimizer, use_cuda, logger)
        val_acc, val_los = validate_epoch(val_loader, model, loss_fn, use_cuda,
                                          logger)
        scheduler.step()

        is_best = recorder.update(epoch, train_los, train_acc, val_los,
                                  val_acc)
        if is_best or epoch % save_interval == 0 or epoch == epochs - 1:  # save model (resume model and best model)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'recorder': recorder,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'scheduler': scheduler.state_dict(),
                }, is_best, best_path, resume_path)

            for name, param in model.named_parameters():  # save histogram
                writer.add_histogram(name,
                                     param.clone().cpu().data.numpy(), epoch)

        writer.add_scalar('Train/loss', train_los, epoch)  # save curves
        writer.add_scalar('Train/acc', train_acc, epoch)
        writer.add_scalar('Val/loss', val_los, epoch)
        writer.add_scalar('Val/acc', val_acc, epoch)

        epoch_time.update(time.time() - start_time)

    writer.close()
    def train_epoch(self):
        """Run one training epoch of the bar/phrase generator with latent
        ("Z") discriminators.

        After the pretraining phase, the two Z-discriminators are updated on
        even (epoch + iteration) counts; the generator is updated every
        iteration.  Loss curves, sample/origin piano-roll images and a short
        free-running generation are written to the summary writer, then the
        LR schedulers are stepped from the last recorded losses.
        """
        tqdm_batch = tqdm(self.dataloader,
                          total=self.dataset.num_iterations,
                          desc="epoch-{}".format(self.epoch))

        image_sample = None
        Tensor = torch.cuda.FloatTensor

        avg_generator_loss = AverageMeter()
        avg_barZ_disc_loss = AverageMeter()
        avg_phraseZ_disc_loss = AverageMeter()
        for curr_it, (note, pre_note, pre_phrase,
                      position) in enumerate(tqdm_batch):
            # BUG FIX: `async` became a reserved keyword in Python 3.7, so
            # `tensor.cuda(async=...)` is a SyntaxError; PyTorch 0.4 renamed
            # the argument to `non_blocking`.
            note = note.cuda(non_blocking=self.config.async_loading)
            pre_note = pre_note.cuda(non_blocking=self.config.async_loading)
            pre_phrase = pre_phrase.cuda(non_blocking=self.config.async_loading)
            position = position.cuda(non_blocking=self.config.async_loading)

            note = Variable(note)
            pre_note = Variable(pre_note)
            pre_phrase = Variable(pre_phrase)
            position = Variable(position)

            # Real/fake targets for the BCE-style discriminator losses.
            valid_target = Variable(Tensor(note.size(0)).fill_(1.0),
                                    requires_grad=False)
            fake_target = Variable(Tensor(note.size(0)).fill_(0.0),
                                   requires_grad=False)
            valid_target_double = Variable(Tensor(note.size(0) * 2).fill_(1.0),
                                           requires_grad=False)

            self.iteration += 1

            ####################
            self.generator.train()
            self.z_discriminator_bar.train()
            self.z_discriminator_phrase.train()

            self.generator.zero_grad()
            self.z_discriminator_bar.zero_grad()
            self.z_discriminator_phrase.zero_grad()
            # BUG FIX: `... % 2 is 0` compared identity, not equality; use ==.
            if self.epoch > self.pretraining_step_size and (self.epoch +
                                                            curr_it) % 2 == 0:
                #################### Discriminator ####################
                # Unfreeze discriminators, freeze generator for this step.
                self.free(self.z_discriminator_bar)
                self.free(self.z_discriminator_phrase)

                self.frozen(self.generator)

                _, z, pre_z, phrase_feature = self.generator(
                    note, pre_note, pre_phrase, position)

                #### Phrase Feature ###
                # "Fake" latents are Gaussian noise scaled by config.sigma.
                phrase_fake = (torch.randn(phrase_feature.size(0),
                                           phrase_feature.size(1)) *
                               self.config.sigma).cuda()
                d_phrase_fake = self.z_discriminator_phrase(phrase_fake).view(
                    -1)
                d_phrase_real = self.z_discriminator_phrase(
                    phrase_feature).view(-1)
                phraseZ_dics_loss = self.loss_phrase(d_phrase_real, fake_target) +\
                                    self.loss_phrase(d_phrase_fake, valid_target)

                #### Bar Feature ####
                bar_fake = (torch.randn(z.size(0) * 2, z.size(1)) *
                            self.config.sigma).cuda()
                d_bar_fake = self.z_discriminator_bar(bar_fake).view(-1)
                d_bar_real1 = self.z_discriminator_bar(z).view(-1)
                d_bar_real2 = self.z_discriminator_bar(pre_z).view(-1)
                barZ_dics_loss = self.loss_bar(d_bar_real1, fake_target) + self.loss_bar(d_bar_real2, fake_target) + \
                                 self.loss_bar(d_bar_fake, valid_target_double)

                #######################
                phraseZ_dics_loss.backward()
                barZ_dics_loss.backward()

                self.opt_Zdiscriminator_bar.step()
                self.opt_Zdiscriminator_phrase.step()

                # NOTE(review): the meters receive tensors, not .item()
                # floats — confirm AverageMeter handles that as intended.
                avg_barZ_disc_loss.update(barZ_dics_loss)
                avg_phraseZ_disc_loss.update(phraseZ_dics_loss)

            #################### Generator ####################
            self.free(self.generator)

            self.frozen(self.z_discriminator_bar)
            self.frozen(self.z_discriminator_phrase)

            gen_note, z, pre_z, phrase_feature = self.generator(
                note, pre_note, pre_phrase, position)

            image_sample = gen_note
            origin_image = note

            #### Phrase Encoder Loss ###
            d_phrase_real = self.z_discriminator_phrase(phrase_feature).view(
                -1)
            loss = self.loss_phrase(d_phrase_real, valid_target)

            #### Bar Encoder Loss ####
            d_bar_real1 = self.z_discriminator_bar(z).view(-1)
            d_bar_real2 = self.z_discriminator_bar(pre_z).view(-1)
            loss += self.loss_bar(d_bar_real1, valid_target) + self.loss_bar(
                d_bar_real2, valid_target)

            #### Generator Loss ####
            # The reconstruction term is weighted differently during
            # pretraining (boolean flag below).
            loss += self.loss_generator(
                gen_note, note,
                True if self.epoch <= self.pretraining_step_size else False)

            loss.backward()

            self.opt_generator.step()

            avg_generator_loss.update(loss)

            self.summary_writer.add_scalar("train/Generator_loss",
                                           avg_generator_loss.val, self.epoch)
            # BUG FIX: `is 0` -> `== 0` (identity vs equality on int literal).
            if self.epoch > self.pretraining_step_size and self.epoch % 2 == 0:
                self.summary_writer.add_scalar(
                    "train/Bar_Z_Discriminator_loss", avg_barZ_disc_loss.val,
                    self.epoch)
                self.summary_writer.add_scalar(
                    "train/Phrase_Z_discriminator_loss",
                    avg_phraseZ_disc_loss.val, self.epoch)

        tqdm_batch.close()

        # Log generated samples from the last batch (96x60 piano rolls).
        self.summary_writer.add_image("train/sample 1",
                                      image_sample[0].reshape(1, 96,
                                                              60), self.epoch)
        self.summary_writer.add_image("train/sample 2",
                                      image_sample[1].reshape(1, 96,
                                                              60), self.epoch)
        self.summary_writer.add_image("train/sample 3",
                                      image_sample[2].reshape(1, 96,
                                                              60), self.epoch)

        # Binarize at 0.3 to show note on/off decisions.
        image_sample = torch.gt(image_sample,
                                0.3).type('torch.cuda.FloatTensor')

        self.summary_writer.add_image("train/sample_binarization 1",
                                      image_sample[0].reshape(1, 96,
                                                              60), self.epoch)
        self.summary_writer.add_image("train/sample_binarization 2",
                                      image_sample[1].reshape(1, 96,
                                                              60), self.epoch)
        self.summary_writer.add_image("train/sample_binarization 3",
                                      image_sample[2].reshape(1, 96,
                                                              60), self.epoch)

        self.summary_writer.add_image("train/origin 1",
                                      origin_image[0].reshape(1, 96,
                                                              60), self.epoch)
        self.summary_writer.add_image("train/origin 2",
                                      origin_image[1].reshape(1, 96,
                                                              60), self.epoch)
        self.summary_writer.add_image("train/origin 3",
                                      origin_image[2].reshape(1, 96,
                                                              60), self.epoch)

        # Free-running generation: feed each generated bar/phrase back in.
        with torch.no_grad():
            self.generator.eval()
            self.z_discriminator_bar.eval()
            self.z_discriminator_phrase.eval()

            outputs = []
            pre_phrase = torch.zeros(1, 1, 384, 60, dtype=torch.float32)
            pre_bar = torch.zeros(1, 1, 96, 60, dtype=torch.float32)
            phrase_idx = [330] + [i for i in range(10 - 2, -1, -1)]
            for idx in range(10):
                bar_set = []
                for _ in range(4):
                    pre_bar = self.generator(
                        torch.randn(1, 1152, dtype=torch.float32).cuda(),
                        pre_bar.cuda(), pre_phrase,
                        torch.from_numpy(np.array([phrase_idx[idx]])), False)
                    pre_bar = torch.gt(pre_bar, 0.3).type(
                        'torch.FloatTensor')  # 1, 1, 96, 96
                    bar_set.append(np.reshape(pre_bar.numpy(), [96, 60]))

                pre_phrase = np.concatenate(bar_set, axis=0)
                outputs.append(pre_phrase)
                pre_phrase = torch.from_numpy(
                    np.reshape(pre_phrase, [1, 1, 96 * 4, 60])).float().cuda()

        self.summary_writer.add_image("eval/generated 1",
                                      outputs[0].reshape(1, 96 * 4,
                                                         60), self.epoch)
        self.summary_writer.add_image("eval/generated 2",
                                      outputs[1].reshape(1, 96 * 4,
                                                         60), self.epoch)

        self.scheduler_generator.step(avg_generator_loss.val)
        # BUG FIX: `is 0` -> `== 0` here as well.
        if self.epoch > self.pretraining_step_size and (self.epoch +
                                                        curr_it) % 2 == 0:
            self.scheduler_Zdiscriminator_bar.step(avg_barZ_disc_loss.val)
            self.scheduler_Zdiscriminator_phrase.step(
                avg_phraseZ_disc_loss.val)

        self.logger.warning(
            'loss info - generator: {}, barZ disc: {},  phraseZ disc: {}'.
            format(avg_generator_loss.val, avg_barZ_disc_loss.val,
                   avg_phraseZ_disc_loss.val))
        self.logger.warning(
            'lr info - generator: {}, barZ disc: {},  phraseZ disc: {}'.format(
                self.get_lr(self.opt_generator),
                self.get_lr(self.opt_Zdiscriminator_bar),
                self.get_lr(self.opt_Zdiscriminator_phrase)))
def train(train_loader, model, criterion, optimizer, epoch, writer=None):
    """Run one training epoch with gradient accumulation.

    Args:
        train_loader: iterable yielding (input, target) batches.
        model: network to train; its output is averaged over dim 2
            (presumably a temporal/clip dimension — TODO confirm) before loss.
        criterion: loss function applied to (output, target).
        optimizer: optimizer; the last param group's lr is printed.
        epoch: current epoch index, used for logging only.
        writer: optional TensorBoard-style writer; epoch averages are
            logged once at the end of the epoch.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for step, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        input = input.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)

        # compute output; collapse dim 2 by averaging
        output = model(input)
        output = torch.mean(output, dim=2)
        loss = criterion(output, target)

        # measure accuracy and record loss (on the un-scaled loss)
        prec1, prec5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), input.size(0))
        top1.update(prec1[0], input.size(0))
        top5.update(prec5[0], input.size(0))

        # scale so the accumulated gradient averages over the window
        loss = loss / config.TRAIN.GRAD_ACCUM_STEPS

        loss.backward()

        # Step once every GRAD_ACCUM_STEPS batches. Using (step + 1) fixes
        # the off-by-one in the original (`step % N == 0`), which fired an
        # optimizer step on the very first batch after only one scaled
        # backward pass.
        if (step + 1) % config.TRAIN.GRAD_ACCUM_STEPS == 0:
            optimizer.step()
            optimizer.zero_grad()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if step % config.TRAIN.PRINT_FREQ == 0:
            print(('Epoch: [{0}][{1}/{2}], lr: {lr:.5f}\t'
                   'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                   'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                   'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                   'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                   'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                       epoch,
                       step,
                       len(train_loader),
                       batch_time=batch_time,
                       data_time=data_time,
                       loss=losses,
                       top1=top1,
                       top5=top5,
                       lr=optimizer.param_groups[-1]['lr'])))

    # Log the epoch averages once. The original logged inside the batch
    # loop at the same global_step (epoch + 1) every iteration, rewriting
    # the same scalar point repeatedly.
    if writer:
        writer.add_scalar('train/loss', losses.avg, epoch + 1)
        writer.add_scalar('train/top1', top1.avg, epoch + 1)
        writer.add_scalar('train/top5', top5.avg, epoch + 1)
Example #30
0
def CAC(cfg, writer, logger):
    """Compute and save per-class anchor (mean feature) vectors.

    Iterates the target training loader, runs the (frozen, eval-mode)
    PredNet on source images, and updates the model's per-class objective
    vectors with a running mean of the class features. The resulting
    anchors are saved under the writer's log directory.

    Args:
        cfg: configuration dict (seed, model, data, training sections).
        writer: summary writer; its log dir is used as the save location.
        logger: logger for progress/info messages.
    """
    # Seed every RNG source for reproducibility.
    torch.manual_seed(cfg.get('seed', 1337))
    torch.cuda.manual_seed(cfg.get('seed', 1337))
    np.random.seed(cfg.get('seed', 1337))
    random.seed(cfg.get('seed', 1337))

    ## create dataset
    default_gpu = cfg['model']['default_gpu']
    device = torch.device(
        "cuda:{}".format(default_gpu) if torch.cuda.is_available() else 'cpu')
    datasets = create_dataset(
        cfg, writer, logger
    )  #source_train\ target_train\ source_valid\ target_valid + _loader

    model = CustomModel(cfg, writer, logger)

    # Setup Metrics
    running_metrics_val = RunningScore(cfg['data']['target']['n_class'])
    source_running_metrics_val = RunningScore(cfg['data']['target']['n_class'])
    val_loss_meter = AverageMeter()
    source_val_loss_meter = AverageMeter()
    time_meter = AverageMeter()
    loss_fn = get_loss_function(cfg)
    flag_train = True

    epoches = cfg['training']['epoches']

    source_train_loader = datasets.source_train_loader
    target_train_loader = datasets.target_train_loader
    logger.info('source train batchsize is {}'.format(
        source_train_loader.args.get('batch_size')))
    print('source train batchsize is {}'.format(
        source_train_loader.args.get('batch_size')))
    logger.info('target train batchsize is {}'.format(
        target_train_loader.batch_size))
    print('target train batchsize is {}'.format(
        target_train_loader.batch_size))

    # Pick the validation set; defaults to cityscapes unless 'gta5'.
    if cfg.get('valset') == 'gta5':
        val_loader = datasets.source_valid_loader
        logger.info('valset is gta5')
        print('valset is gta5')
    else:
        val_loader = datasets.target_valid_loader
        logger.info('valset is cityscapes')
        print('valset is cityscapes')
    logger.info('val batchsize is {}'.format(val_loader.batch_size))
    print('val batchsize is {}'.format(val_loader.batch_size))

    # load category anchors
    # objective_vectors = torch.load('category_anchors')
    # model.objective_vectors = objective_vectors['objective_vectors']
    # model.objective_vectors_num = objective_vectors['objective_num']
    class_features = Class_Features(numbers=19)

    # begin training
    model.iter = 0
    for epoch in range(epoches):
        if not flag_train:
            break
        if model.iter > cfg['training']['train_iters']:
            break

        # monitoring the accuracy and recall of CAG-based PLA and probability-based PLA

        for (target_image, target_label,
             target_img_name) in datasets.target_train_loader:
            model.iter += 1
            i = model.iter
            if i > cfg['training']['train_iters']:
                break
            images, labels, source_img_name = datasets.source_train_loader.next(
            )
            start_ts = time.time()

            images = images.to(device)
            labels = labels.to(device)
            target_image = target_image.to(device)
            target_label = target_label.to(device)
            model.scheduler_step()
            model.train(logger=logger)
            if cfg['training'].get('freeze_bn') == True:
                model.freeze_bn_apply()
            model.optimizer_zero_grad()

            # Anchors are computed with the prediction net frozen in eval mode.
            if model.PredNet.training:
                model.PredNet.eval()
            with torch.no_grad():
                _, _, feat_cls, output = model.PredNet_Forward(images)
                # Downsample labels (nearest) to the feature map resolution
                # so per-class masks align with feat_cls.
                batch, w, h = labels.size()
                newlabels = labels.reshape([batch, 1, w, h]).float()
                newlabels = F.interpolate(newlabels,
                                          size=feat_cls.size()[2:],
                                          mode='nearest')
                vectors, ids = class_features.calculate_mean_vector(
                    feat_cls, output, newlabels, model)
                # Fold each class vector into the model's running mean anchor.
                for t in range(len(ids)):
                    model.update_objective_SingleVector(
                        ids[t], vectors[t].detach().cpu().numpy(), 'mean')

            time_meter.update(time.time() - start_ts)
            if model.iter % 20 == 0:
                print("Iter [{:d}] Time {:.4f}".format(model.iter,
                                                       time_meter.avg))

            if (i + 1) == cfg['training']['train_iters']:
                # Fix: the original assigned a dead local `flag = False`,
                # so the outer epoch loop's `if not flag_train: break`
                # guard never fired from this path.
                flag_train = False
                break

    save_path = os.path.join(
        writer.file_writer.get_logdir(), "anchors_on_{}_from_{}".format(
            cfg['data']['source']['name'],
            cfg['model']['arch'],
        ))
    torch.save(model.objective_vectors, save_path)