def train_one_epoch(epoch, model, train_loader, optimizer, tokenizer, params):
    """Run one masked-LM training epoch and log the mean loss."""
    dev = params.device
    loss_meter = AverageMeter()
    acc_meter = Accuracy(ignore_index=-1)

    model.train()
    for raw_batch in train_loader:
        optimizer.zero_grad()

        raw_batch = raw_batch.to(dev)
        # segment = create_dummy_segment(raw_batch)

        masked_inputs, mlm_labels = mask_tokens(raw_batch, tokenizer, params)
        masked_inputs = masked_inputs.to(dev)
        mlm_labels = mlm_labels.to(dev)

        # transformers models return a tuple; the first two entries are
        # (loss, prediction_scores) when masked_lm_labels is supplied.
        mlm_loss, scores = model(masked_inputs, masked_lm_labels=mlm_labels)[:2]

        mlm_loss.backward()
        optimizer.step()

        acc_meter.update(scores.view(-1, params.vocab_size), mlm_labels.view(-1))
        loss_meter.update(mlm_loss.item())

    logging.info('Train-E-{}: loss: {:.4f}'.format(epoch, loss_meter()))
# Example #2
# 0
def test(cfg, writer, logger):
    """Load a trained checkpoint and run validation on the configured datasets."""
    seed = cfg.get('seed', 1337)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    # Pick the configured GPU when available, otherwise fall back to CPU.
    default_gpu = cfg['model']['default_gpu']
    device = torch.device(
        "cuda:{}".format(default_gpu) if torch.cuda.is_available() else 'cpu')

    # source_train / target_train / source_valid / target_valid + _loader
    datasets = create_dataset(cfg, writer, logger)

    model = CustomModel(cfg, writer, logger)
    n_class = cfg['data']['target']['n_class']
    running_metrics_val = RunningScore(n_class)
    source_running_metrics_val = RunningScore(n_class)
    val_loss_meter = AverageMeter()
    source_val_loss_meter = AverageMeter()
    time_meter = AverageMeter()
    loss_fn = get_loss_function(cfg)

    # Restore the trained DeepLab weights before evaluating.
    checkpoint = torch.load(cfg['test']['path'])
    model.adaptive_load_nets(model.BaseNet,
                             checkpoint['DeepLab']['model_state'])

    validation(model, logger, writer, datasets, device,
               running_metrics_val, val_loss_meter, loss_fn,
               source_val_loss_meter, source_running_metrics_val,
               iters=model.iter)
def artif_run_epoch(t, args, model, criterion, optimizer, train_dl, early_stop_dl, val_dl, val_dl_cluster, eval_fn, device):
    """Train for one epoch on artificial data and collect evaluation metrics."""
    loss_meter = AverageMeter()
    model.train()

    for batch in train_dl:
        feats = batch['X'].to(device)
        lbls = batch['label']
        # Pairwise co-membership matrix: entry (i, j) is 1 iff labels match.
        adjacency = (lbls.unsqueeze(-2) == lbls.unsqueeze(-1)).float().to(device)

        preds = model(feats)
        batch_loss = criterion(preds, adjacency)
        batch_loss.backward()

        loss_meter.update(batch_loss.item())

        # optimize
        optimizer.step()
        optimizer.zero_grad()

    # Eval: full clustering metrics only every eval_freq epochs (or epoch 0).
    metrics_log = {k: None for k in ('val_loss', 'val_ari', 'val_nmi', 'val_acc',
                                     'val_tpr', 'val_tnr', 'val_num_failures')}
    if args.eval_during_training:
        cluster_eval = ((t + 1) % args.eval_freq == 0) or (t == 0)
        loader = val_dl_cluster if cluster_eval else val_dl
        metrics_log = eval_fn(model, criterion, loader, args, device, cluster_eval)

    early_stop_log = eval_fn(model, criterion, early_stop_dl, args, device, False)
    metrics_log['early_stop_loss'] = early_stop_log['val_loss']
    metrics_log['train_loss'] = loss_meter.avg

    return metrics_log
# Example #4
# 0
def iterate(mode, args, loader, model, optimizer, logger, epoch):
    """Run one epoch of `mode` over `loader`; returns (epoch averages, is_best).

    NOTE(review): relies on module-level names `device` and `helper` that are
    not defined in this block — confirm they exist at import time.
    """
    # Block meter resets per print window; average meter spans the epoch.
    block_average_meter = AverageMeter()
    average_meter = AverageMeter()
    meters = [block_average_meter, average_meter]

    # switch to appropriate mode
    assert mode in ["train", "val", "eval", "test_prediction", "test_completion"], \
        "unsupported mode: {}".format(mode)
    if mode == 'train':
        model.train()
        lr = helper.adjust_learning_rate(args.lr, optimizer, epoch)
    else:
        model.eval()
        lr = 0

    for i, batch_data in enumerate(loader):
        start = time.time()
        batch_data = {
            key: val.to(device)
            for key, val in batch_data.items() if val is not None
        }
        # Ground truth is absent in the pure test modes.
        gt = batch_data[
            'gt'] if mode != 'test_prediction' and mode != 'test_completion' else None
        data_time = time.time() - start

        start = time.time()
        # NOTE(review): the forward pass is wrapped in no_grad even when
        # mode == 'train' (the original comment said "added by myself"), so no
        # gradients can flow — and there is no loss.backward() anywhere in this
        # function. Confirm training is intentionally disabled here.
        with torch.no_grad():
            pred = model(batch_data)
        depth_loss, photometric_loss, smooth_loss, mask = 0, 0, 0, None

        gpu_time = time.time() - start

        # measure accuracy and record loss
        with torch.no_grad():
            mini_batch_size = next(iter(batch_data.values())).size(0)
            result = Result()
            if mode != 'test_prediction' and mode != 'test_completion':
                #result.evaluate(pred.data, gt.data, photometric_loss)
                result.evaluate(pred.data.cpu(), gt.data.cpu(),
                                photometric_loss)
            [
                m.update(result, gpu_time, data_time, mini_batch_size)
                for m in meters
            ]
            logger.conditional_print(mode, i, epoch, lr, len(loader),
                                     block_average_meter, average_meter)
            logger.conditional_save_img_comparison(mode, i, batch_data, pred,
                                                   epoch)
            logger.conditional_save_pred(mode, i, pred, epoch)

    avg = logger.conditional_save_info(mode, average_meter, epoch)
    is_best = logger.rank_conditional_save_best(mode, avg, epoch)
    if is_best and not (mode == "train"):
        logger.save_img_comparison_as_best(mode, epoch)
    logger.conditional_summarize(mode, avg, is_best)

    return avg, is_best
# Example #5
# 0
def pdf_run_epoch(t, args, model, criterion, optimizer, train_dl, val_dl,
                  early_stop_dl, val_dl_cluster, eval_fn, device):
    """Train one epoch with gradient accumulation, then run evaluation.

    Gradients are accumulated over `args.batch_size` loader items before each
    optimizer step. Returns a metrics dict that always contains
    'early_stop_loss' and 'train_loss'.
    """

    # Train
    train_loss = AverageMeter()
    model.train()

    grads_pending = False
    for i, batch in enumerate(train_dl):
        # Each loader item is a single set; add a leading batch dimension.
        batch['X'] = batch['X'].unsqueeze(0)
        batch['label'] = batch['label'].unsqueeze(0)

        B, N = batch['X'].shape[0], batch['X'].shape[1]
        anchor_idxs = sample_anchors(B, N)
        anchor_labels = batch['label'][torch.arange(B),
                                       anchor_idxs].unsqueeze(1)
        # Binary target: does each point share the anchor's label?
        target = (batch['label'] == anchor_labels).float().to(device)

        # Forward
        logits = model(batch['X'].to(device), anchor_idxs)
        loss = criterion(logits.squeeze(-1), target)

        # Backward (accumulate gradients)
        loss.backward()
        grads_pending = True
        train_loss.update(loss.item())

        # Optimize once a full accumulation window is complete
        if (i + 1) % args.batch_size == 0:
            optimizer.step()
            optimizer.zero_grad()
            grads_pending = False

    # Bug fix: the final accumulation window may be partial; previously its
    # gradients were never applied and leaked into the next epoch.
    if grads_pending:
        optimizer.step()
        optimizer.zero_grad()

    # Eval
    metrics_log = {
        'val_loss': None,
        'val_ari': None,
        'val_nmi': None,
        'val_acc': None,
        'val_tpr': None,
        'val_tnr': None,
        'val_num_failures': None
    }
    if args.eval_during_training:
        if ((t + 1) % args.eval_freq == 0) or (t == 0):
            metrics_log = eval_fn(model, criterion, val_dl_cluster, args,
                                  device, True)

        else:
            metrics_log = eval_fn(model, criterion, val_dl, args, device,
                                  False)

    # Compute early stop loss
    early_stop_metrics_log = eval_fn(model, criterion, early_stop_dl, args,
                                     device, False)

    metrics_log['early_stop_loss'] = early_stop_metrics_log['val_loss']
    metrics_log['train_loss'] = train_loss.avg

    return metrics_log
# Example #6
# 0
def pdf_run_epoch(t, args, model, criterion, optimizer, train_dl,
                  early_stop_dl, val_dl, val_dl_cluster, eval_fn, device):
    """Train one DAC epoch with gradient accumulation, then run evaluation.

    Returns a metrics dict that always contains 'early_stop_loss' and
    'train_loss'.
    """
    train_loss = AverageMeter()
    model.train()

    grads_pending = False
    for i, batch in enumerate(train_dl):

        # Convert batch to be compatible with DAC model
        batch = batch_to_dac_compatible(batch, args.augment_pdf_data)

        # Forward
        loss = model.loss_fn_anchored(batch['X'].to(device),
                                      batch['label'].to(device))

        # Backward (accumulate gradients)
        loss.backward()
        grads_pending = True

        train_loss.update(loss.item())

        # optimize once a full accumulation window is complete
        if (i + 1) % args.batch_size == 0:
            optimizer.step()
            optimizer.zero_grad()
            grads_pending = False

    # Last accumulation window might be partial; flush it. Bug fix: the step
    # used to run unconditionally, so a whole final window triggered a second
    # step on zeroed gradients — a spurious parameter update for stateful
    # optimizers (e.g. Adam with weight decay, SGD with momentum).
    if grads_pending:
        optimizer.step()
        optimizer.zero_grad()

    # Eval
    metrics_log = {
        'val_loss': None,
        'val_ari': None,
        'val_nmi': None,
        'val_acc': None,
        'val_tpr': None,
        'val_tnr': None,
        'val_num_failures': None
    }
    if args.eval_during_training:
        if ((t + 1) % args.eval_freq == 0) or (t == 0):
            metrics_log = eval_fn(model, criterion, val_dl_cluster, args,
                                  device, True)
        else:
            metrics_log = eval_fn(model, criterion, val_dl, args, device,
                                  False)

    # Compute early stop loss
    early_stop_metrics_log = eval_fn(model, criterion, early_stop_dl, args,
                                     device, False)

    metrics_log['early_stop_loss'] = early_stop_metrics_log['val_loss']
    metrics_log['train_loss'] = train_loss.avg

    return metrics_log
# Example #7
# 0
def run_epoch(t, args, model, criterion, optimizer, train_dl, early_stop_dl,
              val_dl, val_dl_cluster, eval_fn, device):
    """Train one MIL epoch over pair bags, then run the evaluation passes."""
    loss_meter = AverageMeter()
    model.train()

    for batch in train_dl:
        # Convert the raw batch into multiple-instance bags of pairs.
        if args.pair_indicators:
            bags = batch_to_mil_pair_indicators(batch, args.n_pairs,
                                                args.train_pairs_replacement)
        else:
            bags = batch_to_mil_pairs(batch, args.n_pairs, args.stratify,
                                      args.pair_bag_len)

        # Forward
        preds = model((bags['X'].to(device), bags['B'].to(device)))
        batch_loss = criterion(preds, bags['T'].to(device))

        # Backward + optimize (one step per batch, no accumulation here)
        batch_loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        loss_meter.update(batch_loss.item())

    # Start from an all-None metric dict; filled in when eval is enabled.
    metrics_log = dict.fromkeys(('val_loss', 'val_ari', 'val_nmi', 'val_acc',
                                 'val_tpr', 'val_tnr', 'val_num_failures'))
    if args.eval_during_training:
        full_eval = ((t + 1) % args.eval_freq == 0) or (t == 0)
        loader = val_dl_cluster if full_eval else val_dl
        metrics_log = eval_fn(model, criterion, loader, args, device, full_eval)

    # Early-stopping loss is always computed, regardless of the flag above.
    early_log = eval_fn(model, criterion, early_stop_dl, args, device, False)
    metrics_log['early_stop_loss'] = early_log['val_loss']
    metrics_log['train_loss'] = loss_meter.avg

    return metrics_log
# Example #8
# 0
def train(epoch, trainData, model, crite, optimizer, logger):
    """Train `model` for one epoch on `trainData`, printing and logging stats.

    NOTE(review): reads the module-level `opt.output_dir`, which is not
    visible in this block. `crite` is the loss criterion.
    """
    average_meter = AverageMeter()
    model.train()  # switch to train mode
    end = time.time()

    for i, (image, depth) in enumerate(trainData):
        image = image.cuda()
        depth = depth.cuda()
        # normal = normal.cuda()
        # image = torch.autograd.Variable(image)
        # depth = torch.autograd.Variable(depth)
        torch.cuda.synchronize()  # make the data-loading time measurement accurate
        data_time = time.time() - end

        end = time.time()
        optimizer.zero_grad()
        pred = model(image)
        loss = crite(pred, depth)
        loss.backward()
        optimizer.step()
        torch.cuda.synchronize()  # wait for GPU work so gpu_time is meaningful
        gpu_time = time.time() - end

        # measure accuracy and record loss
        result = Result()
        result.evaluate(pred.data, depth.data)
        average_meter.update(result, gpu_time, data_time, image.size(0))
        end = time.time()

        if (i + 1) % 10 == 0:
            print('=> output: {}'.format(opt.output_dir))
            print('Train Epoch: {0} [{1}/{2}]\t'
                  't_Data={data_time:.3f}({average.data_time:.3f}) '
                  't_GPU={gpu_time:.3f}({average.gpu_time:.3f})\n\t'
                  'Loss={Loss:.5f} '
                  'RMSE={result.rmse:.2f}({average.rmse:.2f}) '
                  'RML={result.absrel:.2f}({average.absrel:.2f}) '
                  'Log10={result.lg10:.3f}({average.lg10:.3f}) '
                  'Delta1={result.delta1:.3f}({average.delta1:.3f}) '
                  'Delta2={result.delta2:.3f}({average.delta2:.3f}) '
                  'Delta3={result.delta3:.3f}({average.delta3:.3f})'.format(
                epoch, i + 1, len(trainData), data_time=data_time,
                gpu_time=gpu_time, Loss=loss.item(), result=result, average=average_meter.average()))
            current_step = epoch * len(trainData) + i
            # NOTE(review): `loss` here is a tensor while the other scalars are
            # floats — consider loss.item() for consistency.
            logger.add_scalar('Train/loss', loss, current_step)
            logger.add_scalar('Train/RMSE', result.rmse, current_step)
            logger.add_scalar('Train/rml', result.absrel, current_step)
            logger.add_scalar('Train/Log10', result.lg10, current_step)
            logger.add_scalar('Train/Delta1', result.delta1, current_step)
            logger.add_scalar('Train/Delta2', result.delta2, current_step)
            logger.add_scalar('Train/Delta3', result.delta3, current_step)
 def compute_depth_metrics(self, verbose=True) -> Result:
     """Computes metrics on the difference between raw and fixed depth values.

     Iterates over ``self.paths``, compares each raw depth map against its
     fixed counterpart, and returns the averaged ``Result``.
     """
     avg = AverageMeter()
     for i, path in enumerate(self.paths):
         # load_images presumably returns (rgb, raw depth, fixed depth);
         # the first element is unused here — confirm against its definition.
         _, depth_raw, depth_fix = self.load_images(path)
         depth_raw = torch.tensor(depth_raw)
         depth_fix = torch.tensor(depth_fix)
         res = Result()
         res.evaluate(depth_raw, depth_fix)
         # Weight 1 per image; gpu/data times are not measured here (0, 0).
         avg.update(res, 0, 0, 1)
         if verbose:
             stdout.write(f"=> computing img {i}/{len(self)}\r")
     if verbose:
         stdout.write("\n")
     return avg.average()
# Example #10
# 0
def validate(val_loader, model, epoch, write_to_file=True):
    """Stream frames from an IP webcam, predict depth, and display both.

    NOTE(review): despite the name and signature, `val_loader`, `epoch`, and
    `write_to_file` are unused — this loops forever over a live camera feed
    and never returns. `average_meter` and the timing values are collected
    but never reported.
    """
    average_meter = AverageMeter()
    model.eval()
    end = time.time()
    i = 0
    # DroidCam-style MJPEG stream on the local network.
    cam = cv2.VideoCapture("http://192.168.178.195:4747/mjpegfeed?640x480")
    while True:
        input = cv2.resize(cam.read()[1], (640, 480))
        cv2.imshow("IN", input)
        print(input)
        # HWC uint8 frame -> 1x3x480x640 float tensor.
        # NOTE(review): np.reshape is not a transpose; this scrambles the
        # HWC channel layout — confirm the model was trained on this ordering.
        input = torch.Tensor(np.reshape(input, [3, 480, 640])).unsqueeze(0).float()
        # torch.cuda.synchronize()
        data_time = time.time() - end

        # compute output
        end = time.time()
        with torch.no_grad():
            pred = model(input)
        # torch.cuda.synchronize()
        gpu_time = time.time() - end

        # Scale prediction back to metres (presumably trained on depth/10).
        pred = np.array(pred.squeeze(0))*10.
        print(pred)

        cv2.imshow("PRED", cv2.resize(np.reshape(pred, [480, 640, 1]), (640, 480)))
        cv2.waitKey(1)
# Example #11
# 0
def forward_stats(batch, context, training=True, init_meters=False):
    """Update per-dataset loss/top-1/top-5 meters from a finished batch."""
    # @start interface
    dataset_ptr = batch['dataset_ptr']

    stats = context['stats_train' if training else 'stats_val']

    # (meter name, extractor) pairs; the top-100 variant stays disabled.
    specs = [
        ('{}_loss_meter'.format(dataset_ptr),
         lambda b: float(b['loss'].numpy())),
        ('{}_top1_meter'.format(dataset_ptr),
         lambda b: float(accuracy(b['ys'], b['labels'], [1])[0][0])),
        ('{}_top5_meter'.format(dataset_ptr),
         lambda b: float(accuracy(b['ys'], b['labels'], [5])[0][0])),
        # ('{}_top100_meter'.format(dataset_ptr),
        #  lambda b: accuracy(b['ys'], b['labels'], [100])[0]),
    ]

    # @end interface

    for name, extract in specs:
        # Re-create the meter on first sight, or when a validation run restarts.
        if name not in stats or (not training and init_meters):
            stats[name] = AverageMeter()
        stats[name].update(extract(batch), batch['labels'].size(0))
# Example #12
# 0
def prepare_stats(context):
    """Initialise bookkeeping slots in `context` and return them as a tuple."""
    context['step'] = 0
    context['best_metric'] = None

    # Separate timing meters for the train and validation phases.
    for phase in ('stats_train', 'stats_val'):
        context[phase] = {
            'batch_time': AverageMeter(),
            'data_time': AverageMeter(),
        }

    context['timer'] = Timer()

    return (context['step'], context['best_metric'],
            context['stats_train'], context['stats_val'], context['timer'])
# Example #13
# 0
 def on_start(state):
     """Engine hook: set up the loss meter, test interval, and progress bar.

     NOTE(review): closes over `train_dataset`, `args`, and `model` from an
     enclosing scope that is not visible in this block.
     """
     state['loss_meter'] = AverageMeter()
     # Number of optimisation steps between test runs, derived from epochs.
     state['test_interval'] = int(
         len(train_dataset) / args.batch_size * args.test_interval)
     state['t'] = 1
     model.train()
     if args.verbose:
         state['progress_bar'] = tqdm(total=state['test_interval'])
# Example #14
# 0
def train_coarse(train_loader, model, criterion, optimizer, epoch):
    """Train the coarse network for one epoch, printing periodic progress.

    NOTE(review): depends on module-level `history_loss`, `args`, and
    `output_directory`, none of which are visible in this block.
    """
    average_meter = AverageMeter()
    model.train()  # switch to train mode
    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        input, target = input.cuda(), target.cuda()
        torch.cuda.synchronize()  # make the data-time measurement accurate
        data_time = time.time() - end

        # compute pred
        end = time.time()
        pred = model(input)
        loss = criterion(pred, target)
        optimizer.zero_grad()
        loss.backward()  # compute gradient and do SGD step
        optimizer.step()
        torch.cuda.synchronize()  # wait for GPU work so gpu_time is meaningful
        gpu_time = time.time() - end

        # measure accuracy and record loss
        end = time.time()
        result = Result()
        result.evaluate(pred.data, target.data)
        average_meter.update(result, gpu_time, data_time, input.size(0))
        eval_time = time.time() - end  # NOTE(review): measured but never used

        if (i + 1) % args.print_freq == 0:
            history_loss.append(loss.item())
            print('=> output: {}'.format(output_directory))
            print('Train Epoch: {0} [{1}/{2}]\t'
                  't_Data={data_time:.3f}({average.data_time:.3f}) '
                  't_GPU={gpu_time:.3f}({average.gpu_time:.3f})\n\t'
                  'RMSE={result.rmse:.2f}({average.rmse:.2f}) '
                  'MAE={result.mae:.2f}({average.mae:.2f}) '
                  'Delta1={result.delta1:.3f}({average.delta1:.3f}) '
                  'REL={result.absrel:.3f}({average.absrel:.3f}) '
                  'Lg10={result.lg10:.3f}({average.lg10:.3f}) '.format(
                      epoch,
                      i + 1,
                      len(train_loader),
                      data_time=data_time,
                      gpu_time=gpu_time,
                      result=result,
                      average=average_meter.average()))
def evaluate(epoch, test_loader, tokenizer, params):
    """Compute and log the average masked-LM loss over the test set.

    NOTE: relies on a module-level `model`, which is not a parameter here.
    """
    dev = params.device
    loss_meter = AverageMeter()

    model.eval()
    with torch.no_grad():
        for raw_batch in test_loader:
            raw_batch = raw_batch.to(dev)
            # segment = create_dummy_segment(raw_batch)

            masked_inputs, mlm_labels = mask_tokens(raw_batch, tokenizer, params)
            masked_inputs = masked_inputs.to(dev)
            mlm_labels = mlm_labels.to(dev)

            # transformers models return a tuple; entry 0 is the loss when
            # masked_lm_labels is supplied.
            mlm_loss = model(masked_inputs, masked_lm_labels=mlm_labels)[0]

            loss_meter.update(mlm_loss.item())

    logging.info('Test-E-{}: loss: {:.4f}'.format(epoch, loss_meter()))
    return loss_meter()
# Example #16
# 0
def train(train_loader, model, criterion, optimizer, epoch, logger):
    """Train `model` for one epoch and log metrics to the tensorboard logger.

    NOTE(review): depends on module-level `args` and `output_directory`; the
    final `avg` is computed but never returned or used.
    """
    average_meter = AverageMeter()
    model.train()  # switch to train mode
    end = time.time()

    batch_num = len(train_loader)

    for i, (input, target) in enumerate(train_loader):

        # itr_count += 1
        input, target = input.cuda(), target.cuda()
        # print('input size  = ', input.size())
        # print('target size = ', target.size())
        torch.cuda.synchronize()
        data_time = time.time() - end

        # compute pred
        end = time.time()

        pred = model(input)  # forward pass (original note: "mind the output")

        # print('pred size = ', pred.size())
        # print('target size = ', target.size())

        loss = criterion(pred, target)
        optimizer.zero_grad()
        loss.backward()  # compute gradient and do SGD step
        optimizer.step()
        torch.cuda.synchronize()
        gpu_time = time.time() - end

        # measure accuracy and record loss
        result = Result()
        result.evaluate(pred.data, target.data)
        average_meter.update(result, gpu_time, data_time, input.size(0))
        end = time.time()

        if (i + 1) % args.print_freq == 0:
            print('=> output: {}'.format(output_directory))
            print('Train Epoch: {0} [{1}/{2}]\t'
                  't_Data={data_time:.3f}({average.data_time:.3f}) '
                  't_GPU={gpu_time:.3f}({average.gpu_time:.3f})\n\t'
                  'Loss={Loss:.5f} '
                  'RMSE={result.rmse:.2f}({average.rmse:.2f}) '
                  'RML={result.absrel:.2f}({average.absrel:.2f}) '
                  'Log10={result.lg10:.3f}({average.lg10:.3f}) '
                  'Delta1={result.delta1:.3f}({average.delta1:.3f}) '
                  'Delta2={result.delta2:.3f}({average.delta2:.3f}) '
                  'Delta3={result.delta3:.3f}({average.delta3:.3f})'.format(
                epoch, i + 1, len(train_loader), data_time=data_time,
                gpu_time=gpu_time, Loss=loss.item(), result=result, average=average_meter.average()))
            current_step = epoch * batch_num + i
            logger.add_scalar('Train/RMSE', result.rmse, current_step)
            logger.add_scalar('Train/rml', result.absrel, current_step)
            logger.add_scalar('Train/Log10', result.lg10, current_step)
            logger.add_scalar('Train/Delta1', result.delta1, current_step)
            logger.add_scalar('Train/Delta2', result.delta2, current_step)
            logger.add_scalar('Train/Delta3', result.delta3, current_step)

    avg = average_meter.average()  # NOTE(review): unused — no return/consumer
# Example #17
# 0
def train(train_loader, model, criterion, optimizer, epoch):
    """Train `model` for one epoch and append epoch averages to the train CSV.

    NOTE(review): depends on module-level `args`, `output_directory`,
    `train_csv`, and `fieldnames`, none of which are visible in this block.
    """
    average_meter = AverageMeter()
    model.train()  # switch to train mode
    end = time.time()
    for i, (input, target) in enumerate(train_loader):

        input, target = input.cuda(), target.cuda()
        torch.cuda.synchronize()
        data_time = time.time() - end

        # compute pred
        end = time.time()
        pred = model(input)
        loss = criterion(pred, target)
        optimizer.zero_grad()
        loss.backward()  # compute gradient and do SGD step
        optimizer.step()
        torch.cuda.synchronize()
        gpu_time = time.time() - end

        # measure accuracy and record loss
        result = Result()
        result.evaluate(pred.data, target.data)

        average_meter.update(result, gpu_time, data_time, input.size(0))
        end = time.time()

        if (i + 1) % args.print_freq == 0:
            print('=> output: {}'.format(output_directory))
            print('Train Epoch: {0} [{1}/{2}]\t'
                  't_Data={data_time:.3f}({average.data_time:.3f}) '
                  't_GPU={gpu_time:.3f}({average.gpu_time:.3f})\n\t'
                  'RMSE={result.rmse:.2f}({average.rmse:.2f}) '
                  'MAE={result.mae:.2f}({average.mae:.2f}) '
                  'Delta1={result.delta1:.3f}({average.delta1:.3f}) '
                  'REL={result.absrel:.3f}({average.absrel:.3f}) '
                  'Lg10={result.lg10:.3f}({average.lg10:.3f}) '.format(
                      epoch,
                      i + 1,
                      len(train_loader),
                      data_time=data_time,
                      gpu_time=gpu_time,
                      result=result,
                      average=average_meter.average()))

    # Persist epoch-level averages for offline analysis.
    avg = average_meter.average()
    with open(train_csv, 'a') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writerow({
            'mse': avg.mse,
            'rmse': avg.rmse,
            'absrel': avg.absrel,
            'lg10': avg.lg10,
            'mae': avg.mae,
            'delta1': avg.delta1,
            'delta2': avg.delta2,
            'delta3': avg.delta3,
            'gpu_time': avg.gpu_time,
            'data_time': avg.data_time
        })
# Example #18
# 0
def evaluate(params, loader, model, experiment):
    """Evaluate `model` on `loader`, logging results and images to comet.

    Predictions are clipped to [depth_min, depth_max] before scoring; merged
    comparison images are optionally saved to the experiment directory.
    """
    print("Testing...")
    # Bug fix: the original `with experiment.test() and torch.no_grad():`
    # entered only ONE context manager (`and` returns a single operand), so
    # the comet test context was never actually activated. Use the
    # multi-manager form instead.
    with experiment.test(), torch.no_grad():
        average = AverageMeter()
        end = time.time()
        for i, (inputs, targets) in enumerate(loader):
            inputs, targets = inputs.to(params["device"]), targets.to(
                params["device"])

            data_time = time.time() - end

            # Predict
            end = time.time()
            outputs = model(inputs)
            gpu_time = time.time() - end

            # Clip prediction to the valid depth range
            outputs[outputs > params["depth_max"]] = params["depth_max"]
            outputs[outputs < params["depth_min"]] = params["depth_min"]

            result = Result()
            result.evaluate(outputs.data, targets.data)
            average.update(result, gpu_time, data_time, inputs.size(0))

            # Log images to comet
            img_merged = utils.log_image_to_comet(inputs[0],
                                                  targets[0],
                                                  outputs[0],
                                                  epoch=0,
                                                  id=i,
                                                  experiment=experiment,
                                                  result=result,
                                                  prefix="visual_test")
            if params["save_test_images"]:
                filename = os.path.join(
                    params["experiment_dir"],
                    "image_{}_epoch_{}.png".format(i,
                                                   str(params["start_epoch"])))
                utils.save_image(img_merged, filename)
# Example #19
# 0
def validate(epoch, valData, model, logger):
    """Evaluate `model` on `valData` for one full epoch; returns epoch averages.

    Bug fix: the summary / tensorboard-logging / `return avg` block was
    indented inside the batch loop, so the function returned after the FIRST
    batch and never evaluated the rest of the set. It now runs once, after
    the loop completes.
    """
    average_meter = AverageMeter()
    model.eval()  # switch to evaluate mode
    end = time.time()
    # skip = len(valData) // 9  # save images every skip iters

    for i, (image, depth) in enumerate(valData):
        image = image.cuda()
        depth = depth.cuda()
        # normal = normal.cuda()
        torch.cuda.synchronize()  # make the data-time measurement accurate
        data_time = time.time() - end

        end = time.time()
        with torch.no_grad():
            pred = model(image)

        torch.cuda.synchronize()  # wait for GPU work so gpu_time is meaningful
        gpu_time = time.time() - end

        # measure accuracy and record loss
        result = Result()
        result.evaluate(pred.data, depth.data)
        average_meter.update(result, gpu_time, data_time, image.size(0))
        end = time.time()

        if (i + 1) % 10 == 0:
            print('Test Epoch: [{0}/{1}]\t'
                  't_Data={data_time:.3f}({average.data_time:.3f}) '
                  't_GPU={gpu_time:.3f}({average.gpu_time:.3f})\n\t'
                  'RMSE={result.rmse:.2f}({average.rmse:.2f}) '
                  'RML={result.absrel:.2f}({average.absrel:.2f}) '
                  'Log10={result.lg10:.3f}({average.lg10:.3f}) '
                  'Delta1={result.delta1:.3f}({average.delta1:.3f}) '
                  'Delta2={result.delta2:.3f}({average.delta2:.3f}) '
                  'Delta3={result.delta3:.3f}({average.delta3:.3f})'.format(
                i + 1, len(valData), data_time=data_time,
                gpu_time=gpu_time, result=result, average=average_meter.average()))

    # Epoch-level summary: computed once, after the full validation pass.
    avg = average_meter.average()

    print('\n*\n'
          'RMSE={average.rmse:.3f}\n'
          'Rel={average.absrel:.3f}\n'
          'Log10={average.lg10:.3f}\n'
          'Delta1={average.delta1:.3f}\n'
          'Delta2={average.delta2:.3f}\n'
          'Delta3={average.delta3:.3f}\n'
          't_GPU={time:.3f}\n'.format(
        average=avg, time=avg.gpu_time))

    logger.add_scalar('Test/rmse', avg.rmse, epoch)
    logger.add_scalar('Test/Rel', avg.absrel, epoch)
    logger.add_scalar('Test/log10', avg.lg10, epoch)
    logger.add_scalar('Test/Delta1', avg.delta1, epoch)
    logger.add_scalar('Test/Delta2', avg.delta2, epoch)
    logger.add_scalar('Test/Delta3', avg.delta3, epoch)
    return avg
# Example #20
# 0
    def test_retr(self, query_loader, gal_loader, opts, log_file):  #####
        """Extract L2-normalised query/gallery features and report CMC / mAP.

        Perf fix: features were previously accumulated with `torch.cat`
        inside the loop (O(n^2) total copying); they are now collected in
        lists and concatenated once. Unused meters (`batch_time`, `losses`,
        `top1`) and locals (`count`, `end`) were removed.
        """
        self.model.eval()

        query_feat_list, gal_feat_list = [], []
        query_targets, gallery_targets = [], []
        eps = 1e-8  # guards against division by zero when normalising

        with torch.no_grad():
            for inputs, target in query_loader:
                query_feat_list.append(self.model(inputs))
                query_targets.append(target.item())

            for inputs, target in gal_loader:
                gal_feat_list.append(self.model(inputs))
                gallery_targets.append(target.item())

            # Concatenate once; fall back to the original empty CUDA tensor
            # when a loader yields nothing (torch.cat rejects empty lists).
            query_feats = (torch.cat(query_feat_list, 0) if query_feat_list
                           else torch.FloatTensor().cuda())
            gal_feats = (torch.cat(gal_feat_list, 0) if gal_feat_list
                         else torch.FloatTensor().cuda())

            fnorm = torch.norm(query_feats, p=2, dim=1, keepdim=True)
            query_feats = query_feats.div(fnorm.expand_as(query_feats) + eps)

            fnorm = torch.norm(gal_feats, p=2, dim=1, keepdim=True)
            gal_feats = gal_feats.div(fnorm.expand_as(gal_feats) + eps)

        cmc, mAP = evaluate(query_feats, query_targets, gal_feats,
                            gallery_targets)
        files = WriteData(log_file)
        files.write_data_txt('test: {cmc:.4}%, {mAP:.4}%'.format(cmc=cmc,
                                                                 mAP=mAP))
        print('cmc rank 1,5,10, mAP:', cmc[0], cmc[4], cmc[9], mAP)
        return cmc, mAP
def train(train_loader, model, criterion, optimizer, epoch):
	"""Train for one epoch, visualising depth inputs/targets/outputs with OpenCV.

	NOTE(review): depends on module-level `args`, `output_directory`,
	`train_csv`, and `fieldnames`. Assumes `input` is RGB-D with depth in
	channel 3 and depth values in roughly [0, 10] metres — confirm.
	"""
	average_meter = AverageMeter()
	model.train() # switch to train mode
	end = time.time()
	for i, (input, target) in enumerate(train_loader):
		input, target = input.cuda(), target.cuda()
		torch.cuda.synchronize()
		data_time = time.time() - end

		# compute pred
		end = time.time()
		pred = model(input)
		loss = criterion(pred, target)
		optimizer.zero_grad()
		loss.backward() # compute gradient and do SGD step
		optimizer.step()
		torch.cuda.synchronize()
		gpu_time = time.time() - end

		# Colour-mapped visualisation of the first 4 samples (depth/10 -> [0,1]).
		depth_in = np.hstack(input.data.cpu().numpy()[:4, 3] / 10.)
		depth_in = cv2.applyColorMap((depth_in * 255).astype(np.uint8), cv2.COLORMAP_HOT)

		tgt_out = np.hstack(np.squeeze(target[:4].data.cpu().numpy())) / 10.
		tgt_out = cv2.applyColorMap((tgt_out * 255).astype(np.uint8), cv2.COLORMAP_HOT)

		out = np.hstack(np.squeeze(pred[:4].data.cpu().numpy()))
		out = np.clip(out / 10., 0., 1.)
		out = cv2.applyColorMap((out * 255).astype(np.uint8), cv2.COLORMAP_HOT)

		if i % 20 == 0:
			cv2.imshow("Training Results", np.vstack([depth_in, tgt_out, out]))
			cv2.waitKey(1)

		# measure accuracy and record loss
		result = Result()
		result.evaluate(pred.data, target.data)
		average_meter.update(result, gpu_time, data_time, input.size(0))
		end = time.time()

		if (i + 1) % args.print_freq == 0:
			print('=> output: {}'.format(output_directory))
			print('Train Epoch: {0} [{1}/{2}]\t'
				  't_Data={data_time:.3f}({average.data_time:.3f}) '
				  't_GPU={gpu_time:.3f}({average.gpu_time:.3f})\n\t'
				  'RMSE={result.rmse:.2f}({average.rmse:.2f}) '
				  'MAE={result.mae:.2f}({average.mae:.2f}) '
				  'Delta1={result.delta1:.3f}({average.delta1:.3f}) '
				  'REL={result.absrel:.3f}({average.absrel:.3f}) '
				  'Lg10={result.lg10:.3f}({average.lg10:.3f}) '.format(
				  epoch, i+1, len(train_loader), data_time=data_time,
				  gpu_time=gpu_time, result=result, average=average_meter.average()))

	# Append epoch averages to the training CSV.
	avg = average_meter.average()
	with open(train_csv, 'a') as csvfile:
		writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
		writer.writerow({'mse': avg.mse, 'rmse': avg.rmse, 'absrel': avg.absrel, 'lg10': avg.lg10,
			'mae': avg.mae, 'delta1': avg.delta1, 'delta2': avg.delta2, 'delta3': avg.delta3,
			'gpu_time': avg.gpu_time, 'data_time': avg.data_time})
# Example #22
# 0
def validate_epoch(val_loader, model, loss_fn, use_cuda):
    """Evaluate `model` over `val_loader`, print top-1/top-5 precision,
    and return the average top-1 precision."""
    top1 = AverageMeter()
    top5 = AverageMeter()
    model.eval()  # inference mode for batchnorm/dropout layers
    for batch, labels in tqdm(val_loader):
        with torch.no_grad():
            if use_cuda:
                labels = labels.cuda()
                batch = batch.cuda()
            batch_var = torch.autograd.Variable(batch)
            labels_var = torch.autograd.Variable(labels)
            logits = model(batch_var)
            # Loss is computed for parity with training but not reported.
            loss = loss_fn(logits, labels_var)
        prec1, prec5 = accuracy(logits.data, labels, topk=(1, 5))
        batch_size = batch.size(0)
        top1.update(prec1, batch_size)
        top5.update(prec5, batch_size)
    print('  **Test** Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'.format(
        top1=top1, top5=top5))
    return top1.avg
def validate(val_loader, model, epoch, write_to_file=True):
    """Evaluate a depth-prediction model for one epoch.

    Args:
        val_loader: iterable of (input, target) batches; moved to CUDA here.
        model: network mapping an input batch to a depth prediction.
        epoch: epoch index, used to name the saved comparison image.
        write_to_file: when True, append averaged metrics to the global
            `test_csv`.

    Returns:
        (avg, img_merge): epoch-averaged metrics and the stitched
        visualization image (None for depth-only modality or when the
        loader is empty).

    NOTE(review): depends on module-level globals `args`,
    `output_directory`, `test_csv`, `fieldnames`, `utils`, `Result`,
    `AverageMeter`.
    """
    average_meter = AverageMeter()

    # switch to evaluate mode
    model.eval()

    # Initialize up front: previously img_merge was only assigned inside the
    # loop, so an empty val_loader made `return avg, img_merge` raise
    # NameError.
    img_merge = None

    end = time.time()
    for i, (input, target) in enumerate(val_loader):
        input, target = input.cuda(), target.cuda()
        # torch.cuda.synchronize()
        data_time = time.time() - end

        # compute output
        end = time.time()
        with torch.no_grad():
            pred = model(input)
        # torch.cuda.synchronize()
        gpu_time = time.time() - end

        # measure accuracy and record loss
        result = Result()
        result.evaluate(pred.data, target.data)
        average_meter.update(result, gpu_time, data_time, input.size(0))
        end = time.time()

        # save 8 images for visualization (one row every `skip` batches)
        skip = 50
        if args.modality == 'd':
            img_merge = None
        else:
            if args.modality == 'rgb':
                rgb = input
            elif args.modality == 'rgbd':
                rgb = input[:, :3, :, :]
                depth = input[:, 3:, :, :]

            if i == 0:
                if args.modality == 'rgbd':
                    img_merge = utils.merge_into_row_with_gt(
                        rgb, depth, target, pred)
                else:
                    img_merge = utils.merge_into_row(rgb, target, pred)
            elif (i < 8 * skip) and (i % skip == 0):
                if args.modality == 'rgbd':
                    row = utils.merge_into_row_with_gt(rgb, depth, target,
                                                       pred)
                else:
                    row = utils.merge_into_row(rgb, target, pred)
                img_merge = utils.add_row(img_merge, row)
            elif i == 8 * skip:
                filename = output_directory + '/comparison_' + str(
                    epoch) + '.png'
                utils.save_image(img_merge, filename)

        if (i + 1) % args.print_freq == 0:
            print('Test: [{0}/{1}]\t'
                  't_GPU={gpu_time:.3f}({average.gpu_time:.3f})\n\t'
                  'RMSE={result.rmse:.2f}({average.rmse:.2f}) '
                  'MAE={result.mae:.2f}({average.mae:.2f})\n\t'
                  'Delta1={result.delta1:.3f}({average.delta1:.3f}) '
                  'REL={result.absrel:.3f}({average.absrel:.3f}) '
                  'Lg10={result.lg10:.3f}({average.lg10:.3f}) '.format(
                      i + 1,
                      len(val_loader),
                      gpu_time=gpu_time,
                      result=result,
                      average=average_meter.average()))

    avg = average_meter.average()

    print('\n*\n'
          'RMSE={average.rmse:.3f}\n'
          'MAE={average.mae:.3f}\n'
          'Delta1={average.delta1:.3f}\n'
          'REL={average.absrel:.3f}\n'
          'Lg10={average.lg10:.3f}\n'
          't_GPU={time:.3f}\n'.format(average=avg, time=avg.gpu_time))

    if write_to_file:
        with open(test_csv, 'a') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writerow({
                'mse': avg.mse,
                'rmse': avg.rmse,
                'absrel': avg.absrel,
                'lg10': avg.lg10,
                'mae': avg.mae,
                'delta1': avg.delta1,
                'delta2': avg.delta2,
                'delta3': avg.delta3,
                'data_time': avg.data_time,
                'gpu_time': avg.gpu_time
            })

    return avg, img_merge
    def train_epoch(self):
        """Run one training epoch, then sample and log a generated sequence.

        For the first `pretraining_step_size` epochs only the generator is
        pretrained; afterwards training alternates between a GAN phase and
        a WAE phase (`flag_gan` toggles after 100 / 50 accumulated epochs).
        Finally a 10-phrase sample is generated for logging and the LR
        schedulers are stepped on the recorded average losses.
        """
        if self.epoch > self.pretraining_step_size:
            self.train_count += 1
        tqdm_batch = tqdm(self.dataloader, total=self.dataset.num_iterations,
                          desc="epoch-{}".format(self.epoch))

        image_sample = None
        origin_image = None
        Tensor = torch.cuda.FloatTensor

        avg_generator_loss = AverageMeter()
        avg_discriminator_loss = AverageMeter()
        avg_feature_discriminator_loss = AverageMeter()
        avg_barZ_disc_loss = AverageMeter()
        avg_phraseZ_disc_loss = AverageMeter()

        for curr_it, (note, pre_note, pre_phrase, position) in enumerate(tqdm_batch):
            self.iteration += 1
            # `async=` was renamed to `non_blocking=` in PyTorch 0.4, and
            # `async` is a reserved keyword since Python 3.7 (SyntaxError).
            note = note.cuda(non_blocking=self.config.async_loading)
            pre_note = pre_note.cuda(non_blocking=self.config.async_loading)
            pre_phrase = pre_phrase.cuda(non_blocking=self.config.async_loading)
            position = position.cuda(non_blocking=self.config.async_loading)

            note = Variable(note)
            pre_note = Variable(pre_note)
            pre_phrase = Variable(pre_phrase)
            position = Variable(position)

            origin_image = note
            valid_target = Variable(Tensor(note.size(0)).fill_(1.0), requires_grad=False)
            fake_target = Variable(Tensor(note.size(0)).fill_(0.0), requires_grad=False)

            if self.epoch <= self.pretraining_step_size:
                image_sample = self.train_pretrain(note, pre_note, pre_phrase, position, avg_generator_loss)
            else:
                if self.flag_gan:
                    image_sample = self.train_gan(note, pre_note, pre_phrase, position,
                                                  avg_generator_loss, avg_discriminator_loss,
                                                  avg_feature_discriminator_loss,
                                                  fake_target, valid_target, curr_it)

                else:
                    image_sample = self.train_wae(note, pre_note, pre_phrase, position,
                                                  avg_generator_loss, avg_barZ_disc_loss, avg_phraseZ_disc_loss,
                                                  fake_target, valid_target, curr_it)

        tqdm_batch.close()

        # Alternate between GAN and WAE phases once enough epochs accumulate.
        if self.flag_gan and self.train_count >= 100:
            self.flag_gan = not self.flag_gan
            self.train_count = 0
        elif not self.flag_gan and self.train_count >= 50:
            self.flag_gan = not self.flag_gan
            self.train_count = 0

        with torch.no_grad():
            self.generator.eval()
            self.discriminator.eval()
            self.discriminator_feature.eval()
            self.z_discriminator_bar.eval()
            self.z_discriminator_phrase.eval()

            # Autoregressively generate 10 phrases of 4 bars each from noise,
            # feeding each binarized bar/phrase back into the generator.
            outputs = []
            pre_phrase = torch.zeros(1, 1, 384, 60, dtype=torch.float32)
            pre_bar = torch.zeros(1, 1, 96, 60, dtype=torch.float32)
            phrase_idx = [330] + [i for i in range(10 - 2, -1, -1)]
            for idx in range(10):
                bar_set = []
                for _ in range(4):
                    pre_bar, _ = self.generator(torch.randn(1, 1152, dtype=torch.float32).cuda(), pre_bar.cuda(),
                                             pre_phrase, torch.from_numpy(np.array([phrase_idx[idx]])), False)
                    # Binarize at 0.3; reshaped below to (96, 60) per bar.
                    pre_bar = torch.gt(pre_bar, 0.3).type('torch.FloatTensor')
                    bar_set.append(np.reshape(pre_bar.numpy(), [96, 60]))

                pre_phrase = np.concatenate(bar_set, axis=0)
                outputs.append(pre_phrase)
                pre_phrase = torch.from_numpy(np.reshape(pre_phrase, [1, 1, 96 * 4, 60])).float().cuda()

        # NOTE(review): image_sample/origin_image stay None if the dataloader
        # is empty, which would make these slices fail — confirm loader is
        # never empty.
        self.record_image(image_sample[:3], origin_image[:3], outputs)

        self.scheduler_generator.step(avg_generator_loss.val)
        if self.epoch > self.pretraining_step_size:
            self.scheduler_discriminator.step(avg_discriminator_loss.val)
            self.scheduler_discriminator_feature.step(avg_feature_discriminator_loss.val)
            self.scheduler_Zdiscriminator_bar.step(avg_barZ_disc_loss.val)
            self.scheduler_Zdiscriminator_phrase.step(avg_phraseZ_disc_loss.val)

        self.logger.warning(
            'loss info - gen: {}, barZ disc: {},  phraseZ disc: {}, bar disc: {}, bar_seq disc: {}'.format(
                avg_generator_loss.val, avg_barZ_disc_loss.val, avg_phraseZ_disc_loss.val,
                avg_feature_discriminator_loss.val, avg_discriminator_loss.val)
        )
        self.logger.warning(
            'lr info - gen: {}, barZ disc: {},  phraseZ disc: {}, bar disc: {}, bar_seq disc: {}'.format(
                self.get_lr(self.opt_generator), self.get_lr(self.opt_Zdiscriminator_bar),
                self.get_lr(self.opt_Zdiscriminator_phrase), self.get_lr(self.opt_discriminator_feature),
                self.get_lr(self.opt_discriminator))
        )
Example #25
0
def iterate(mode, args, loader, model, optimizer, logger, epoch):
    """Run one epoch of a depth-completion model in the given mode.

    Args:
        mode: one of "train", "val", "eval", "test_prediction",
            "test_completion"; controls model mode, which losses are
            computed, and whether metrics can be evaluated against gt.
        args: parsed options (lr, train_mode, use_pose, w1, w2,
            save_images, epochs, ...).
        loader: DataLoader yielding dicts of tensors; see key comment in
            the loop below.
        model: network taking the batch dict and returning a depth map.
        optimizer: stepped only in "train" mode.
        logger: project logger used for conditional printing/saving.
        epoch: current epoch index.

    Returns:
        (avg, is_best): epoch-averaged metrics and whether this epoch
        ranks best so far according to the logger.

    NOTE(review): relies on module-level globals (device, helper,
    depth_criterion, photometric_criterion, smoothness_criterion,
    kitti_intrinsics, homography_from, depth_write, glob, Result,
    AverageMeter) — confirm they are defined in the enclosing module.
    """
    # Two meters: one reset per logging block, one for the whole epoch.
    block_average_meter = AverageMeter()
    average_meter = AverageMeter()
    meters = [block_average_meter, average_meter]

    # switch to appropriate mode
    assert mode in ["train", "val", "eval", "test_prediction", "test_completion"], \
        "unsupported mode: {}".format(mode)
    if mode == 'train':
        model.train()
        lr = helper.adjust_learning_rate(args.lr, optimizer, epoch)
    else:
        model.eval(
        )  # batchnorm or dropout layers will work in eval mode instead of training mode
        lr = 0

    for i, batch_data in enumerate(
            loader
    ):  # batch_data keys: 'd' (depth), 'gt' (ground truth), 'g' (gray)
        start = time.time()
        # Move every non-None tensor in the batch dict to the target device.
        batch_data = {
            key: val.to(device)
            for key, val in batch_data.items() if val is not None
        }

        # Ground truth is unavailable in the two test modes.
        gt = batch_data[
            'gt'] if mode != 'test_prediction' and mode != 'test_completion' else None
        data_time = time.time() - start

        start = time.time()

        pred = model(batch_data)
        if args.save_images:  # save depth predictions
            # Write each predicted dense depth map under the most recently
            # modified var.test* output directory, mirroring the input tree.
            pred_out_dir = max(glob.glob('../outputs/var_final_NN/var.test*'),
                               key=os.path.getmtime) + '/dense_depth_images'
            pred1 = pred.cpu().detach().numpy()[:, 0, :, :]
            for im_idx, pred_im in enumerate(pred1):
                # NOTE(review): the path lookup uses the batch index i, not
                # im_idx — this appears to assume batch_size == 1; confirm.
                pred_out_dir1 = os.path.abspath(pred_out_dir)
                cur_path = os.path.abspath((loader.dataset.paths)['d'][i])
                basename = os.path.basename(cur_path)
                cur_dir = os.path.abspath(os.path.dirname(cur_path))
                cur_dir = cur_dir.split('var_final_NN/')[1]
                new_dir = os.path.abspath(pred_out_dir1 + '/' + cur_dir)
                new_path = os.path.abspath(new_dir + '/' + basename)
                if os.path.isdir(new_dir) == False:
                    os.makedirs(new_dir)

                depth_write(new_path, pred_im)

        depth_loss, photometric_loss, smooth_loss, mask = 0, 0, 0, None
        if mode == 'train':
            # Loss 1: the direct depth supervision from ground truth label
            # mask=1 indicates that a pixel does not have ground truth labels
            if 'sparse' in args.train_mode:
                depth_loss = depth_criterion(pred, batch_data['d'])
                mask = (batch_data['d'] < 1e-3).float()
            elif 'dense' in args.train_mode:
                depth_loss = depth_criterion(pred, gt)
                mask = (gt < 1e-3).float()

            # Loss 2: the self-supervised photometric loss
            if args.use_pose:
                # create multi-scale pyramids
                pred_array = helper.multiscale(pred)
                rgb_curr_array = helper.multiscale(batch_data['rgb'])
                rgb_near_array = helper.multiscale(batch_data['rgb_near'])
                if mask is not None:
                    mask_array = helper.multiscale(mask)
                num_scales = len(pred_array)

                # compute photometric loss at multiple scales
                for scale in range(len(pred_array)):
                    pred_ = pred_array[scale]
                    rgb_curr_ = rgb_curr_array[scale]
                    rgb_near_ = rgb_near_array[scale]
                    mask_ = None
                    if mask is not None:
                        mask_ = mask_array[scale]

                    # compute the corresponding intrinsic parameters
                    height_, width_ = pred_.size(2), pred_.size(3)
                    intrinsics_ = kitti_intrinsics.scale(height_, width_)

                    # inverse warp from a nearby frame to the current frame
                    warped_ = homography_from(rgb_near_, pred_,
                                              batch_data['r_mat'],
                                              batch_data['t_vec'], intrinsics_)
                    # Coarser scales are down-weighted by powers of two.
                    photometric_loss += photometric_criterion(
                        rgb_curr_, warped_, mask_) * (2**(scale - num_scales))

            # Loss 3: the depth smoothness loss
            smooth_loss = smoothness_criterion(pred) if args.w2 > 0 else 0

            # backprop
            loss = depth_loss + args.w1 * photometric_loss + args.w2 * smooth_loss
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        gpu_time = time.time() - start

        # measure accuracy and record loss of each batch
        with torch.no_grad(
        ):  # impacts the autograd engine and deactivate it (will reduce memory usage and speed up computations)
            mini_batch_size = next(iter(batch_data.values())).size(0)
            result = Result()  # metrics
            if mode != 'test_prediction' and mode != 'test_completion':
                result.evaluate(pred.data, gt.data, photometric_loss)
            [
                m.update(result, gpu_time, data_time, mini_batch_size)
                for m in meters
            ]
            logger.conditional_print(mode, i, epoch, args.epochs, lr,
                                     len(loader), block_average_meter,
                                     average_meter)
            logger.conditional_save_img_comparison(mode, i, batch_data, pred,
                                                   epoch)
            logger.conditional_save_pred(mode, i, pred, epoch)
        del pred

    avg = logger.conditional_save_info(
        mode, average_meter,
        epoch)  # take the avg of all the batches, to get the epoch metrics
    is_best = logger.rank_conditional_save_best(mode, avg, epoch, args.epochs)
    if is_best and not (mode == "train"):
        logger.save_img_comparison_as_best(mode, epoch)
    logger.conditional_summarize(mode, avg, is_best)

    return avg, is_best
Example #26
0
def validate(localizer,
             adversarial,
             dataloader,
             experiment_directory,
             labels,
             segmentation_map_threshold,
             num_classes,
             evaluate=False,
             save_results=False):
    """ Loop over the validation set (in batches) to acquire relevant metrics.

    Args:
        localizer: model returning (class output, grad-CAM tuple) for a batch.
        adversarial: optional adversary scoring the CAM-erased images; pass
            None to skip the adversarial loss.
        dataloader: yields (inputs, targets); when `evaluate` is True the
            targets are (class targets, segmentation maps).
        experiment_directory: directory where result images are written.
        labels: class-index -> label-string mapping for file names.
        segmentation_map_threshold: threshold used when turning CAMs into a
            predicted segmentation map.
        num_classes: number of foreground classes for map generation.
        evaluate: when True, compute segmentation metrics (mIoU etc.).
        save_results: when True, save heatmap / erased-image / map files.
    """
    print('Validating...')
    if evaluate:
        # NOTE(review): class count is hard-coded to 20 here even though
        # num_classes is a parameter — looks inconsistent; confirm.
        metrics = Metrics(20)
    localizer_criterion = torch.nn.BCELoss()
    adversarial_criterion = torch.nn.BCELoss()
    localizer_loss_meter = AverageMeter()
    adversarial_loss_meter = AverageMeter()
    for i, (inputs, targets) in enumerate(dataloader):
        if evaluate:
            # Segmentation maps are included in the targets
            targets, segmentation_maps = targets
        else:
            segmentation_maps = None

        if torch.cuda.is_available():
            inputs, targets = inputs.cuda(), targets.cuda()

        output, gcams = localizer(inputs, labels=targets)

        loss = localizer_criterion(output, targets)
        localizer_loss_meter.update(loss.item())

        # The localizer returns per-class CAMs plus the expanded batch they
        # correspond to (one entry per active label).
        gcams, new_images, new_targets, original_targets = gcams

        if adversarial is not None or save_results:
            new_batch_size = gcams.size(0)
            masks = gcam_to_mask(gcams)

            # Erase the CAM-highlighted regions from the images.
            masked_image = erase_mask(new_images, masks)
            if adversarial is not None:
                adversarial_output = adversarial(masked_image)
                adversarial_output = torch.sigmoid(adversarial_output)
                adversarial_loss = adversarial_criterion(
                    adversarial_output, original_targets)
                adversarial_loss_meter.update(adversarial_loss.item())

            if save_results:
                for k in range(new_batch_size):
                    number = f'{i * new_batch_size + k}'  #TODO: fix
                    label_string = labels[new_targets[k]]
                    file_postfix = f'{number}_{label_string}'
                    save_location = os.path.join(
                        experiment_directory, f'heatmap_{file_postfix}.png')
                    save_gradcam(filename=save_location,
                                 gcam=gcams[k, 0].detach(),
                                 raw_image=new_images[k].clone())
                    save_location = os.path.join(
                        experiment_directory,
                        f'raw_heatmap_{file_postfix}.png')
                    save_gradcam(filename=save_location,
                                 gcam=gcams[k, 0].detach())
                    save_location = os.path.join(experiment_directory,
                                                 f'erased_{file_postfix}.png')
                    tensor2imwrite(save_location, denormalize(masked_image[k]))

        if evaluate:
            # Generate and visualize predicted segmentation map
            predicted_segmentation_maps = generate_segmentation_map(
                gcams,
                num_classes,
                segmentation_maps.shape[1:],
                new_targets,
                threshold=segmentation_map_threshold)
            metrics.update(predicted_segmentation_maps, segmentation_maps)

            if save_results:
                predicted_indices = predicted_segmentation_maps.unique()
                all_labels = ['background', *labels]
                predicted_labels = [
                    all_labels[idx] for idx in predicted_indices
                ]
                labels_string = '_'.join(predicted_labels)
                filename = f'map_{i:04d}_{labels_string}.png'
                save_location = os.path.join(experiment_directory, filename)
                # NOTE(review): `k` is the leftover index from the saving loop
                # above (last item of the batch) — verify this is intended.
                save_segmentation_map(save_location,
                                      predicted_segmentation_maps,
                                      denormalize(new_images[k]).clone())
                filename = f'map_raw_{i:04d}_{labels_string}.png'
                save_location = os.path.join(experiment_directory, filename)
                save_segmentation_map(save_location,
                                      predicted_segmentation_maps)

    print('Validation localizer loss:', localizer_loss_meter.avg)
    print('Validation adversarial loss:', adversarial_loss_meter.avg)

    if evaluate:
        miou = metrics.miou().item()
        precision = metrics.precision(skip_background=True).item()
        recall = metrics.recall(skip_background=True).item()
        metrics.print_scores_per_class()
        print('mIoU:', miou)
        print('precision:', precision)
        print('recall:', recall)
Example #27
0
File: train.py Project: syt2/CRA
def train(cfg, writer, logger):
    """Full classification training loop: setup, optional resume, epochs.

    Args:
        cfg: configuration dict ("cuda" plus a "training" section with
            "seed", "epochs", "save_interval", "resume", "best_model", ...).
        writer: TensorBoard SummaryWriter; its logdir also holds checkpoints.
        logger: logger for progress messages.
    """
    # This statement must be declared before using pytorch
    use_cuda = False
    if cfg.get("cuda", None) is not None:
        if cfg.get("cuda", None) != "all":
            os.environ["CUDA_VISIBLE_DEVICES"] = cfg.get("cuda", None)
        use_cuda = torch.cuda.is_available()

    # Setup random seed
    seed = cfg["training"].get("seed", random.randint(1, 10000))
    torch.manual_seed(seed)
    if use_cuda:
        torch.cuda.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    # Setup Dataloader
    train_loader, val_loader = get_loader(cfg)

    # Setup Model
    model = get_model(cfg)
    # writer.add_graph(model, torch.rand([1, 3, 224, 224]))
    if use_cuda and torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model,
                                      device_ids=list(
                                          range(torch.cuda.device_count())))

    # Setup optimizer, lr_scheduler and loss function
    optimizer = get_optimizer(model.parameters(), cfg)
    scheduler = get_scheduler(optimizer, cfg)
    loss_fn = get_loss_fn(cfg)

    # Setup Metrics
    epochs = cfg["training"]["epochs"]
    recorder = RecorderMeter(epochs)
    start_epoch = 0

    # save model parameters every <n> epochs
    save_interval = cfg["training"]["save_interval"]

    if use_cuda:
        model.cuda()
        loss_fn.cuda()

    # Resume Trained Model.
    # os.path.join raises TypeError when given None, so only build the
    # resume path when a checkpoint name is actually configured (previously
    # the join ran unconditionally and crashed for resume=None, making the
    # `is not None` check below dead code).
    logdir = writer.file_writer.get_logdir()
    resume_name = cfg["training"]["resume"]
    resume_path = (os.path.join(logdir, resume_name)
                   if resume_name is not None else None)
    best_path = os.path.join(logdir, cfg["training"]["best_model"])

    if resume_path is not None:
        if os.path.isfile(resume_path):
            logger.info(
                "Loading model and optimizer from checkpoint '{}'".format(
                    resume_path))
            checkpoint = torch.load(resume_path)
            state = checkpoint["state_dict"]
            if torch.cuda.device_count() <= 1:
                # Strip the DataParallel "module." prefix for single-GPU/CPU.
                state = convert_state_dict(state)
            model.load_state_dict(state)
            optimizer.load_state_dict(checkpoint["optimizer"])
            scheduler.load_state_dict(checkpoint["scheduler"])
            start_epoch = checkpoint["epoch"]
            recorder = checkpoint['recorder']
            logger.info("Loaded checkpoint '{}' (epoch {})".format(
                resume_path, checkpoint["epoch"]))
        else:
            logger.info("No checkpoint found at '{}'".format(resume_path))

    epoch_time = AverageMeter()
    for epoch in range(start_epoch, epochs):
        start_time = time.time()
        # Estimate remaining wall-clock time from the average epoch duration.
        need_hour, need_mins, need_secs = convert_secs2time(epoch_time.avg *
                                                            (epochs - epoch))
        need_time = '[Need: {:02d}:{:02d}:{:02d}]'.format(
            need_hour, need_mins, need_secs)
        logger.info(
            '\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [learning_rate={:8.6f}]'.
            format(time_string(), epoch, epochs, need_time, optimizer.
                   param_groups[0]['lr']) +  # scheduler.get_last_lr() >=1.4
            ' [Best : Accuracy={:.2f}]'.format(recorder.max_accuracy(False)))
        train_acc, train_los = train_epoch(train_loader, model, loss_fn,
                                           optimizer, use_cuda, logger)
        val_acc, val_los = validate_epoch(val_loader, model, loss_fn, use_cuda,
                                          logger)
        scheduler.step()

        is_best = recorder.update(epoch, train_los, train_acc, val_los,
                                  val_acc)
        if is_best or epoch % save_interval == 0 or epoch == epochs - 1:  # save model (resume model and best model)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'recorder': recorder,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'scheduler': scheduler.state_dict(),
                }, is_best, best_path, resume_path)

            for name, param in model.named_parameters():  # save histogram
                writer.add_histogram(name,
                                     param.clone().cpu().data.numpy(), epoch)

        writer.add_scalar('Train/loss', train_los, epoch)  # save curves
        writer.add_scalar('Train/acc', train_acc, epoch)
        writer.add_scalar('Val/loss', val_los, epoch)
        writer.add_scalar('Val/acc', val_acc, epoch)

        epoch_time.update(time.time() - start_time)

    writer.close()
Example #28
0
File: train.py Project: syt2/CRA
def train_epoch(train_loader, model, loss_fn, optimizer, use_cuda, logger):
    """Train `model` for one epoch and return (avg top-1 precision, avg loss)."""
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    model.train()  # enable dropout / batchnorm updates
    tick = time.time()
    for batch, labels in tqdm(train_loader):
        data_time.update(time.time() - tick)
        if use_cuda:
            labels = labels.cuda()
            batch = batch.cuda()
        with torch.no_grad():
            batch_var = torch.autograd.Variable(batch)
            labels_var = torch.autograd.Variable(labels)
        logits = model(batch_var)
        loss = loss_fn(logits, labels_var)
        prec1, prec5 = accuracy(logits.data, labels, topk=(1, 5))
        n = batch.size(0)
        losses.update(loss.data, n)
        top1.update(prec1, n)
        top5.update(prec5, n)

        # Standard backward/step cycle.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_time.update(time.time() - tick)
        tick = time.time()

    logger.info(
        '  **Train** Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'.format(
            top1=top1, top5=top5))
    return top1.avg, losses.avg
    def train_epoch(self):
        tqdm_batch = tqdm(self.dataloader,
                          total=self.dataset.num_iterations,
                          desc="epoch-{}".format(self.epoch))

        image_sample = None
        Tensor = torch.cuda.FloatTensor

        avg_generator_loss = AverageMeter()
        avg_barZ_disc_loss = AverageMeter()
        avg_phraseZ_disc_loss = AverageMeter()
        for curr_it, (note, pre_note, pre_phrase,
                      position) in enumerate(tqdm_batch):
            note = note.cuda(async=self.config.async_loading)
            pre_note = pre_note.cuda(async=self.config.async_loading)
            pre_phrase = pre_phrase.cuda(async=self.config.async_loading)
            position = position.cuda(async=self.config.async_loading)

            note = Variable(note)
            pre_note = Variable(pre_note)
            pre_phrase = Variable(pre_phrase)
            position = Variable(position)

            valid_target = Variable(Tensor(note.size(0)).fill_(1.0),
                                    requires_grad=False)
            fake_target = Variable(Tensor(note.size(0)).fill_(0.0),
                                   requires_grad=False)
            valid_target_double = Variable(Tensor(note.size(0) * 2).fill_(1.0),
                                           requires_grad=False)

            self.iteration += 1

            ####################
            self.generator.train()
            self.z_discriminator_bar.train()
            self.z_discriminator_phrase.train()

            self.generator.zero_grad()
            self.z_discriminator_bar.zero_grad()
            self.z_discriminator_phrase.zero_grad()
            if self.epoch > self.pretraining_step_size and (self.epoch +
                                                            curr_it) % 2 is 0:
                #################### Discriminator ####################
                self.free(self.z_discriminator_bar)
                self.free(self.z_discriminator_phrase)

                self.frozen(self.generator)

                _, z, pre_z, phrase_feature = self.generator(
                    note, pre_note, pre_phrase, position)

                #### Phrase Feature ###
                phrase_fake = (torch.randn(phrase_feature.size(0),
                                           phrase_feature.size(1)) *
                               self.config.sigma).cuda()
                d_phrase_fake = self.z_discriminator_phrase(phrase_fake).view(
                    -1)
                d_phrase_real = self.z_discriminator_phrase(
                    phrase_feature).view(-1)
                phraseZ_dics_loss = self.loss_phrase(d_phrase_real, fake_target) +\
                                    self.loss_phrase(d_phrase_fake, valid_target)

                #### Bar Feature ####
                bar_fake = (torch.randn(z.size(0) * 2, z.size(1)) *
                            self.config.sigma).cuda()
                d_bar_fake = self.z_discriminator_bar(bar_fake).view(-1)
                d_bar_real1 = self.z_discriminator_bar(z).view(-1)
                d_bar_real2 = self.z_discriminator_bar(pre_z).view(-1)
                barZ_dics_loss = self.loss_bar(d_bar_real1, fake_target) + self.loss_bar(d_bar_real2, fake_target) + \
                                 self.loss_bar(d_bar_fake, valid_target_double)

                #######################
                phraseZ_dics_loss.backward()
                barZ_dics_loss.backward()

                self.opt_Zdiscriminator_bar.step()
                self.opt_Zdiscriminator_phrase.step()

                avg_barZ_disc_loss.update(barZ_dics_loss)
                avg_phraseZ_disc_loss.update(phraseZ_dics_loss)

            #################### Generator ####################
            self.free(self.generator)

            self.frozen(self.z_discriminator_bar)
            self.frozen(self.z_discriminator_phrase)

            gen_note, z, pre_z, phrase_feature = self.generator(
                note, pre_note, pre_phrase, position)

            image_sample = gen_note
            origin_image = note

            #### Phrase Encoder Loss ###
            d_phrase_real = self.z_discriminator_phrase(phrase_feature).view(
                -1)
            loss = self.loss_phrase(d_phrase_real, valid_target)

            #### Bar Encoder Loss ####
            d_bar_real1 = self.z_discriminator_bar(z).view(-1)
            # Second "real" score from the bar-level z-discriminator on the
            # encoder latent pre_z; both real-sample bar losses are folded
            # into the running generator-side loss.
            d_bar_real2 = self.z_discriminator_bar(pre_z).view(-1)
            loss += self.loss_bar(d_bar_real1, valid_target) + self.loss_bar(
                d_bar_real2, valid_target)

            #### Generator Loss ####
            # The third argument toggles the pretraining variant of the
            # generator loss while epoch <= pretraining_step_size.
            # NOTE(review): `True if cond else False` is just `cond`.
            loss += self.loss_generator(
                gen_note, note,
                True if self.epoch <= self.pretraining_step_size else False)

            loss.backward()

            self.opt_generator.step()

            # NOTE(review): passes the loss *tensor*, not loss.item() —
            # if AverageMeter stores it, this keeps the autograd graph
            # alive across iterations; confirm and prefer loss.item().
            avg_generator_loss.update(loss)

            # Per-iteration scalar logging, keyed by epoch (so later
            # iterations in the same epoch overwrite earlier points).
            self.summary_writer.add_scalar("train/Generator_loss",
                                           avg_generator_loss.val, self.epoch)
            # NOTE(review): `% 2 is 0` is an identity check on an int; it
            # happens to work in CPython via small-int caching but is a
            # SyntaxWarning from Python 3.8 — should be `% 2 == 0`.
            if self.epoch > self.pretraining_step_size and self.epoch % 2 is 0:
                self.summary_writer.add_scalar(
                    "train/Bar_Z_Discriminator_loss", avg_barZ_disc_loss.val,
                    self.epoch)
                self.summary_writer.add_scalar(
                    "train/Phrase_Z_discriminator_loss",
                    avg_phraseZ_disc_loss.val, self.epoch)

        tqdm_batch.close()

        # Log the first three generated samples of the last batch as
        # single-channel 96x60 images (presumably piano-roll bars —
        # TODO confirm the 96x60 layout against the dataset).
        self.summary_writer.add_image("train/sample 1",
                                      image_sample[0].reshape(1, 96,
                                                              60), self.epoch)
        self.summary_writer.add_image("train/sample 2",
                                      image_sample[1].reshape(1, 96,
                                                              60), self.epoch)
        self.summary_writer.add_image("train/sample 3",
                                      image_sample[2].reshape(1, 96,
                                                              60), self.epoch)

        # Binarize the generated samples with a 0.3 threshold before
        # logging them a second time.
        image_sample = torch.gt(image_sample,
                                0.3).type('torch.cuda.FloatTensor')

        self.summary_writer.add_image("train/sample_binarization 1",
                                      image_sample[0].reshape(1, 96,
                                                              60), self.epoch)
        self.summary_writer.add_image("train/sample_binarization 2",
                                      image_sample[1].reshape(1, 96,
                                                              60), self.epoch)
        self.summary_writer.add_image("train/sample_binarization 3",
                                      image_sample[2].reshape(1, 96,
                                                              60), self.epoch)

        # Ground-truth counterparts for visual comparison.
        self.summary_writer.add_image("train/origin 1",
                                      origin_image[0].reshape(1, 96,
                                                              60), self.epoch)
        self.summary_writer.add_image("train/origin 2",
                                      origin_image[1].reshape(1, 96,
                                                              60), self.epoch)
        self.summary_writer.add_image("train/origin 3",
                                      origin_image[2].reshape(1, 96,
                                                              60), self.epoch)

        # Autoregressive sampling pass: generate 10 phrases of 4 bars
        # each, feeding each generated bar (and the previous full
        # phrase) back into the generator.
        with torch.no_grad():
            self.generator.eval()
            self.z_discriminator_bar.eval()
            self.z_discriminator_phrase.eval()

            outputs = []
            # Zero tensors stand in for the (nonexistent) previous
            # phrase (1x1x384x60 = 4 stacked 96x60 bars) and bar.
            pre_phrase = torch.zeros(1, 1, 384, 60, dtype=torch.float32)
            pre_bar = torch.zeros(1, 1, 96, 60, dtype=torch.float32)
            # First index is 330, then 8..0 — presumably 330 is a
            # sentinel embedding index for "no previous phrase";
            # verify against the generator's embedding table.
            phrase_idx = [330] + [i for i in range(10 - 2, -1, -1)]
            for idx in range(10):
                bar_set = []
                for _ in range(4):
                    # Inputs: fresh 1152-d noise, previous bar,
                    # previous phrase, phrase-position index; final
                    # False presumably disables the pretraining path.
                    pre_bar = self.generator(
                        torch.randn(1, 1152, dtype=torch.float32).cuda(),
                        pre_bar.cuda(), pre_phrase,
                        torch.from_numpy(np.array([phrase_idx[idx]])), False)
                    # Binarize each sampled bar (same 0.3 threshold)
                    # before it is fed back in.
                    # NOTE(review): trailing comment says 96x96 but the
                    # reshape below uses 96x60 — the comment looks stale.
                    pre_bar = torch.gt(pre_bar, 0.3).type(
                        'torch.FloatTensor')  # 1, 1, 96, 96
                    bar_set.append(np.reshape(pre_bar.numpy(), [96, 60]))

                # Stack the 4 bars into one (384, 60) phrase; keep a
                # NumPy copy for logging and a CUDA tensor for the
                # next iteration's conditioning input.
                pre_phrase = np.concatenate(bar_set, axis=0)
                outputs.append(pre_phrase)
                pre_phrase = torch.from_numpy(
                    np.reshape(pre_phrase, [1, 1, 96 * 4, 60])).float().cuda()

        # Log the first two generated phrases (NumPy arrays) as images.
        self.summary_writer.add_image("eval/generated 1",
                                      outputs[0].reshape(1, 96 * 4,
                                                         60), self.epoch)
        self.summary_writer.add_image("eval/generated 2",
                                      outputs[1].reshape(1, 96 * 4,
                                                         60), self.epoch)

        # Plateau-style scheduler steps driven by the running losses.
        self.scheduler_generator.step(avg_generator_loss.val)
        # NOTE(review): same `is 0` identity-comparison issue as above —
        # should be `% 2 == 0`. Also note this gates on (epoch + curr_it)
        # while the in-loop logging above gates on epoch alone; confirm
        # the asymmetry is intentional.
        if self.epoch > self.pretraining_step_size and (self.epoch +
                                                        curr_it) % 2 is 0:
            self.scheduler_Zdiscriminator_bar.step(avg_barZ_disc_loss.val)
            self.scheduler_Zdiscriminator_phrase.step(
                avg_phraseZ_disc_loss.val)

        # End-of-epoch summary; warning level is presumably used so the
        # lines stand out in the default log filter.
        self.logger.warning(
            'loss info - generator: {}, barZ disc: {},  phraseZ disc: {}'.
            format(avg_generator_loss.val, avg_barZ_disc_loss.val,
                   avg_phraseZ_disc_loss.val))
        self.logger.warning(
            'lr info - generator: {}, barZ disc: {},  phraseZ disc: {}'.format(
                self.get_lr(self.opt_generator),
                self.get_lr(self.opt_Zdiscriminator_bar),
                self.get_lr(self.opt_Zdiscriminator_phrase)))
Example #30
0
 def on_test_start(state):
     state['loss_meter'] = AverageMeter()
     state['sorted_segments_list'] = []